Added option to exclude C# using directives from duplicated code analysis.

To exclude C# using directives, set the system property 'ignore_usings' to 'true' by passing '-Dignore_usings=true' on the command line.
This commit is contained in:
Jan van Nunen
2015-10-23 15:11:25 +02:00
parent aa87701ee5
commit 7be198aaa3
3 changed files with 373 additions and 270 deletions

View File

@ -3,15 +3,24 @@
*/
package net.sourceforge.pmd.cpd;
import java.util.Properties;
/**
* Language implementation for C#
*/
public class CsLanguage extends AbstractLanguage {

    /**
     * Creates a new C# Language instance configured from the JVM system properties.
     */
    public CsLanguage() {
        this(System.getProperties());
    }

    /**
     * Creates a new C# Language instance and hands the given properties to its tokenizer.
     *
     * @param properties the properties forwarded to the {@link CsTokenizer}
     */
    public CsLanguage(Properties properties) {
        super("C#", "cs", new CsTokenizer(), ".cs");
        setProperties(properties);
    }

    /**
     * Forwards the given properties to the {@link CsTokenizer} used by this language.
     *
     * @param properties the properties to apply to the tokenizer
     */
    public final void setProperties(Properties properties) {
        // The tokenizer installed by the constructor is always a CsTokenizer.
        ((CsTokenizer) getTokenizer()).setProperties(properties);
    }
}

View File

@ -5,9 +5,13 @@ package net.sourceforge.pmd.cpd;
import java.io.BufferedReader;
import java.io.CharArrayReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.PushbackReader;
import java.util.Properties;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.RandomStringUtils;
/**
* This class does a best-guess try-anything tokenization.
@ -16,13 +20,78 @@ import org.apache.commons.io.IOUtils;
*/
public class CsTokenizer implements Tokenizer {
public static final String IGNORE_USINGS = "ignore_usings";
private boolean ignoreUsings = false;
/**
 * Configures this tokenizer from the given properties.
 *
 * <p>Only the {@link #IGNORE_USINGS} property is read. When it has a value, that value is
 * parsed with {@link Boolean#parseBoolean(String)} and stored; when it has no value the
 * current setting is left untouched.</p>
 *
 * @param properties the properties to read the configuration from
 */
public void setProperties(Properties properties) {
    // Use getProperty rather than containsKey: getProperty also consults the default
    // property list of the Properties object, which containsKey does not.
    String value = properties.getProperty(IGNORE_USINGS);
    if (value != null) {
        ignoreUsings = Boolean.parseBoolean(value);
    }
}
/**
 * Tokenizes the given source and appends one {@link TokenEntry} per token to
 * {@code tokenEntries}, followed by the EOF entry.
 *
 * <p>Semicolon tokens are never added. When {@code ignoreUsings} is set, every
 * {@code using} token that is not directly followed by {@code (} is treated as a using
 * directive: it is replaced by a random 20-character alphanumeric token and the rest of
 * the directive, up to the next semicolon or the end of input, is dropped.</p>
 *
 * @param sourceCode   the source to tokenize
 * @param tokenEntries the collection receiving the resulting token entries
 */
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
    Tokenizer tokenizer = new Tokenizer(sourceCode.getCodeBuffer().toString());
    try {
        Token token = tokenizer.getNextToken();

        while (!token.equals(Token.EOF)) {
            Token lookAhead = tokenizer.getNextToken();

            // Ignore using directives
            // Only using directives should be ignored, because these are used to import namespaces
            //
            // Using directive: 'using System.Math;'
            // Using statement: 'using (Font font1 = new Font(..)) { .. }'
            if (ignoreUsings
                    && "using".equals(token.image)
                    && !"(".equals(lookAhead.image)) {
                // We replace the 'using' token by a random token, because it should not be part of
                // any duplication block. When we omit it from the token stream, there is a chance that
                // we get a duplication block that starts before the 'using' directives and ends afterwards.
                String randomTokenText = RandomStringUtils.randomAlphanumeric(20);
                token = new Token(randomTokenText, token.lineNumber);

                // Skip all other tokens of the using directive to prevent a partial matching
                while (!";".equals(lookAhead.image) && !lookAhead.equals(Token.EOF)) {
                    lookAhead = tokenizer.getNextToken();
                }
            }

            if (!";".equals(token.image)) {
                tokenEntries.add(new TokenEntry(token.image, sourceCode.getFileName(), token.lineNumber));
            }
            token = lookAhead;
        }
        tokenEntries.add(TokenEntry.getEOF());
    } finally {
        // Close the reader even when tokenization is interrupted by an exception.
        IOUtils.closeQuietly(tokenizer);
    }
}
/**
 * Sets the flag that controls whether using directives are ignored.
 *
 * @param ignoreUsings the new value of the flag
 */
public void setIgnoreUsings(final boolean ignoreUsings) {
    this.ignoreUsings = ignoreUsings;
}
private static class Tokenizer implements Closeable {
private boolean endOfFile;
private int line;
private final PushbackReader reader;
public Tokenizer(String sourceCode) {
endOfFile = false;
line = 1;
reader = new PushbackReader(new BufferedReader(new CharArrayReader(sourceCode.toCharArray())));
}
public Token getNextToken() {
if (endOfFile) {
return Token.EOF;
}
try {
int ic = reader.read();
int line = 1;
char c;
StringBuilder b;
while (ic != -1) {
@ -41,32 +110,27 @@ public class CsTokenizer implements Tokenizer {
ic = reader.read();
break;
// ignore semicolons
case ';':
ic = reader.read();
break;
return new Token(";", line);
// < << <= <<= > >> >= >>=
case '<':
case '>':
ic = reader.read();
if (ic == '=') {
tokenEntries.add(new TokenEntry(c + "=", sourceCode.getFileName(), line));
ic = reader.read();
return new Token(c + "=", line);
} else if (ic == c) {
ic = reader.read();
if (ic == '=') {
tokenEntries.add(new TokenEntry(c + c + "=", sourceCode
.getFileName(), line));
ic = reader.read();
return new Token(c + c + "=", line);
} else {
tokenEntries.add(new TokenEntry(String.valueOf(c) + c, sourceCode
.getFileName(), line));
reader.unread(ic);
return new Token(String.valueOf(c) + c, line);
}
} else {
tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
reader.unread(ic);
return new Token(String.valueOf(c), line);
}
break;
// = == & &= && | |= || + += ++ - -= --
case '=':
@ -76,13 +140,11 @@ public class CsTokenizer implements Tokenizer {
case '-':
ic = reader.read();
if (ic == '=' || ic == c) {
tokenEntries.add(new TokenEntry(c + String.valueOf((char) ic), sourceCode
.getFileName(), line));
ic = reader.read();
return new Token(c + String.valueOf((char) ic), line);
} else {
tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
reader.unread(ic);
return new Token(String.valueOf(c), line);
}
break;
// ! != * *= % %= ^ ^= ~ ~=
case '!':
@ -92,12 +154,11 @@ public class CsTokenizer implements Tokenizer {
case '~':
ic = reader.read();
if (ic == '=') {
tokenEntries.add(new TokenEntry(c + "=", sourceCode.getFileName(), line));
ic = reader.read();
return new Token(c + "=", line);
} else {
tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
reader.unread(ic);
return new Token(String.valueOf(c), line);
}
break;
// strings & chars
case '"':
@ -126,9 +187,7 @@ public class CsTokenizer implements Tokenizer {
if (ic != -1) {
b.append((char) ic);
}
tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), beginLine));
ic = reader.read();
break;
return new Token(b.toString(), beginLine);
// / /= /*...*/ //...
case '/':
@ -180,13 +239,11 @@ public class CsTokenizer implements Tokenizer {
break;
case '=':
tokenEntries.add(new TokenEntry("/=", sourceCode.getFileName(), line));
ic = reader.read();
break;
return new Token("/=", line);
default:
tokenEntries.add(new TokenEntry("/", sourceCode.getFileName(), line));
break;
reader.unread(ic);
return new Token("/", line);
}
break;
@ -198,7 +255,8 @@ public class CsTokenizer implements Tokenizer {
b.append(c);
c = (char) (ic = reader.read());
} while (Character.isJavaIdentifierPart(c));
tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), line));
reader.unread(ic);
return new Token(b.toString(), line);
}
// numbers
else if (Character.isDigit(c) || c == '.') {
@ -214,22 +272,37 @@ public class CsTokenizer implements Tokenizer {
}
c = (char) (ic = reader.read());
} while ("1234567890.iIlLfFdDsSuUeExX".indexOf(c) != -1);
tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), line));
reader.unread(ic);
return new Token(b.toString(), line);
}
// anything else
else {
tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
ic = reader.read();
break;
return new Token(String.valueOf(c), line);
}
}
}
} catch (IOException e) {
e.printStackTrace();
} finally {
IOUtils.closeQuietly(reader);
tokenEntries.add(TokenEntry.getEOF());
}
endOfFile = true;
return Token.EOF;
}
/**
 * Closes the underlying reader.
 *
 * @throws IOException if closing the reader fails
 */
@Override
public void close() throws IOException {
    this.reader.close();
}
}
/**
 * A token with its text and the line number it was reported on.
 */
private static class Token {

    /** Shared marker instance with image "EOF" and line number -1. */
    public static final Token EOF = new Token("EOF", -1);

    /** The text of the token. */
    public final String image;

    /** The line number associated with the token. */
    public final int lineNumber;

    /**
     * Creates a token.
     *
     * @param image      the text of the token
     * @param lineNumber the line number associated with the token
     */
    public Token(String image, int lineNumber) {
        this.lineNumber = lineNumber;
        this.image = image;
    }
}
}

View File

@ -5,6 +5,7 @@
package net.sourceforge.pmd.cpd;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import java.util.List;
@ -13,12 +14,13 @@ import org.junit.Test;
public class CsTokenizerTest {
private CsTokenizer tokenizer = new CsTokenizer();
private CsTokenizer tokenizer;
private Tokens tokens;
/**
 * Gives every test a fresh tokenizer and token collection and clears the token images.
 */
@Before
public void init() {
    this.tokenizer = new CsTokenizer();
    this.tokens = new Tokens();
    TokenEntry.clearImages();
}
@ -138,6 +140,25 @@ public class CsTokenizerTest {
assertEquals(8, tokens.getTokens().get(14).getBeginLine());
}
/**
 * With ignoreUsings enabled, a using directive must not produce a "using" token and
 * must yield exactly two tokens in total.
 */
@Test
public void testIgnoreUsingDirectives() {
    tokenizer.setIgnoreUsings(true);

    SourceCode usingDirective = toSourceCode("using System.Text;\n");
    tokenizer.tokenize(usingDirective, tokens);

    String firstImage = tokens.getTokens().get(0).toString();
    assertNotEquals("using", firstImage);
    assertEquals(2, tokens.size());
}
/**
 * With ignoreUsings enabled, a using statement (a {@code using} followed by a
 * parenthesis) must still produce its "using" token.
 */
@Test
public void testUsingStatementsAreNotIgnored() {
    tokenizer.setIgnoreUsings(true);

    String usingStatement =
            "using (Font font1 = new Font(\"Arial\", 10.0f)) {\n"
            + "  byte charset = font1.GdiCharSet;\n"
            + "}\n";
    tokenizer.tokenize(toSourceCode(usingStatement), tokens);

    String firstImage = tokens.getTokens().get(0).toString();
    assertEquals("using", firstImage);
}
private SourceCode toSourceCode(String source) {
return new SourceCode(new SourceCode.StringCodeLoader(source));
}