Added an option to exclude C# using directives from duplicated code analysis.
To exclude C# using directives, set the system property 'ignore_usings' to 'true', for example by passing '-Dignore_usings=true' on the command line.
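
The no-arg CsLanguage() constructor reads System.getProperties(), so the flag can also be enabled programmatically. A minimal sketch, assuming CPD is driven from Java code (the wrapper class name is invented for illustration; CsLanguage, CsTokenizer and the 'ignore_usings' key come from this change):

import net.sourceforge.pmd.cpd.CsLanguage;
import net.sourceforge.pmd.cpd.CsTokenizer;

public class IgnoreUsingsViaSystemProperty {
    public static void main(String[] args) {
        // Same effect as passing -Dignore_usings=true on the command line:
        // CsLanguage() reads System.getProperties() and forwards them to its CsTokenizer.
        System.setProperty(CsTokenizer.IGNORE_USINGS, "true");
        CsLanguage csharp = new CsLanguage();
    }
}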
@@ -3,15 +3,24 @@
 */
package net.sourceforge.pmd.cpd;

import java.util.Properties;

/**
 * Language implementation for C#
 */
public class CsLanguage extends AbstractLanguage {

    /**
     * Creates a new C# Language instance.
     */
    public CsLanguage() {
        this(System.getProperties());
    }

    public CsLanguage(Properties properties) {
        super("C#", "cs", new CsTokenizer(), ".cs");
        setProperties(properties);
    }

    public final void setProperties(Properties properties) {
        CsTokenizer tokenizer = (CsTokenizer) getTokenizer();
        tokenizer.setProperties(properties);
    }
}
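A minimal sketch of the other configuration route shown above: handing an explicit Properties object to CsLanguage(Properties), which forwards it to the tokenizer via setProperties(). The surrounding class is hypothetical and only illustrates the API added in this commit.

import java.util.Properties;

import net.sourceforge.pmd.cpd.CsLanguage;
import net.sourceforge.pmd.cpd.CsTokenizer;

public class IgnoreUsingsViaProperties {
    public static void main(String[] args) {
        Properties properties = new Properties();
        properties.setProperty(CsTokenizer.IGNORE_USINGS, "true");

        // The constructor calls setProperties(properties), so the CsTokenizer
        // behind this language instance will skip C# using directives.
        CsLanguage csharp = new CsLanguage(properties);
    }
}

Both routes end up in CsTokenizer.setProperties(), so they behave identically.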
@@ -5,9 +5,13 @@ package net.sourceforge.pmd.cpd;

import java.io.BufferedReader;
import java.io.CharArrayReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.PushbackReader;
import java.util.Properties;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.RandomStringUtils;

/**
 * This class does a best-guess try-anything tokenization.
@@ -16,13 +20,78 @@ import org.apache.commons.io.IOUtils;
 */
public class CsTokenizer implements Tokenizer {

    public static final String IGNORE_USINGS = "ignore_usings";

    private boolean ignoreUsings = false;

    public void setProperties(Properties properties) {
        if (properties.containsKey(IGNORE_USINGS)) {
            ignoreUsings = Boolean.parseBoolean(properties.getProperty(IGNORE_USINGS, "false"));
        }
    }

    @Override
    public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
        BufferedReader reader = new BufferedReader(new CharArrayReader(sourceCode.getCodeBuffer().toString()
                .toCharArray()));
        Tokenizer tokenizer =
                new Tokenizer(sourceCode.getCodeBuffer().toString());
        Token token = tokenizer.getNextToken();

        while (!token.equals(Token.EOF)) {
            Token lookAhead = tokenizer.getNextToken();

            // Ignore using directives
            // Only using directives should be ignored, because these are used to import namespaces
            //
            // Using directive: 'using System.Math;'
            // Using statement: 'using (Font font1 = new Font(..)) { .. }'
            if (ignoreUsings &&
                    "using".equals(token.image) &&
                    !"(".equals(lookAhead.image)
                    ) {
                // We replace the 'using' token with a random token, because it should not be part of
                // any duplication block. When we omit it from the token stream, there is a chance that
                // we get a duplication block that starts before the 'using' directives and ends afterwards.
                String randomTokenText =
                        RandomStringUtils.randomAlphanumeric(20);

                token = new Token(randomTokenText, token.lineNumber);
                // Skip all other tokens of the using directive to prevent a partial match
                while (!";".equals(lookAhead.image) && !lookAhead.equals(Token.EOF)) {
                    lookAhead = tokenizer.getNextToken();
                }
            }
            if (!";".equals(token.image)) {
                tokenEntries.add(new TokenEntry(token.image, sourceCode.getFileName(), token.lineNumber));
            }
            token = lookAhead;
        }
        tokenEntries.add(TokenEntry.getEOF());
        IOUtils.closeQuietly(tokenizer);
    }

    public void setIgnoreUsings(boolean ignoreUsings) {
        this.ignoreUsings = ignoreUsings;
    }


    private static class Tokenizer implements Closeable {
        private boolean endOfFile;
        private int line;
        private final PushbackReader reader;

        public Tokenizer(String sourceCode) {
            endOfFile = false;
            line = 1;
            reader = new PushbackReader(new BufferedReader(new CharArrayReader(sourceCode.toCharArray())));
        }

        public Token getNextToken() {
            if (endOfFile) {
                return Token.EOF;
            }

            try {
                int ic = reader.read();
                int line = 1;
                char c;
                StringBuilder b;
                while (ic != -1) {
@@ -41,32 +110,27 @@ public class CsTokenizer implements Tokenizer {
                        ic = reader.read();
                        break;

                    // ignore semicolons
                    case ';':
                        ic = reader.read();
                        break;
                        return new Token(";", line);

                    // < << <= <<= > >> >= >>=
                    case '<':
                    case '>':
                        ic = reader.read();
                        if (ic == '=') {
                            tokenEntries.add(new TokenEntry(c + "=", sourceCode.getFileName(), line));
                            ic = reader.read();
                            return new Token(c + "=", line);
                        } else if (ic == c) {
                            ic = reader.read();
                            if (ic == '=') {
                                tokenEntries.add(new TokenEntry(c + c + "=", sourceCode
                                        .getFileName(), line));
                                ic = reader.read();
                                return new Token(c + c + "=", line);
                            } else {
                                tokenEntries.add(new TokenEntry(String.valueOf(c) + c, sourceCode
                                        .getFileName(), line));
                                reader.unread(ic);
                                return new Token(String.valueOf(c) + c, line);
                            }
                        } else {
                            tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
                            reader.unread(ic);
                            return new Token(String.valueOf(c), line);
                        }
                        break;

                    // = == & &= && | |= || + += ++ - -= --
                    case '=':
@@ -76,13 +140,11 @@ public class CsTokenizer implements Tokenizer {
                    case '-':
                        ic = reader.read();
                        if (ic == '=' || ic == c) {
                            tokenEntries.add(new TokenEntry(c + String.valueOf((char) ic), sourceCode
                                    .getFileName(), line));
                            ic = reader.read();
                            return new Token(c + String.valueOf((char) ic), line);
                        } else {
                            tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
                            reader.unread(ic);
                            return new Token(String.valueOf(c), line);
                        }
                        break;

                    // ! != * *= % %= ^ ^= ~ ~=
                    case '!':
@@ -92,12 +154,11 @@ public class CsTokenizer implements Tokenizer {
                    case '~':
                        ic = reader.read();
                        if (ic == '=') {
                            tokenEntries.add(new TokenEntry(c + "=", sourceCode.getFileName(), line));
                            ic = reader.read();
                            return new Token(c + "=", line);
                        } else {
                            tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
                            reader.unread(ic);
                            return new Token(String.valueOf(c), line);
                        }
                        break;

                    // strings & chars
                    case '"':
@@ -126,9 +187,7 @@ public class CsTokenizer implements Tokenizer {
                        if (ic != -1) {
                            b.append((char) ic);
                        }
                        tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), beginLine));
                        ic = reader.read();
                        break;
                        return new Token(b.toString(), beginLine);

                    // / /= /*...*/ //...
                    case '/':
@@ -180,13 +239,11 @@ public class CsTokenizer implements Tokenizer {
                            break;

                        case '=':
                            tokenEntries.add(new TokenEntry("/=", sourceCode.getFileName(), line));
                            ic = reader.read();
                            break;
                            return new Token("/=", line);

                        default:
                            tokenEntries.add(new TokenEntry("/", sourceCode.getFileName(), line));
                            break;
                            reader.unread(ic);
                            return new Token("/", line);
                        }
                        break;
@@ -198,7 +255,8 @@ public class CsTokenizer implements Tokenizer {
                            b.append(c);
                            c = (char) (ic = reader.read());
                        } while (Character.isJavaIdentifierPart(c));
                        tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), line));
                        reader.unread(ic);
                        return new Token(b.toString(), line);
                    }
                    // numbers
                    else if (Character.isDigit(c) || c == '.') {
@@ -214,22 +272,37 @@ public class CsTokenizer implements Tokenizer {
                            }
                            c = (char) (ic = reader.read());
                        } while ("1234567890.iIlLfFdDsSuUeExX".indexOf(c) != -1);

                        tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), line));
                        reader.unread(ic);
                        return new Token(b.toString(), line);
                    }
                    // anything else
                    else {
                        tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
                        ic = reader.read();
                        break;
                        return new Token(String.valueOf(c), line);
                    }
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                IOUtils.closeQuietly(reader);
                tokenEntries.add(TokenEntry.getEOF());
            }
            endOfFile = true;
            return Token.EOF;
        }

        @Override
        public void close() throws IOException {
            reader.close();
        }
    }

    private static class Token {
        public static final Token EOF = new Token("EOF", -1);

        public final String image;
        public final int lineNumber;

        public Token(String image, int lineNumber) {
            this.image = image;
            this.lineNumber = lineNumber;
        }
    }
}
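For reference, a small stand-alone sketch of the tokenizer behaviour that the unit tests below verify: it feeds a C# snippet through CsTokenizer with setIgnoreUsings(true) and prints the surviving token images. The demo class name and the sample snippet are made up; all APIs used here appear in this commit or its tests.

import java.util.List;

import net.sourceforge.pmd.cpd.CsTokenizer;
import net.sourceforge.pmd.cpd.SourceCode;
import net.sourceforge.pmd.cpd.TokenEntry;
import net.sourceforge.pmd.cpd.Tokens;

public class CsTokenizerDemo {
    public static void main(String[] args) {
        CsTokenizer tokenizer = new CsTokenizer();
        tokenizer.setIgnoreUsings(true);

        Tokens tokens = new Tokens();
        SourceCode sourceCode = new SourceCode(
                new SourceCode.StringCodeLoader("using System.Text;\nclass C { }\n"));
        tokenizer.tokenize(sourceCode, tokens);

        // The 'using' directive is replaced by a random token and the rest of the
        // directive is skipped, so only 'class C { }' contributes comparable tokens.
        List<TokenEntry> entries = tokens.getTokens();
        for (TokenEntry entry : entries) {
            System.out.println(entry.toString());
        }
    }
}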
@@ -5,6 +5,7 @@
package net.sourceforge.pmd.cpd;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;

import java.util.List;

@@ -13,12 +14,13 @@ import org.junit.Test;

public class CsTokenizerTest {

    private CsTokenizer tokenizer = new CsTokenizer();
    private CsTokenizer tokenizer;

    private Tokens tokens;

    @Before
    public void init() {
        tokenizer = new CsTokenizer();
        tokens = new Tokens();
        TokenEntry.clearImages();
    }
@@ -138,6 +140,25 @@ public class CsTokenizerTest {
        assertEquals(8, tokens.getTokens().get(14).getBeginLine());
    }

    @Test
    public void testIgnoreUsingDirectives() {
        tokenizer.setIgnoreUsings(true);
        tokenizer.tokenize(toSourceCode("using System.Text;\n"), tokens);
        assertNotEquals("using", tokens.getTokens().get(0).toString());
        assertEquals(2, tokens.size());
    }

    @Test
    public void testUsingStatementsAreNotIgnored() {
        tokenizer.setIgnoreUsings(true);
        tokenizer.tokenize(toSourceCode(
            "using (Font font1 = new Font(\"Arial\", 10.0f)) {\n"
            + " byte charset = font1.GdiCharSet;\n"
            + "}\n"
            ), tokens);
        assertEquals("using", tokens.getTokens().get(0).toString());
    }

    private SourceCode toSourceCode(String source) {
        return new SourceCode(new SourceCode.StringCodeLoader(source));
    }