false
.
+ */
+ String IGNORE_USINGS = "ignore_usings";
+
/**
* Enables or disabled skipping of blocks like a pre-processor.
* It is a boolean property.
diff --git a/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsLanguage.java b/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsLanguage.java
index 2926b9521b..f6a1bd145f 100644
--- a/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsLanguage.java
+++ b/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsLanguage.java
@@ -3,15 +3,24 @@
*/
package net.sourceforge.pmd.cpd;
+import java.util.Properties;
+
/**
* Language implementation for C#
*/
public class CsLanguage extends AbstractLanguage {
- /**
- * Creates a new C# Language instance.
- */
public CsLanguage() {
+ this(System.getProperties());
+ }
+
+ /**
+ * Creates a new C# Language instance and configures its tokenizer
+ * from the given properties.
+ *
+ * @param properties the properties passed on to {@link #setProperties(Properties)}
+ */
+ public CsLanguage(Properties properties) {
super("C#", "cs", new CsTokenizer(), ".cs");
+ setProperties(properties);
+ }
+
+ /**
+ * Forwards the given properties to the C# tokenizer of this language.
+ * Declared final because it is invoked from the constructor.
+ *
+ * @param properties the properties handed to {@link CsTokenizer#setProperties(Properties)}
+ */
+ public final void setProperties(Properties properties) {
+ CsTokenizer tokenizer = (CsTokenizer)getTokenizer();
+ tokenizer.setProperties(properties);
}
}
diff --git a/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsTokenizer.java b/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsTokenizer.java
index 2ccdccc3c5..c9dcce4a4e 100644
--- a/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsTokenizer.java
+++ b/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsTokenizer.java
@@ -5,9 +5,13 @@ package net.sourceforge.pmd.cpd;
import java.io.BufferedReader;
import java.io.CharArrayReader;
+import java.io.Closeable;
import java.io.IOException;
+import java.io.PushbackReader;
+import java.util.Properties;
import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.RandomStringUtils;
/**
* This class does a best-guess try-anything tokenization.
@@ -16,220 +20,287 @@ import org.apache.commons.io.IOUtils;
*/
public class CsTokenizer implements Tokenizer {
+ private boolean ignoreUsings = false;
+
+ /**
+ * Configures this tokenizer from the given properties.
+ * Only the {@code IGNORE_USINGS} key is read; when it is absent the
+ * current setting is left untouched.
+ *
+ * @param properties the properties to read the configuration from
+ */
+ public void setProperties(Properties properties) {
+ // A single getProperty() lookup replaces containsKey() + getProperty():
+ // unlike containsKey(), getProperty() also consults the defaults chain
+ // of the Properties object, and it returns null when the key is missing.
+ String value = properties.getProperty(IGNORE_USINGS);
+ if (value != null) {
+ ignoreUsings = Boolean.parseBoolean(value);
+ }
+ }
+
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
- BufferedReader reader = new BufferedReader(new CharArrayReader(sourceCode.getCodeBuffer().toString()
- .toCharArray()));
- try {
- int ic = reader.read();
- int line = 1;
- char c;
- StringBuilder b;
- while (ic != -1) {
- c = (char) ic;
- switch (c) {
- // new line
- case '\n':
- line++;
- ic = reader.read();
- break;
+ Tokenizer tokenizer =
+ new Tokenizer(sourceCode.getCodeBuffer().toString());
+ Token token = tokenizer.getNextToken();
- // white space
- case ' ':
- case '\t':
- case '\r':
- ic = reader.read();
- break;
+ while (!token.equals(Token.EOF)) {
+ Token lookAhead = tokenizer.getNextToken();
- // ignore semicolons
- case ';':
- ic = reader.read();
- break;
+ // Ignore using directives
+ // Only using directives should be ignored, because these are used to import namespaces
+ //
+ // Using directive: 'using System.Math;'
+ // Using statement: 'using (Font font1 = new Font(..)) { .. }'
+ if (ignoreUsings &&
+ "using".equals(token.image) &&
+ !"(".equals(lookAhead.image)
+ ) {
+ // We replace the 'using' token by a random token, because it should not be part of
+ // any duplication block. When we omit it from the token stream, there is a chance that
+ // we get a duplication block that starts before the 'using' directives and ends afterwards.
+ String randomTokenText =
+ RandomStringUtils.randomAlphanumeric(20);
- // < << <= <<= > >> >= >>=
- case '<':
- case '>':
- ic = reader.read();
- if (ic == '=') {
- tokenEntries.add(new TokenEntry(c + "=", sourceCode.getFileName(), line));
+ token = new Token(randomTokenText, token.lineNumber);
+ //Skip all other tokens of the using directive to prevent a partial matching
+ while (!";".equals(lookAhead.image) && !lookAhead.equals(Token.EOF)) {
+ lookAhead = tokenizer.getNextToken();
+ }
+ }
+ if (!";".equals(token.image)) {
+ tokenEntries.add(new TokenEntry(token.image, sourceCode.getFileName(), token.lineNumber));
+ }
+ token = lookAhead;
+ }
+ tokenEntries.add(TokenEntry.getEOF());
+ IOUtils.closeQuietly(tokenizer);
+ }
+
+ /**
+ * Sets whether 'using' directives are ignored while tokenizing.
+ * When enabled, {@code tokenize} replaces each using directive by a single random token.
+ *
+ * @param ignoreUsings {@code true} to ignore using directives, {@code false} to tokenize them as-is
+ */
+ public void setIgnoreUsings(boolean ignoreUsings) {
+ this.ignoreUsings = ignoreUsings;
+ }
+
+
+ private static class Tokenizer implements Closeable {
+ private boolean endOfFile;
+ private int line;
+ private final PushbackReader reader;
+
+ /**
+ * Creates a tokenizer that reads from the given source text, starting at line 1.
+ *
+ * @param sourceCode the complete source text to tokenize
+ */
+ public Tokenizer(String sourceCode) {
+ endOfFile = false;
+ line = 1;
+ reader = new PushbackReader(new BufferedReader(new CharArrayReader(sourceCode.toCharArray()))) {
+ @Override
+ public void unread(int c) throws IOException {
+ // getNextToken() pushes back its look-ahead unconditionally. Pushing back the
+ // end-of-stream marker (-1) would store it as the char U+FFFF, which the next
+ // read() returns as an ordinary character and which then shows up as a bogus token.
+ if (c != -1) {
+ super.unread(c);
+ }
+ }
+ };
+ }
+
+ public Token getNextToken() {
+ if (endOfFile) {
+ return Token.EOF;
+ }
+
+ try {
+ int ic = reader.read();
+ char c;
+ StringBuilder b;
+ while (ic != -1) {
+ c = (char) ic;
+ switch (c) {
+ // new line
+ case '\n':
+ line++;
ic = reader.read();
- } else if (ic == c) {
- ic = reader.read();
- if (ic == '=') {
- tokenEntries.add(new TokenEntry(c + c + "=", sourceCode
- .getFileName(), line));
- ic = reader.read();
- } else {
- tokenEntries.add(new TokenEntry(String.valueOf(c) + c, sourceCode
- .getFileName(), line));
- }
- } else {
- tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
- }
- break;
-
- // = == & &= && | |= || + += ++ - -= --
- case '=':
- case '&':
- case '|':
- case '+':
- case '-':
- ic = reader.read();
- if (ic == '=' || ic == c) {
- tokenEntries.add(new TokenEntry(c + String.valueOf((char) ic), sourceCode
- .getFileName(), line));
- ic = reader.read();
- } else {
- tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
- }
- break;
-
- // ! != * *= % %= ^ ^= ~ ~=
- case '!':
- case '*':
- case '%':
- case '^':
- case '~':
- ic = reader.read();
- if (ic == '=') {
- tokenEntries.add(new TokenEntry(c + "=", sourceCode.getFileName(), line));
- ic = reader.read();
- } else {
- tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
- }
- break;
-
- // strings & chars
- case '"':
- case '\'':
- int beginLine = line;
- b = new StringBuilder();
- b.append(c);
- while ((ic = reader.read()) != c) {
- if (ic == -1) {
- break;
- }
- b.append((char) ic);
- if (ic == '\\') {
- int next = reader.read();
- if (next != -1) {
- b.append((char) next);
-
- if (next == '\n') {
- line++;
- }
- }
- } else if (ic == '\n') {
- line++;
- }
- }
- if (ic != -1) {
- b.append((char) ic);
- }
- tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), beginLine));
- ic = reader.read();
- break;
-
- // / /= /*...*/ //...
- case '/':
- switch (c = (char) (ic = reader.read())) {
- case '*':
- //int beginLine = line;
- int state = 1;
- b = new StringBuilder();
- b.append("/*");
-
- while ((ic = reader.read()) != -1) {
- c = (char) ic;
- b.append(c);
-
- if (c == '\n') {
- line++;
- }
-
- if (state == 1) {
- if (c == '*') {
- state = 2;
- }
- } else {
- if (c == '/') {
- ic = reader.read();
- break;
- } else if (c != '*') {
- state = 1;
- }
- }
- }
- // ignore the /* comment
- // tokenEntries.add(new TokenEntry(b.toString(),
- // sourceCode.getFileName(), beginLine));
break;
- case '/':
+ // white space
+ case ' ':
+ case '\t':
+ case '\r':
+ ic = reader.read();
+ break;
+
+ case ';':
+ return new Token(";", line);
+
+ // < << <= <<= > >> >= >>=
+ case '<':
+ case '>':
+ ic = reader.read();
+ if (ic == '=') {
+ return new Token(c + "=", line);
+ } else if (ic == c) {
+ ic = reader.read();
+ if (ic == '=') {
+ // String.valueOf is required here: 'c + c' alone would add the two chars as ints
+ return new Token(String.valueOf(c) + c + "=", line);
+ } else {
+ reader.unread(ic);
+ return new Token(String.valueOf(c) + c, line);
+ }
+ } else {
+ reader.unread(ic);
+ return new Token(String.valueOf(c), line);
+ }
+
+ // = == & &= && | |= || + += ++ - -= --
+ case '=':
+ case '&':
+ case '|':
+ case '+':
+ case '-':
+ ic = reader.read();
+ if (ic == '=' || ic == c) {
+ return new Token(c + String.valueOf((char) ic), line);
+ } else {
+ reader.unread(ic);
+ return new Token(String.valueOf(c), line);
+ }
+
+ // ! != * *= % %= ^ ^= ~ ~=
+ case '!':
+ case '*':
+ case '%':
+ case '^':
+ case '~':
+ ic = reader.read();
+ if (ic == '=') {
+ return new Token(c + "=", line);
+ } else {
+ reader.unread(ic);
+ return new Token(String.valueOf(c), line);
+ }
+
+ // strings & chars
+ case '"':
+ case '\'':
+ int beginLine = line;
b = new StringBuilder();
- b.append("//");
- while ((ic = reader.read()) != '\n') {
+ b.append(c);
+ while ((ic = reader.read()) != c) {
if (ic == -1) {
break;
}
b.append((char) ic);
- }
- // ignore the // comment
- // tokenEntries.add(new TokenEntry(b.toString(),
- // sourceCode.getFileName(), line));
- break;
+ if (ic == '\\') {
+ int next = reader.read();
+ if (next != -1) {
+ b.append((char) next);
- case '=':
- tokenEntries.add(new TokenEntry("/=", sourceCode.getFileName(), line));
- ic = reader.read();
+ if (next == '\n') {
+ line++;
+ }
+ }
+ } else if (ic == '\n') {
+ line++;
+ }
+ }
+ if (ic != -1) {
+ b.append((char) ic);
+ }
+ return new Token(b.toString(), beginLine);
+
+ // / /= /*...*/ //...
+ case '/':
+ switch (c = (char) (ic = reader.read())) {
+ case '*':
+ //int beginLine = line;
+ int state = 1;
+ b = new StringBuilder();
+ b.append("/*");
+
+ while ((ic = reader.read()) != -1) {
+ c = (char) ic;
+ b.append(c);
+
+ if (c == '\n') {
+ line++;
+ }
+
+ if (state == 1) {
+ if (c == '*') {
+ state = 2;
+ }
+ } else {
+ if (c == '/') {
+ ic = reader.read();
+ break;
+ } else if (c != '*') {
+ state = 1;
+ }
+ }
+ }
+ // ignore the /* comment
+ // tokenEntries.add(new TokenEntry(b.toString(),
+ // sourceCode.getFileName(), beginLine));
+ break;
+
+ case '/':
+ b = new StringBuilder();
+ b.append("//");
+ while ((ic = reader.read()) != '\n') {
+ if (ic == -1) {
+ break;
+ }
+ b.append((char) ic);
+ }
+ // ignore the // comment
+ // tokenEntries.add(new TokenEntry(b.toString(),
+ // sourceCode.getFileName(), line));
+ break;
+
+ case '=':
+ return new Token("/=", line);
+
+ default:
+ reader.unread(ic);
+ return new Token("/", line);
+ }
break;
default:
- tokenEntries.add(new TokenEntry("/", sourceCode.getFileName(), line));
- break;
- }
- break;
-
- default:
- // [a-zA-Z_][a-zA-Z_0-9]*
- if (Character.isJavaIdentifierStart(c)) {
- b = new StringBuilder();
- do {
- b.append(c);
- c = (char) (ic = reader.read());
- } while (Character.isJavaIdentifierPart(c));
- tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), line));
- }
- // numbers
- else if (Character.isDigit(c) || c == '.') {
- b = new StringBuilder();
- do {
- b.append(c);
- if (c == 'e' || c == 'E') {
- c = (char) (ic = reader.read());
- if ("1234567890-".indexOf(c) == -1) {
- break;
- }
+ // [a-zA-Z_][a-zA-Z_0-9]*
+ if (Character.isJavaIdentifierStart(c)) {
+ b = new StringBuilder();
+ do {
b.append(c);
- }
- c = (char) (ic = reader.read());
- } while ("1234567890.iIlLfFdDsSuUeExX".indexOf(c) != -1);
-
- tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), line));
- }
- // anything else
- else {
- tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
- ic = reader.read();
- break;
+ c = (char) (ic = reader.read());
+ } while (Character.isJavaIdentifierPart(c));
+ reader.unread(ic);
+ return new Token(b.toString(), line);
+ }
+ // numbers
+ else if (Character.isDigit(c) || c == '.') {
+ b = new StringBuilder();
+ do {
+ b.append(c);
+ if (c == 'e' || c == 'E') {
+ c = (char) (ic = reader.read());
+ if ("1234567890-".indexOf(c) == -1) {
+ break;
+ }
+ b.append(c);
+ }
+ c = (char) (ic = reader.read());
+ } while ("1234567890.iIlLfFdDsSuUeExX".indexOf(c) != -1);
+ reader.unread(ic);
+ return new Token(b.toString(), line);
+ }
+ // anything else
+ else {
+ return new Token(String.valueOf(c), line);
+ }
}
}
+ } catch (IOException e) {
+ e.printStackTrace();
}
- } catch (IOException e) {
- e.printStackTrace();
- } finally {
- IOUtils.closeQuietly(reader);
- tokenEntries.add(TokenEntry.getEOF());
+ endOfFile = true;
+ return Token.EOF;
+ }
+
+ /**
+ * Closes the underlying reader.
+ *
+ * @throws IOException if closing the reader fails
+ */
+ @Override
+ public void close() throws IOException {
+ reader.close();
+ }
+ }
+
+ /**
+ * A single lexical token: its text and the line number it was found on.
+ * equals() is not overridden, so comparisons against {@link #EOF} are by identity.
+ */
+ private static class Token {
+ // Sentinel instance returned by the tokenizer once the input is exhausted.
+ public static final Token EOF = new Token("EOF", -1);
+
+ // Text of the token.
+ public final String image;
+ // Line number recorded for the token (-1 for EOF).
+ public final int lineNumber;
+
+ public Token(String image, int lineNumber) {
+ this.image = image;
+ this.lineNumber = lineNumber;
}
}
}
diff --git a/pmd-cs/src/test/java/net/sourceforge/pmd/cpd/CsTokenizerTest.java b/pmd-cs/src/test/java/net/sourceforge/pmd/cpd/CsTokenizerTest.java
index 072be103db..9c1a9a9014 100644
--- a/pmd-cs/src/test/java/net/sourceforge/pmd/cpd/CsTokenizerTest.java
+++ b/pmd-cs/src/test/java/net/sourceforge/pmd/cpd/CsTokenizerTest.java
@@ -5,6 +5,7 @@
package net.sourceforge.pmd.cpd;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
import java.util.List;
@@ -13,132 +14,152 @@ import org.junit.Test;
public class CsTokenizerTest {
- private CsTokenizer tokenizer = new CsTokenizer();
+ private CsTokenizer tokenizer;
private Tokens tokens;
@Before
public void init() {
- tokens = new Tokens();
- TokenEntry.clearImages();
+ tokenizer = new CsTokenizer();
+ tokens = new Tokens();
+ TokenEntry.clearImages();
}
@Test
public void testSimpleClass() {
- tokenizer.tokenize(toSourceCode("class Foo {}"), tokens);
- assertEquals(5, tokens.size());
+ tokenizer.tokenize(toSourceCode("class Foo {}"), tokens);
+ assertEquals(5, tokens.size());
}
@Test
public void testSimpleClassDuplicatedTokens() {
- tokenizer.tokenize(toSourceCode("class Foo { class Foo { } }"), tokens);
- assertEquals(9, tokens.size());
- List