diff --git a/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsLanguage.java b/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsLanguage.java
index 2926b9521b..f6a1bd145f 100644
--- a/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsLanguage.java
+++ b/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsLanguage.java
@@ -3,15 +3,24 @@
  */
 package net.sourceforge.pmd.cpd;
 
+import java.util.Properties;
+
 /**
  * Language implementation for C#
  */
 public class CsLanguage extends AbstractLanguage {
 
-    /**
-     * Creates a new C# Language instance.
-     */
     public CsLanguage() {
+        this(System.getProperties());
+    }
+
+    public CsLanguage(Properties properties) {
         super("C#", "cs", new CsTokenizer(), ".cs");
+        setProperties(properties);
+    }
+
+    public final void setProperties(Properties properties) {
+        CsTokenizer tokenizer = (CsTokenizer)getTokenizer();
+        tokenizer.setProperties(properties);
     }
 }
diff --git a/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsTokenizer.java b/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsTokenizer.java
index 2ccdccc3c5..ae68ce1a0d 100644
--- a/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsTokenizer.java
+++ b/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsTokenizer.java
@@ -5,9 +5,13 @@ package net.sourceforge.pmd.cpd;
 
 import java.io.BufferedReader;
 import java.io.CharArrayReader;
+import java.io.Closeable;
 import java.io.IOException;
+import java.io.PushbackReader;
+import java.util.Properties;
 
 import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.RandomStringUtils;
 
 /**
  * This class does a best-guess try-anything tokenization.
@@ -16,220 +20,289 @@ import org.apache.commons.io.IOUtils;
  */
 public class CsTokenizer implements Tokenizer {
 
+    public static final String IGNORE_USINGS = "ignore_usings";
+
+    private boolean ignoreUsings = false;
+
+    public void setProperties(Properties properties) {
+        if (properties.containsKey(IGNORE_USINGS)) {
+            ignoreUsings = Boolean.parseBoolean(properties.getProperty(IGNORE_USINGS, "false"));
+        }
+    }
+
     @Override
     public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
-        BufferedReader reader = new BufferedReader(new CharArrayReader(sourceCode.getCodeBuffer().toString()
-                .toCharArray()));
-        try {
-            int ic = reader.read();
-            int line = 1;
-            char c;
-            StringBuilder b;
-            while (ic != -1) {
-                c = (char) ic;
-                switch (c) {
-                // new line
-                case '\n':
-                    line++;
-                    ic = reader.read();
-                    break;
+        Tokenizer tokenizer =
+            new Tokenizer(sourceCode.getCodeBuffer().toString());
+        Token token = tokenizer.getNextToken();
 
-                // white space
-                case ' ':
-                case '\t':
-                case '\r':
-                    ic = reader.read();
-                    break;
+        while (!token.equals(Token.EOF)) {
+            Token lookAhead = tokenizer.getNextToken();
 
-                // ignore semicolons
-                case ';':
-                    ic = reader.read();
-                    break;
+            // Ignore using directives
+            // Only using directives should be ignored, because these are used to import namespaces
+            //
+            // Using directive: 'using System.Math;'
+            // Using statement: 'using (Font font1 = new Font(..)) { .. }'
+            if (ignoreUsings &&
+                "using".equals(token.image) &&
+                !"(".equals(lookAhead.image)
+                ) {
+                // We replace the 'using' token with a random token, because it should not be part of
+                // any duplication block. When we omit it from the token stream, there is a chance that
+                // we get a duplication block that starts before the 'using' directives and ends afterwards.
+ String randomTokenText = + RandomStringUtils.randomAlphanumeric(20); - // < << <= <<= > >> >= >>= - case '<': - case '>': - ic = reader.read(); - if (ic == '=') { - tokenEntries.add(new TokenEntry(c + "=", sourceCode.getFileName(), line)); + token = new Token(randomTokenText, token.lineNumber); + //Skip all other tokens of the using directive to prevent a partial matching + while (!";".equals(lookAhead.image) && !lookAhead.equals(Token.EOF)) { + lookAhead = tokenizer.getNextToken(); + } + } + if (!";".equals(token.image)) { + tokenEntries.add(new TokenEntry(token.image, sourceCode.getFileName(), token.lineNumber)); + } + token = lookAhead; + } + tokenEntries.add(TokenEntry.getEOF()); + IOUtils.closeQuietly(tokenizer); + } + + public void setIgnoreUsings(boolean ignoreUsings) { + this.ignoreUsings = ignoreUsings; + } + + + private static class Tokenizer implements Closeable { + private boolean endOfFile; + private int line; + private final PushbackReader reader; + + public Tokenizer(String sourceCode) { + endOfFile = false; + line = 1; + reader = new PushbackReader(new BufferedReader(new CharArrayReader(sourceCode.toCharArray()))); + } + + public Token getNextToken() { + if (endOfFile) { + return Token.EOF; + } + + try { + int ic = reader.read(); + char c; + StringBuilder b; + while (ic != -1) { + c = (char) ic; + switch (c) { + // new line + case '\n': + line++; ic = reader.read(); - } else if (ic == c) { - ic = reader.read(); - if (ic == '=') { - tokenEntries.add(new TokenEntry(c + c + "=", sourceCode - .getFileName(), line)); - ic = reader.read(); - } else { - tokenEntries.add(new TokenEntry(String.valueOf(c) + c, sourceCode - .getFileName(), line)); - } - } else { - tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line)); - } - break; - - // = == & &= && | |= || + += ++ - -= -- - case '=': - case '&': - case '|': - case '+': - case '-': - ic = reader.read(); - if (ic == '=' || ic == c) { - tokenEntries.add(new TokenEntry(c + String.valueOf((char) ic), sourceCode - .getFileName(), line)); - ic = reader.read(); - } else { - tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line)); - } - break; - - // ! != * *= % %= ^ ^= ~ ~= - case '!': - case '*': - case '%': - case '^': - case '~': - ic = reader.read(); - if (ic == '=') { - tokenEntries.add(new TokenEntry(c + "=", sourceCode.getFileName(), line)); - ic = reader.read(); - } else { - tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line)); - } - break; - - // strings & chars - case '"': - case '\'': - int beginLine = line; - b = new StringBuilder(); - b.append(c); - while ((ic = reader.read()) != c) { - if (ic == -1) { - break; - } - b.append((char) ic); - if (ic == '\\') { - int next = reader.read(); - if (next != -1) { - b.append((char) next); - - if (next == '\n') { - line++; - } - } - } else if (ic == '\n') { - line++; - } - } - if (ic != -1) { - b.append((char) ic); - } - tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), beginLine)); - ic = reader.read(); - break; - - // / /= /*...*/ //... 
- case '/': - switch (c = (char) (ic = reader.read())) { - case '*': - //int beginLine = line; - int state = 1; - b = new StringBuilder(); - b.append("/*"); - - while ((ic = reader.read()) != -1) { - c = (char) ic; - b.append(c); - - if (c == '\n') { - line++; - } - - if (state == 1) { - if (c == '*') { - state = 2; - } - } else { - if (c == '/') { - ic = reader.read(); - break; - } else if (c != '*') { - state = 1; - } - } - } - // ignore the /* comment - // tokenEntries.add(new TokenEntry(b.toString(), - // sourceCode.getFileName(), beginLine)); break; - case '/': + // white space + case ' ': + case '\t': + case '\r': + ic = reader.read(); + break; + + case ';': + return new Token(";", line); + + // < << <= <<= > >> >= >>= + case '<': + case '>': + ic = reader.read(); + if (ic == '=') { + return new Token(c + "=", line); + } else if (ic == c) { + ic = reader.read(); + if (ic == '=') { + return new Token(c + c + "=", line); + } else { + reader.unread(ic); + return new Token(String.valueOf(c) + c, line); + } + } else { + reader.unread(ic); + return new Token(String.valueOf(c), line); + } + + // = == & &= && | |= || + += ++ - -= -- + case '=': + case '&': + case '|': + case '+': + case '-': + ic = reader.read(); + if (ic == '=' || ic == c) { + return new Token(c + String.valueOf((char) ic), line); + } else { + reader.unread(ic); + return new Token(String.valueOf(c), line); + } + + // ! != * *= % %= ^ ^= ~ ~= + case '!': + case '*': + case '%': + case '^': + case '~': + ic = reader.read(); + if (ic == '=') { + return new Token(c + "=", line); + } else { + reader.unread(ic); + return new Token(String.valueOf(c), line); + } + + // strings & chars + case '"': + case '\'': + int beginLine = line; b = new StringBuilder(); - b.append("//"); - while ((ic = reader.read()) != '\n') { + b.append(c); + while ((ic = reader.read()) != c) { if (ic == -1) { break; } b.append((char) ic); - } - // ignore the // comment - // tokenEntries.add(new TokenEntry(b.toString(), - // sourceCode.getFileName(), line)); - break; + if (ic == '\\') { + int next = reader.read(); + if (next != -1) { + b.append((char) next); - case '=': - tokenEntries.add(new TokenEntry("/=", sourceCode.getFileName(), line)); - ic = reader.read(); + if (next == '\n') { + line++; + } + } + } else if (ic == '\n') { + line++; + } + } + if (ic != -1) { + b.append((char) ic); + } + return new Token(b.toString(), beginLine); + + // / /= /*...*/ //... 
+ case '/': + switch (c = (char) (ic = reader.read())) { + case '*': + //int beginLine = line; + int state = 1; + b = new StringBuilder(); + b.append("/*"); + + while ((ic = reader.read()) != -1) { + c = (char) ic; + b.append(c); + + if (c == '\n') { + line++; + } + + if (state == 1) { + if (c == '*') { + state = 2; + } + } else { + if (c == '/') { + ic = reader.read(); + break; + } else if (c != '*') { + state = 1; + } + } + } + // ignore the /* comment + // tokenEntries.add(new TokenEntry(b.toString(), + // sourceCode.getFileName(), beginLine)); + break; + + case '/': + b = new StringBuilder(); + b.append("//"); + while ((ic = reader.read()) != '\n') { + if (ic == -1) { + break; + } + b.append((char) ic); + } + // ignore the // comment + // tokenEntries.add(new TokenEntry(b.toString(), + // sourceCode.getFileName(), line)); + break; + + case '=': + return new Token("/=", line); + + default: + reader.unread(ic); + return new Token("/", line); + } break; default: - tokenEntries.add(new TokenEntry("/", sourceCode.getFileName(), line)); - break; - } - break; - - default: - // [a-zA-Z_][a-zA-Z_0-9]* - if (Character.isJavaIdentifierStart(c)) { - b = new StringBuilder(); - do { - b.append(c); - c = (char) (ic = reader.read()); - } while (Character.isJavaIdentifierPart(c)); - tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), line)); - } - // numbers - else if (Character.isDigit(c) || c == '.') { - b = new StringBuilder(); - do { - b.append(c); - if (c == 'e' || c == 'E') { - c = (char) (ic = reader.read()); - if ("1234567890-".indexOf(c) == -1) { - break; - } + // [a-zA-Z_][a-zA-Z_0-9]* + if (Character.isJavaIdentifierStart(c)) { + b = new StringBuilder(); + do { b.append(c); - } - c = (char) (ic = reader.read()); - } while ("1234567890.iIlLfFdDsSuUeExX".indexOf(c) != -1); - - tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), line)); - } - // anything else - else { - tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line)); - ic = reader.read(); - break; + c = (char) (ic = reader.read()); + } while (Character.isJavaIdentifierPart(c)); + reader.unread(ic); + return new Token(b.toString(), line); + } + // numbers + else if (Character.isDigit(c) || c == '.') { + b = new StringBuilder(); + do { + b.append(c); + if (c == 'e' || c == 'E') { + c = (char) (ic = reader.read()); + if ("1234567890-".indexOf(c) == -1) { + break; + } + b.append(c); + } + c = (char) (ic = reader.read()); + } while ("1234567890.iIlLfFdDsSuUeExX".indexOf(c) != -1); + reader.unread(ic); + return new Token(b.toString(), line); + } + // anything else + else { + return new Token(String.valueOf(c), line); + } } } + } catch (IOException e) { + e.printStackTrace(); } - } catch (IOException e) { - e.printStackTrace(); - } finally { - IOUtils.closeQuietly(reader); - tokenEntries.add(TokenEntry.getEOF()); + endOfFile = true; + return Token.EOF; + } + + @Override + public void close() throws IOException { + reader.close(); + } + } + + private static class Token { + public static final Token EOF = new Token("EOF", -1); + + public final String image; + public final int lineNumber; + + public Token(String image, int lineNumber) { + this.image = image; + this.lineNumber = lineNumber; } } } diff --git a/pmd-cs/src/test/java/net/sourceforge/pmd/cpd/CsTokenizerTest.java b/pmd-cs/src/test/java/net/sourceforge/pmd/cpd/CsTokenizerTest.java index 072be103db..9c1a9a9014 100644 --- a/pmd-cs/src/test/java/net/sourceforge/pmd/cpd/CsTokenizerTest.java +++ 
b/pmd-cs/src/test/java/net/sourceforge/pmd/cpd/CsTokenizerTest.java @@ -5,6 +5,7 @@ package net.sourceforge.pmd.cpd; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; import java.util.List; @@ -13,132 +14,152 @@ import org.junit.Test; public class CsTokenizerTest { - private CsTokenizer tokenizer = new CsTokenizer(); + private CsTokenizer tokenizer; private Tokens tokens; @Before public void init() { - tokens = new Tokens(); - TokenEntry.clearImages(); + tokenizer = new CsTokenizer(); + tokens = new Tokens(); + TokenEntry.clearImages(); } @Test public void testSimpleClass() { - tokenizer.tokenize(toSourceCode("class Foo {}"), tokens); - assertEquals(5, tokens.size()); + tokenizer.tokenize(toSourceCode("class Foo {}"), tokens); + assertEquals(5, tokens.size()); } @Test public void testSimpleClassDuplicatedTokens() { - tokenizer.tokenize(toSourceCode("class Foo { class Foo { } }"), tokens); - assertEquals(9, tokens.size()); - List tokenList = tokens.getTokens(); - assertEquals(tokenList.get(0).getIdentifier(), tokenList.get(3).getIdentifier()); - assertEquals(tokenList.get(1).getIdentifier(), tokenList.get(4).getIdentifier()); - assertEquals(tokenList.get(2).getIdentifier(), tokenList.get(5).getIdentifier()); - assertEquals(tokenList.get(6).getIdentifier(), tokenList.get(7).getIdentifier()); + tokenizer.tokenize(toSourceCode("class Foo { class Foo { } }"), tokens); + assertEquals(9, tokens.size()); + List tokenList = tokens.getTokens(); + assertEquals(tokenList.get(0).getIdentifier(), tokenList.get(3).getIdentifier()); + assertEquals(tokenList.get(1).getIdentifier(), tokenList.get(4).getIdentifier()); + assertEquals(tokenList.get(2).getIdentifier(), tokenList.get(5).getIdentifier()); + assertEquals(tokenList.get(6).getIdentifier(), tokenList.get(7).getIdentifier()); } @Test public void testSimpleClassMethodMultipleLines() { - tokenizer.tokenize(toSourceCode( - "class Foo {\n" - + " public String foo(int a) {\n" - + " int i = a;\n" - + " return \"x\" + a;\n" - + " }\n" - + "}"), tokens); - assertEquals(22, tokens.size()); - List tokenList = tokens.getTokens(); - assertEquals(1, tokenList.get(0).getBeginLine()); - assertEquals(2, tokenList.get(4).getBeginLine()); - assertEquals(3, tokenList.get(11).getBeginLine()); + tokenizer.tokenize(toSourceCode( + "class Foo {\n" + + " public String foo(int a) {\n" + + " int i = a;\n" + + " return \"x\" + a;\n" + + " }\n" + + "}"), tokens); + assertEquals(22, tokens.size()); + List tokenList = tokens.getTokens(); + assertEquals(1, tokenList.get(0).getBeginLine()); + assertEquals(2, tokenList.get(4).getBeginLine()); + assertEquals(3, tokenList.get(11).getBeginLine()); } @Test public void testStrings() { - tokenizer.tokenize(toSourceCode("String s =\"aaa \\\"b\\n\";"), tokens); - assertEquals(5, tokens.size()); + tokenizer.tokenize(toSourceCode("String s =\"aaa \\\"b\\n\";"), tokens); + assertEquals(5, tokens.size()); } @Test public void testOpenString() { - tokenizer.tokenize(toSourceCode("String s =\"aaa \\\"b\\"), tokens); - assertEquals(5, tokens.size()); + tokenizer.tokenize(toSourceCode("String s =\"aaa \\\"b\\"), tokens); + assertEquals(5, tokens.size()); } @Test public void testCommentsIgnored1() { - tokenizer.tokenize(toSourceCode("class Foo { /* class * ** X */ }"), tokens); - assertEquals(5, tokens.size()); + tokenizer.tokenize(toSourceCode("class Foo { /* class * ** X */ }"), tokens); + assertEquals(5, tokens.size()); } @Test public void testCommentsIgnored2() { - tokenizer.tokenize(toSourceCode("class 
Foo { // class X /* aaa */ \n }"), tokens); - assertEquals(5, tokens.size()); + tokenizer.tokenize(toSourceCode("class Foo { // class X /* aaa */ \n }"), tokens); + assertEquals(5, tokens.size()); } @Test public void testCommentsIgnored3() { - tokenizer.tokenize(toSourceCode("class Foo { /// class X /* aaa */ \n }"), tokens); - assertEquals(5, tokens.size()); + tokenizer.tokenize(toSourceCode("class Foo { /// class X /* aaa */ \n }"), tokens); + assertEquals(5, tokens.size()); } @Test public void testMoreTokens() { - tokenizer.tokenize(toSourceCode( - "class Foo {\n" - + " void bar() {\n" - + " int a = 1 >> 2; \n" - + " a += 1; \n" - + " a++; \n" - + " a /= 3e2; \n" - + " float f = -3.1; \n" - + " f *= 2; \n" - + " bool b = ! (f == 2.0 || f >= 1.0 && f <= 2.0) \n" - + " }\n" - + "}" - ), tokens); - assertEquals(50, tokens.size()); + tokenizer.tokenize(toSourceCode( + "class Foo {\n" + + " void bar() {\n" + + " int a = 1 >> 2; \n" + + " a += 1; \n" + + " a++; \n" + + " a /= 3e2; \n" + + " float f = -3.1; \n" + + " f *= 2; \n" + + " bool b = ! (f == 2.0 || f >= 1.0 && f <= 2.0) \n" + + " }\n" + + "}" + ), tokens); + assertEquals(50, tokens.size()); } @Test public void testLineNumberAfterMultilineComment() { - tokenizer.tokenize(toSourceCode( - "/* This is a multiline comment \n" - + " * \n" - + " * Lorem ipsum dolor sit amet, \n" - + " * consectetur adipiscing elit \n" - + " */\n" - + "\n" - + "class Foo {\n" - + "\n" - + "}" - ), tokens); - assertEquals(5, tokens.size()); - assertEquals(7, tokens.getTokens().get(0).getBeginLine()); + tokenizer.tokenize(toSourceCode( + "/* This is a multiline comment \n" + + " * \n" + + " * Lorem ipsum dolor sit amet, \n" + + " * consectetur adipiscing elit \n" + + " */\n" + + "\n" + + "class Foo {\n" + + "\n" + + "}" + ), tokens); + assertEquals(5, tokens.size()); + assertEquals(7, tokens.getTokens().get(0).getBeginLine()); } @Test public void testLineNumberAfterMultilineString() { - tokenizer.tokenize(toSourceCode( - "class Foo {\n" - + " void bar() {\n" - + " String query = \n" - + " @\"SELECT foo, bar\n" - + " FROM table \n" - + " WHERE id = 42\"; \n" - + " }\n" - + "}" - ), tokens); - assertEquals(16, tokens.size()); - assertEquals(8, tokens.getTokens().get(14).getBeginLine()); + tokenizer.tokenize(toSourceCode( + "class Foo {\n" + + " void bar() {\n" + + " String query = \n" + + " @\"SELECT foo, bar\n" + + " FROM table \n" + + " WHERE id = 42\"; \n" + + " }\n" + + "}" + ), tokens); + assertEquals(16, tokens.size()); + assertEquals(8, tokens.getTokens().get(14).getBeginLine()); + } + + @Test + public void testIgnoreUsingDirectives() { + tokenizer.setIgnoreUsings(true); + tokenizer.tokenize(toSourceCode("using System.Text;\n"), tokens); + assertNotEquals("using", tokens.getTokens().get(0).toString()); + assertEquals(2, tokens.size()); + } + + @Test + public void testUsingStatementsAreNotIgnored() { + tokenizer.setIgnoreUsings(true); + tokenizer.tokenize(toSourceCode( + "using (Font font1 = new Font(\"Arial\", 10.0f)) {\n" + + " byte charset = font1.GdiCharSet;\n" + + "}\n" + ), tokens); + assertEquals("using", tokens.getTokens().get(0).toString()); } private SourceCode toSourceCode(String source) { - return new SourceCode(new SourceCode.StringCodeLoader(source)); + return new SourceCode(new SourceCode.StringCodeLoader(source)); } }
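
Usage note (not part of the patch): a minimal sketch of how the new ignore_usings option can be switched on, using only the API introduced above. The wrapper class IgnoreUsingsExample, the sample C# snippet and the println are illustrative; CsTokenizer.setIgnoreUsings(boolean), CsTokenizer.IGNORE_USINGS, the new CsLanguage(Properties) constructor, SourceCode.StringCodeLoader and Tokens all appear in this diff or in the existing CPD classes it builds on.

import java.util.Properties;

import net.sourceforge.pmd.cpd.CsLanguage;
import net.sourceforge.pmd.cpd.CsTokenizer;
import net.sourceforge.pmd.cpd.SourceCode;
import net.sourceforge.pmd.cpd.Tokens;

public class IgnoreUsingsExample {
    public static void main(String[] args) {
        // Option 1: set the flag directly on the tokenizer, as the new tests do.
        CsTokenizer tokenizer = new CsTokenizer();
        tokenizer.setIgnoreUsings(true);

        Tokens tokens = new Tokens();
        tokenizer.tokenize(new SourceCode(new SourceCode.StringCodeLoader(
                "using System.Text;\nclass Foo {}")), tokens);
        // The 'using' directive is replaced by a random token and the rest of the
        // directive is skipped, so it cannot take part in a duplication match.
        System.out.println(tokens.size() + " tokens");

        // Option 2: configure it through CsLanguage, which forwards the Properties
        // to the tokenizer; the key is CsTokenizer.IGNORE_USINGS ("ignore_usings").
        Properties properties = new Properties();
        properties.setProperty(CsTokenizer.IGNORE_USINGS, "true");
        CsLanguage csharp = new CsLanguage(properties);
    }
}

Because the no-argument CsLanguage() constructor now delegates to this(System.getProperties()), running CPD with -Dignore_usings=true on the JVM command line should enable the same behaviour without any code changes.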