diff --git a/pmd/src/main/java/net/sourceforge/pmd/cpd/CsLanguage.java b/pmd/src/main/java/net/sourceforge/pmd/cpd/CsLanguage.java
new file mode 100644
index 0000000000..eebca47820
--- /dev/null
+++ b/pmd/src/main/java/net/sourceforge/pmd/cpd/CsLanguage.java
@@ -0,0 +1,14 @@
+/**
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+package net.sourceforge.pmd.cpd;
+
+/**
+ * CPD language definition for C#: pairs a {@link CsTokenizer} with the
+ * <code>.cs</code> extension handed to {@link AbstractLanguage}.
+ */
+public class CsLanguage extends AbstractLanguage {
+    public CsLanguage() {
+        super(new CsTokenizer(), ".cs");
+    }
+}
diff --git a/pmd/src/main/java/net/sourceforge/pmd/cpd/CsTokenizer.java b/pmd/src/main/java/net/sourceforge/pmd/cpd/CsTokenizer.java
new file mode 100644
index 0000000000..bdc637c53e
--- /dev/null
+++ b/pmd/src/main/java/net/sourceforge/pmd/cpd/CsTokenizer.java
@@ -0,0 +1,239 @@
+/**
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+package net.sourceforge.pmd.cpd;
+
+import java.io.BufferedReader;
+import java.io.CharArrayReader;
+import java.io.IOException;
+
+import net.sourceforge.pmd.util.IOUtil;
+
+/**
+ * This class does a best-guess try-anything tokenization.
+ *
+ * <p>The source is scanned one character at a time. Operators, string and
+ * character literals, identifiers and numbers each become one token, while
+ * whitespace, semicolons and comments produce none. Every token records the
+ * line on which it starts.
+ *
+ * @author jheintz
+ */
+public class CsTokenizer implements Tokenizer {
+
+    /**
+     * Splits the given source into tokens and appends them to
+     * <code>tokenEntries</code>, always finishing with the EOF marker.
+     *
+     * @param sourceCode the code to scan; its file name is stored in each token
+     * @param tokenEntries receives the tokens in source order
+     */
+    public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
+        String fileName = sourceCode.getFileName();
+        BufferedReader reader = new BufferedReader(new CharArrayReader(sourceCode.getCodeBuffer().toString().toCharArray()));
+        try {
+            // ic is the next unprocessed character, or -1 at end of input
+            int ic = reader.read();
+            int line = 1;
+            char c;
+            StringBuilder b;
+            while (ic != -1) {
+                c = (char) ic;
+                switch (c) {
+                // new line
+                case '\n':
+                    line++;
+                    ic = reader.read();
+                    break;
+
+                // white space
+                case ' ':
+                case '\t':
+                case '\r':
+                    ic = reader.read();
+                    break;
+
+                // ignore semicolons
+                case ';':
+                    ic = reader.read();
+                    break;
+
+                // < << <= <<= > >> >= >>=
+                case '<':
+                case '>':
+                    ic = reader.read();
+                    if (ic == '=') {
+                        tokenEntries.add(new TokenEntry(String.valueOf(c) + "=", fileName, line));
+                        ic = reader.read();
+                    } else if (ic == c) {
+                        ic = reader.read();
+                        if (ic == '=') {
+                            tokenEntries.add(new TokenEntry(String.valueOf(c) + String.valueOf(c) + "=", fileName, line));
+                            ic = reader.read();
+                        } else {
+                            tokenEntries.add(new TokenEntry(String.valueOf(c) + String.valueOf(c), fileName, line));
+                        }
+                    } else {
+                        tokenEntries.add(new TokenEntry(String.valueOf(c), fileName, line));
+                    }
+                    break;
+
+                // = == & &= && | |= || + += ++ - -= --
+                case '=':
+                case '&':
+                case '|':
+                case '+':
+                case '-':
+                    ic = reader.read();
+                    if (ic == '=' || ic == c) {
+                        tokenEntries.add(new TokenEntry(String.valueOf(c) + String.valueOf((char) ic), fileName, line));
+                        ic = reader.read();
+                    } else {
+                        tokenEntries.add(new TokenEntry(String.valueOf(c), fileName, line));
+                    }
+                    break;
+
+                // ! != * *= % %= ^ ^= ~ ~=
+                case '!':
+                case '*':
+                case '%':
+                case '^':
+                case '~':
+                    ic = reader.read();
+                    if (ic == '=') {
+                        tokenEntries.add(new TokenEntry(String.valueOf(c) + "=", fileName, line));
+                        ic = reader.read();
+                    } else {
+                        tokenEntries.add(new TokenEntry(String.valueOf(c), fileName, line));
+                    }
+                    break;
+
+                // strings & chars
+                case '"':
+                case '\'': {
+                    // the literal is reported on the line where it opens
+                    int startLine = line;
+                    b = new StringBuilder();
+                    b.append(c);
+                    while ((ic = reader.read()) != c) {
+                        if (ic == -1) {
+                            break;
+                        }
+                        b.append((char) ic);
+                        if (ic == '\n') {
+                            // keep the line counter right for what follows
+                            line++;
+                        } else if (ic == '\\') {
+                            // an escaped character can never close the literal
+                            int next = reader.read();
+                            if (next != -1) {
+                                b.append((char) next);
+                            }
+                            if (next == '\n') {
+                                line++;
+                            }
+                        }
+                    }
+                    if (ic != -1) {
+                        b.append((char) ic);
+                    }
+                    tokenEntries.add(new TokenEntry(b.toString(), fileName, startLine));
+                    ic = reader.read();
+                    break;
+                }
+
+                // / /= /*...*/ //...
+                case '/':
+                    switch (c = (char) (ic = reader.read())) {
+                    case '*': {
+                        // ignore the /* comment, but count the lines it spans
+                        boolean afterStar = false;
+                        while ((ic = reader.read()) != -1) {
+                            c = (char) ic;
+                            if (c == '\n') {
+                                line++;
+                            }
+                            if (afterStar && c == '/') {
+                                ic = reader.read();
+                                break;
+                            }
+                            afterStar = c == '*';
+                        }
+                        break;
+                    }
+
+                    case '/':
+                        // ignore the // comment; the '\n' that ends it stays in
+                        // ic so that the outer loop counts the line
+                        while ((ic = reader.read()) != '\n') {
+                            if (ic == -1) {
+                                break;
+                            }
+                        }
+                        break;
+
+                    case '=':
+                        tokenEntries.add(new TokenEntry("/=", fileName, line));
+                        ic = reader.read();
+                        break;
+
+                    default:
+                        tokenEntries.add(new TokenEntry("/", fileName, line));
+                        break;
+                    }
+                    break;
+
+                default:
+                    // [a-zA-Z_][a-zA-Z_0-9]*
+                    if (Character.isJavaIdentifierStart(c)) {
+                        b = new StringBuilder();
+                        do {
+                            b.append(c);
+                            c = (char) (ic = reader.read());
+                        } while (ic != -1 && Character.isJavaIdentifierPart(c));
+                        tokenEntries.add(new TokenEntry(b.toString(), fileName, line));
+                    }
+                    // numbers
+                    else if (Character.isDigit(c) || c == '.') {
+                        b = new StringBuilder();
+                        if (c == '.') {
+                            // a dot only starts a number when a digit follows;
+                            // otherwise it is the member-access operator
+                            b.append(c);
+                            c = (char) (ic = reader.read());
+                            if (!Character.isDigit(c)) {
+                                tokenEntries.add(new TokenEntry(b.toString(), fileName, line));
+                                break;
+                            }
+                        }
+                        do {
+                            b.append(c);
+                            if (c == 'e' || c == 'E') {
+                                // exponent: take one following digit or minus sign
+                                c = (char) (ic = reader.read());
+                                if ("1234567890-".indexOf(c) == -1) {
+                                    break;
+                                }
+                                b.append(c);
+                            }
+                            c = (char) (ic = reader.read());
+                        } while ("1234567890.iIlLfFdDsSuUeExX".indexOf(c) != -1);
+
+                        tokenEntries.add(new TokenEntry(b.toString(), fileName, line));
+                    }
+                    // anything else
+                    else {
+                        tokenEntries.add(new TokenEntry(String.valueOf(c), fileName, line));
+                        ic = reader.read();
+                    }
+                    break;
+                }
+            }
+        } catch (IOException e) {
+            e.printStackTrace();
+        } finally {
+            IOUtil.closeQuietly(reader);
+            tokenEntries.add(TokenEntry.getEOF());
+        }
+    }
+}
diff --git a/pmd/src/site/xdocs/cpd-usage.xml b/pmd/src/site/xdocs/cpd-usage.xml
index 32509815f6..2a61067ce3 100644
--- a/pmd/src/site/xdocs/cpd-usage.xml
+++ b/pmd/src/site/xdocs/cpd-usage.xml
@@ -22,7 +22,7 @@

Each rewrite made it much faster, and now it can process the JDK 1.4 java.* packages in about 4 seconds (on my workstation, at least).

Here's a screenshot of CPD after running on the JDK java.lang package.

-

Note that CPD works with Java, JSP, C, C++, Fortran and PHP code. Your own language is missing ? See how to add it here

+

Note that CPD works with Java, JSP, C, C++, C#, Fortran and PHP code. Is your own language missing? See how to add it here

CPD is included with PMD, which you can download here. Or, if you have Java Web Start, you can run CPD by clicking here.

@@ -76,7 +76,7 @@
 language
- Flag to select the appropriate language (e.g. cpp, cs java, php, ruby, and ecmascript); defaults to java.
+ Flag to select the appropriate language (e.g. cpp, cs, java, php, ruby, and ecmascript); defaults to java.
 No
diff --git a/pmd/src/test/java/net/sourceforge/pmd/cpd/CsTokenizerTest.java b/pmd/src/test/java/net/sourceforge/pmd/cpd/CsTokenizerTest.java
new file mode 100644
index 0000000000..88e4de1883
--- /dev/null
+++ b/pmd/src/test/java/net/sourceforge/pmd/cpd/CsTokenizerTest.java
@@ -0,0 +1,116 @@
+/**
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.cpd;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.List;
+
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Unit tests for {@link CsTokenizer}. Expected counts are one higher than the
+ * number of visible tokens because the tokenizer also adds an EOF entry.
+ */
+public class CsTokenizerTest {
+
+    private CsTokenizer tokenizer = new CsTokenizer();
+
+    private Tokens tokens;
+
+    @Before
+    public void init() {
+        tokens = new Tokens();
+        TokenEntry.clearImages();
+    }
+
+    @Test
+    public void testSimpleClass() {
+        tokenizer.tokenize(toSourceCode("class Foo {}"), tokens);
+        assertEquals(5, tokens.size());
+    }
+
+    @Test
+    public void testSimpleClassDuplicatedTokens() {
+        tokenizer.tokenize(toSourceCode("class Foo { class Foo { } }"), tokens);
+        assertEquals(9, tokens.size());
+        List<TokenEntry> tokenList = tokens.getTokens();
+        // tokens with the same text are expected to share an identifier
+        assertEquals(tokenList.get(0).getIdentifier(), tokenList.get(3).getIdentifier());
+        assertEquals(tokenList.get(1).getIdentifier(), tokenList.get(4).getIdentifier());
+        assertEquals(tokenList.get(2).getIdentifier(), tokenList.get(5).getIdentifier());
+        assertEquals(tokenList.get(6).getIdentifier(), tokenList.get(7).getIdentifier());
+    }
+
+    @Test
+    public void testSimpleClassMethodMultipleLines() {
+        tokenizer.tokenize(toSourceCode(
+                "class Foo {\n"
+                + " public String foo(int a) {\n"
+                + " int i = a;\n"
+                + " return \"x\" + a;\n"
+                + " }\n"
+                + "}"), tokens);
+        assertEquals(22, tokens.size());
+        List<TokenEntry> tokenList = tokens.getTokens();
+        assertEquals(1, tokenList.get(0).getBeginLine());
+        assertEquals(2, tokenList.get(4).getBeginLine());
+        assertEquals(3, tokenList.get(11).getBeginLine());
+    }
+
+    @Test
+    public void testStrings() {
+        tokenizer.tokenize(toSourceCode("String s =\"aaa \\\"b\\n\";"), tokens);
+        assertEquals(5, tokens.size());
+    }
+
+    @Test
+    public void testOpenString() {
+        // the literal is never closed; it should still yield a single token
+        tokenizer.tokenize(toSourceCode("String s =\"aaa \\\"b\\"), tokens);
+        assertEquals(5, tokens.size());
+    }
+
+    @Test
+    public void testCommentsIgnored1() {
+        tokenizer.tokenize(toSourceCode("class Foo { /* class * ** X */ }"), tokens);
+        assertEquals(5, tokens.size());
+    }
+
+    @Test
+    public void testCommentsIgnored2() {
+        tokenizer.tokenize(toSourceCode("class Foo { // class X /* aaa */ \n }"), tokens);
+        assertEquals(5, tokens.size());
+    }
+
+    @Test
+    public void testCommentsIgnored3() {
+        tokenizer.tokenize(toSourceCode("class Foo { /// class X /* aaa */ \n }"), tokens);
+        assertEquals(5, tokens.size());
+    }
+
+    @Test
+    public void testMoreTokens() {
+        tokenizer.tokenize(toSourceCode(
+                "class Foo {\n"
+                + " void bar() {\n"
+                + " int a = 1 >> 2; \n"
+                + " a += 1; \n"
+                + " a++; \n"
+                + " a /= 3e2; \n"
+                + " float f = -3.1; \n"
+                + " f *= 2; \n"
+                + " bool b = ! (f == 2.0 || f >= 1.0 && f <= 2.0) \n"
+                + " }\n"
+                + "}"
+                ), tokens);
+        assertEquals(50, tokens.size());
+    }
+
+    private SourceCode toSourceCode(String source) {
+        return new SourceCode(new SourceCode.StringCodeLoader(source));
+    }
+}
diff --git a/pmd/src/test/java/net/sourceforge/pmd/cpd/JavaTokensTokenizerTest.java b/pmd/src/test/java/net/sourceforge/pmd/cpd/JavaTokensTokenizerTest.java
index ab36b2cd6f..75d214134e 100644
--- a/pmd/src/test/java/net/sourceforge/pmd/cpd/JavaTokensTokenizerTest.java
+++ b/pmd/src/test/java/net/sourceforge/pmd/cpd/JavaTokensTokenizerTest.java
@@ -5,10 +5,6 @@ package net.sourceforge.pmd.cpd;

 import static org.junit.Assert.assertEquals;
 import net.sourceforge.pmd.PMD;
-import net.sourceforge.pmd.cpd.JavaTokenizer;
-import net.sourceforge.pmd.cpd.SourceCode;
-import net.sourceforge.pmd.cpd.Tokenizer;
-import net.sourceforge.pmd.cpd.Tokens;

 import org.junit.Test;

@@ -24,6 +20,15 @@ public class JavaTokensTokenizerTest {
         assertEquals("public class Foo {}", sourceCode.getSlice(1, 1));
     }

+    @Test
+    public void testCommentsIgnored() throws Throwable {
+        Tokenizer tokenizer = new JavaTokenizer();
+        SourceCode sourceCode = new SourceCode(new SourceCode.StringCodeLoader("public class Foo { // class Bar */ \n }"));
+        Tokens tokens = new Tokens();
+        tokenizer.tokenize(sourceCode, tokens);
+        assertEquals(6, tokens.size());
+    }
+
     @Test
     public void test2() throws Throwable {
         Tokenizer t = new JavaTokenizer();