From a96ccf97417ee0e648b3c073825a76592c0adb6c Mon Sep 17 00:00:00 2001 From: Andreas Dangel Date: Sun, 14 Apr 2013 13:33:00 +0200 Subject: [PATCH] pmd: fix #1082 CPD performance issue on larger projects --- pmd/etc/changelog.txt | 1 + .../sourceforge/pmd/cpd/JavaTokenizer.java | 175 +++++++++--------- .../pmd/cpd/JavaTokensTokenizerTest.java | 35 +++- 3 files changed, 119 insertions(+), 92 deletions(-) diff --git a/pmd/etc/changelog.txt b/pmd/etc/changelog.txt index fab003d122..c61eafe26c 100644 --- a/pmd/etc/changelog.txt +++ b/pmd/etc/changelog.txt @@ -1,6 +1,7 @@ ????? ??, 2013 - 5.0.4: Fixed bug 1081: Regression: CPD skipping all files when using relative paths +Fixed bug 1082: CPD performance issue on larger projects April 5, 2013 - 5.0.3: diff --git a/pmd/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java b/pmd/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java index 9446032d7f..954023ded9 100644 --- a/pmd/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java +++ b/pmd/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java @@ -4,7 +4,7 @@ package net.sourceforge.pmd.cpd; import java.io.StringReader; -import java.util.*; +import java.util.Properties; import net.sourceforge.pmd.lang.LanguageVersion; import net.sourceforge.pmd.lang.LanguageVersionHandler; @@ -23,8 +23,6 @@ public class JavaTokenizer implements Tokenizer { private boolean ignoreAnnotations; private boolean ignoreLiterals; private boolean ignoreIdentifiers; - List discarders = new ArrayList(); - public void setProperties(Properties properties) { ignoreAnnotations = Boolean.parseBoolean(properties.getProperty(IGNORE_ANNOTATIONS, "false")); @@ -42,22 +40,17 @@ public class JavaTokenizer implements Tokenizer { fileName, new StringReader(stringBuilder.toString())); Token currentToken = (Token) tokenMgr.getNextToken(); - initDiscarders(); + TokenDiscarder discarder = new TokenDiscarder(ignoreAnnotations); while (currentToken.image.length() > 0) { - for (Discarder discarder : 
discarders) { - discarder.add(currentToken); - } + discarder.updateState(currentToken); - if (inDiscardingState()) { + if (discarder.isDiscarding()) { currentToken = (Token) tokenMgr.getNextToken(); continue; } - //skip semicolons - if (currentToken.kind != JavaParserConstants.SEMICOLON) { - processToken(tokenEntries, fileName, currentToken); - } + processToken(tokenEntries, fileName, currentToken); currentToken = (Token) tokenMgr.getNextToken(); } tokenEntries.add(TokenEntry.getEOF()); @@ -68,7 +61,8 @@ public class JavaTokenizer implements Tokenizer { if (ignoreLiterals && (currentToken.kind == JavaParserConstants.STRING_LITERAL || currentToken.kind == JavaParserConstants.CHARACTER_LITERAL - || currentToken.kind == JavaParserConstants.DECIMAL_LITERAL || currentToken.kind == JavaParserConstants.FLOATING_POINT_LITERAL)) { + || currentToken.kind == JavaParserConstants.DECIMAL_LITERAL + || currentToken.kind == JavaParserConstants.FLOATING_POINT_LITERAL)) { image = String.valueOf(currentToken.kind); } if (ignoreIdentifiers && currentToken.kind == JavaParserConstants.IDENTIFIER) { @@ -77,23 +71,6 @@ public class JavaTokenizer implements Tokenizer { tokenEntries.add(new TokenEntry(image, fileName, currentToken.beginLine)); } - private void initDiscarders() { - if (ignoreAnnotations) - discarders.add(new AnnotationStateDiscarder()); - discarders.add(new SuppressCPDDiscarder()); - discarders.add(new KeyWordToSemiColonStateDiscarder(JavaParserConstants.IMPORT)); - discarders.add(new KeyWordToSemiColonStateDiscarder(JavaParserConstants.PACKAGE)); - } - - private boolean inDiscardingState() { - boolean discarding = false; - for (Discarder discarder : discarders) { - if (discarder.isDiscarding()) - discarding = true; - } - return discarding; - } - public void setIgnoreLiterals(boolean ignore) { this.ignoreLiterals = ignore; } @@ -106,83 +83,101 @@ public class JavaTokenizer implements Tokenizer { this.ignoreAnnotations = ignoreAnnotations; } - static public interface 
Discarder { - public void add(Token token); + /** + * The {@link TokenDiscarder} consumes token by token and maintains state. + * It can detect, whether the current token belongs to an annotation and whether + * the current token should be discarded by CPD. + *

<p>
+ * By default, it discards semicolons, package and import statements, and enables CPD suppression. + * Optionally, all annotations can be ignored, too. + *
</p>

+ */ + private static class TokenDiscarder { + private boolean isAnnotation = false; + private boolean nextTokenEndsAnnotation = false; + private int annotationStack = 0; - public boolean isDiscarding(); - } + private boolean discardingSemicolon = false; + private boolean discardingKeywords = false; + private boolean discardingSuppressing = false; + private boolean discardingAnnotations = false; + private boolean ignoreAnnotations = false; - static public class AnnotationStateDiscarder implements Discarder { + public TokenDiscarder(boolean ignoreAnnotations) { + this.ignoreAnnotations = ignoreAnnotations; + } - Stack tokenStack = new Stack(); + public void updateState(Token currentToken) { + detectAnnotations(currentToken); - public void add(Token token) { - if (isDiscarding() && tokenStack.size() == 2 && token.kind != JavaParserConstants.LPAREN) { - tokenStack.clear(); - } - - if (token.kind == JavaParserConstants.AT && !isDiscarding()) { - tokenStack.push(token); - return; - } - if (token.kind == JavaParserConstants.RPAREN && isDiscarding()) { - Token popped = null; - while ((popped = tokenStack.pop()).kind != JavaParserConstants.LPAREN) ; - return; - - } else { - if (isDiscarding()) - tokenStack.push(token); + skipSemicolon(currentToken); + skipPackageAndImport(currentToken); + skipCPDSuppression(currentToken); + if (ignoreAnnotations) { + skipAnnotations(); } } - public boolean isDiscarding() { - return !tokenStack.isEmpty(); + public void skipPackageAndImport(Token currentToken) { + if (currentToken.kind == JavaParserConstants.PACKAGE || currentToken.kind == JavaParserConstants.IMPORT) { + discardingKeywords = true; + } else if (discardingKeywords && currentToken.kind == JavaParserConstants.SEMICOLON) { + discardingKeywords = false; + } } - } - - static public class KeyWordToSemiColonStateDiscarder implements Discarder { - - private final int keyword; - Stack tokenStack = new Stack(); - - public KeyWordToSemiColonStateDiscarder(int keyword) { - this.keyword = 
keyword; + public void skipSemicolon(Token currentToken) { + if (currentToken.kind == JavaParserConstants.SEMICOLON) { + discardingSemicolon = true; + } else if (discardingSemicolon && currentToken.kind != JavaParserConstants.SEMICOLON) { + discardingSemicolon = false; + } } - public void add(Token token) { - if (token.kind == keyword) - tokenStack.add(token); - if (token.kind == JavaParserConstants.SEMICOLON && isDiscarding()) - tokenStack.clear(); - } - - public boolean isDiscarding() { - return !tokenStack.isEmpty(); - } - - } - - static public class SuppressCPDDiscarder implements Discarder { - AnnotationStateDiscarder asm = new AnnotationStateDiscarder(); - Boolean discarding = false; - - public void add(Token token) { - asm.add(token); + public void skipCPDSuppression(Token currentToken) { //if processing an annotation, look for a CPD-START or CPD-END - if (asm.isDiscarding()) { - if (CPD_START.equals(token.image)) - discarding = true; - if (CPD_END.equals(token.image) && discarding) - discarding = false; + if (isAnnotation) { + if (!discardingSuppressing && currentToken.kind == JavaParserConstants.STRING_LITERAL && CPD_START.equals(currentToken.image)) { + discardingSuppressing = true; + } else if (discardingSuppressing && currentToken.kind == JavaParserConstants.STRING_LITERAL && CPD_END.equals(currentToken.image)) { + discardingSuppressing = false; + } + } + } + + public void skipAnnotations() { + if (!discardingAnnotations && isAnnotation) { + discardingAnnotations = true; + } else if (discardingAnnotations && !isAnnotation) { + discardingAnnotations = false; } } public boolean isDiscarding() { - return discarding; + boolean result = discardingSemicolon || discardingKeywords || discardingAnnotations || discardingSuppressing; + return result; } + public void detectAnnotations(Token currentToken) { + if (isAnnotation && nextTokenEndsAnnotation) { + isAnnotation = false; + nextTokenEndsAnnotation = false; + } + if (isAnnotation) { + if (currentToken.kind == 
JavaParserConstants.LPAREN) { + annotationStack++; + } else if (currentToken.kind == JavaParserConstants.RPAREN) { + annotationStack--; + if (annotationStack == 0) { + nextTokenEndsAnnotation = true; + } + } else if (annotationStack == 0 && currentToken.kind != JavaParserConstants.IDENTIFIER && currentToken.kind != JavaParserConstants.LPAREN) { + isAnnotation = false; + } + } + if (currentToken.kind == JavaParserConstants.AT) { + isAnnotation = true; + } + } } - } diff --git a/pmd/src/test/java/net/sourceforge/pmd/cpd/JavaTokensTokenizerTest.java b/pmd/src/test/java/net/sourceforge/pmd/cpd/JavaTokensTokenizerTest.java index 8543fba061..d35c6ac4d1 100644 --- a/pmd/src/test/java/net/sourceforge/pmd/cpd/JavaTokensTokenizerTest.java +++ b/pmd/src/test/java/net/sourceforge/pmd/cpd/JavaTokensTokenizerTest.java @@ -134,8 +134,6 @@ public class JavaTokensTokenizerTest { Tokens tokens = new Tokens(); t.tokenize(sourceCode, tokens); TokenEntry.getEOF(); - System.out.println(sourceCode.getSlice(0,5)); - assertEquals(6, tokens.size()); } @@ -169,7 +167,40 @@ public class JavaTokensTokenizerTest { t.tokenize(sourceCode, tokens); TokenEntry.getEOF(); assertEquals(10, tokens.size()); + } + + @Test + public void testIgnoreBetweenSpecialAnnotationAndIgnoreAnnotations() throws Throwable { + JavaTokenizer t = new JavaTokenizer(); + t.setIgnoreAnnotations(true); + SourceCode sourceCode = new SourceCode( + new SourceCode.StringCodeLoader( + "package foo.bar.baz;" + + PMD.EOL + + "@SuppressWarnings({\"woof\",\"CPD-START\"})" + + PMD.EOL + + "@SuppressWarnings(\"CPD-START\")" + + PMD.EOL + + + "@ MyAnnotation (\"ugh\")" + + PMD.EOL + + "@NamedQueries({" + + PMD.EOL + + "@NamedQuery(" + + PMD.EOL + + ")})" + + PMD.EOL + + "public class Foo {}" + + PMD.EOL + + "@SuppressWarnings({\"ugh\",\"CPD-END\"})" + + PMD.EOL + + )); + Tokens tokens = new Tokens(); + t.tokenize(sourceCode, tokens); + TokenEntry.getEOF(); + assertEquals(1, tokens.size()); }