diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/JavaCCTokenFilter.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/JavaCCTokenFilter.java
new file mode 100644
index 0000000000..0fb4a41a52
--- /dev/null
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/JavaCCTokenFilter.java
@@ -0,0 +1,110 @@
+/**
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.cpd.token;
+
+import net.sourceforge.pmd.lang.TokenManager;
+import net.sourceforge.pmd.lang.ast.GenericToken;
+
+/**
+ * A generic filter for JavaCC-based token managers that lets comments
+ * enable / disable analysis of parts of the token stream.
+ *
+ * <p>A comment containing {@code CPD-OFF} makes the filter start discarding tokens,
+ * a comment containing {@code CPD-ON} makes it stop. Subclasses can add their own
+ * discard rules through {@link #analyzeToken(GenericToken)} and
+ * {@link #isLanguageSpecificDiscarding()}.</p>
+ */
+public class JavaCCTokenFilter implements TokenFilter {
+
+    private final TokenManager tokenManager;
+    private boolean discardingSuppressing;
+
+    /**
+     * Creates a new JavaCCTokenFilter
+     *
+     * @param tokenManager The token manager from which to retrieve tokens to be filtered
+     */
+    public JavaCCTokenFilter(final TokenManager tokenManager) {
+        this.tokenManager = tokenManager;
+    }
+
+    /**
+     * Retrieves the next token that is not being discarded. Every token read on the way
+     * is handed to {@link #analyzeToken(GenericToken)} and checked for suppression
+     * comments, whether or not it ends up being discarded.
+     *
+     * @return The next token to pass the filter, or null once the token manager returns
+     *         no token or a token with an empty image
+     */
+    @Override
+    public final GenericToken getNextToken() {
+        GenericToken currentToken = (GenericToken) tokenManager.getNextToken();
+        // A missing token ends the stream just like a token with an empty image does
+        while (currentToken != null && !currentToken.getImage().isEmpty()) {
+            analyzeToken(currentToken);
+            processCPDSuppression(currentToken);
+
+            if (!isDiscarding()) {
+                return currentToken;
+            }
+
+            currentToken = (GenericToken) tokenManager.getNextToken();
+        }
+
+        return null;
+    }
+
+    /**
+     * Tells whether the current token has to be dropped, either because of a
+     * suppression comment or because of a language-specific rule.
+     */
+    private boolean isDiscarding() {
+        return discardingSuppressing || isLanguageSpecificDiscarding();
+    }
+
+    /**
+     * Updates the suppression state from the comments reachable from the given token
+     * through {@link GenericToken#getPreviousComment()}. The first comment containing
+     * either marker decides; CPD-OFF is tested before CPD-ON.
+     *
+     * @param currentToken The token whose comments are inspected
+     */
+    private void processCPDSuppression(final GenericToken currentToken) {
+        GenericToken comment = currentToken.getPreviousComment();
+        while (comment != null) {
+            final String image = comment.getImage();
+            if (image.contains("CPD-OFF")) {
+                discardingSuppressing = true;
+                return;
+            }
+            if (image.contains("CPD-ON")) {
+                discardingSuppressing = false;
+                return;
+            }
+            comment = comment.getPreviousComment();
+        }
+    }
+
+    /**
+     * Extension point for subclasses to indicate tokens are to be filtered.
+     *
+     * @return True if tokens should be filtered, false otherwise
+     */
+    protected boolean isLanguageSpecificDiscarding() {
+        return false;
+    }
+
+    /**
+     * Extension point for subclasses to analyze all tokens (before filtering)
+     * and update internal status to decide on custom discard rules.
+     *
+     * @param currentToken The token to be analyzed
+     * @see #isLanguageSpecificDiscarding()
+     */
+    protected void analyzeToken(final GenericToken currentToken) {
+        // noop
+    }
+
+}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/TokenFilter.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/TokenFilter.java
new file mode 100644
index 0000000000..965db0cbfd
--- /dev/null
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/TokenFilter.java
@@ -0,0 +1,20 @@
+/**
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.cpd.token;
+
+import net.sourceforge.pmd.lang.ast.GenericToken;
+
+/**
+ * Defines a filter to be applied to the token stream during CPD analysis.
+ */
+public interface TokenFilter {
+
+    /**
+     * Retrieves the next token to pass the filter.
+     *
+     * @return The next token to pass the filter, or null if the end of the stream was reached
+     */
+    GenericToken getNextToken();
+}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/TokenManager.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/TokenManager.java
index 6673d13a68..e0e67c78cc 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/TokenManager.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/TokenManager.java
@@ -8,6 +8,7 @@ package net.sourceforge.pmd.lang;
  * Common interface for interacting with parser Token Managers.
  */
 public interface TokenManager {
+    // TODO : Change the return to GenericToken in 7.0.0 - maybe even use generics TokenManager<T extends GenericToken>
     Object getNextToken();
 
     void setFileName(String fileName);
diff --git a/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java b/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java
index 505c150996..4ed923a31a 100644
--- a/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java
@@ -9,9 +9,11 @@ import java.util.Deque;
 import java.util.LinkedList;
 import java.util.Properties;
 
+import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
 import net.sourceforge.pmd.lang.LanguageRegistry;
 import net.sourceforge.pmd.lang.LanguageVersionHandler;
 import net.sourceforge.pmd.lang.TokenManager;
+import net.sourceforge.pmd.lang.ast.GenericToken;
 import net.sourceforge.pmd.lang.java.JavaLanguageModule;
 import net.sourceforge.pmd.lang.java.ast.JavaParserConstants;
 import net.sourceforge.pmd.lang.java.ast.Token;
@@ -31,34 +33,30 @@ public class JavaTokenizer implements Tokenizer {
         ignoreIdentifiers = Boolean.parseBoolean(properties.getProperty(IGNORE_IDENTIFIERS, "false"));
     }
 
+    @Override
     public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
-        StringBuilder stringBuilder = sourceCode.getCodeBuffer();
-
-        // Note that Java version is irrelevant for tokenizing
-        LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(JavaLanguageModule.NAME)
-                .getVersion("1.4").getLanguageVersionHandler();
-        String fileName = sourceCode.getFileName();
-        TokenManager tokenMgr = languageVersionHandler.getParser(languageVersionHandler.getDefaultParserOptions())
-                .getTokenManager(fileName, new StringReader(stringBuilder.toString()));
-        Token currentToken = (Token) tokenMgr.getNextToken();
-
-        TokenDiscarder discarder = new TokenDiscarder(ignoreAnnotations);
-        ConstructorDetector constructorDetector = new ConstructorDetector(ignoreIdentifiers);
-
-        while (currentToken.image.length() > 0) {
-            discarder.updateState(currentToken);
-
-            if (discarder.isDiscarding()) {
-                currentToken = (Token) tokenMgr.getNextToken();
-                continue;
-            }
+        final String fileName = sourceCode.getFileName();
+        final JavaTokenFilter tokenFilter = createTokenFilter(sourceCode);
+        final ConstructorDetector constructorDetector = new ConstructorDetector(ignoreIdentifiers);
 
+        Token currentToken = (Token) tokenFilter.getNextToken();
+        while (currentToken != null) {
             processToken(tokenEntries, fileName, currentToken, constructorDetector);
-            currentToken = (Token) tokenMgr.getNextToken();
+            currentToken = (Token) tokenFilter.getNextToken();
         }
         tokenEntries.add(TokenEntry.getEOF());
     }
 
+    private JavaTokenFilter createTokenFilter(final SourceCode sourceCode) {
+        final StringBuilder stringBuilder = sourceCode.getCodeBuffer();
+        // Note that Java version is irrelevant for tokenizing
+        final LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(JavaLanguageModule.NAME)
+                .getVersion("1.4").getLanguageVersionHandler();
+        final TokenManager tokenMgr = languageVersionHandler.getParser(languageVersionHandler.getDefaultParserOptions())
+                .getTokenManager(sourceCode.getFileName(), new StringReader(stringBuilder.toString()));
+        return new JavaTokenFilter(tokenMgr, ignoreAnnotations);
+    }
+
     private void processToken(Tokens tokenEntries, String fileName, Token currentToken,
             ConstructorDetector constructorDetector) {
         String image = currentToken.image;
@@ -93,15 +91,14 @@ public class JavaTokenizer implements Tokenizer {
     }
 
     /**
-     * The {@link TokenDiscarder} consumes token by token and maintains state.
-     * It can detect, whether the current token belongs to an annotation and
-     * whether the current token should be discarded by CPD.
+     * The {@link JavaTokenFilter} extends the {@link JavaCCTokenFilter} to discard
+     * Java-specific tokens.
      * <p>
      * By default, it discards semicolons, package and import statements, and
-     * enables CPD suppression. Optionally, all annotations can be ignored, too.
+     * enables annotation-based CPD suppression. Optionally, all annotations can be ignored, too.
      * </p>
      */
-    private static class TokenDiscarder {
+    private static class JavaTokenFilter extends JavaCCTokenFilter {
         private boolean isAnnotation = false;
         private boolean nextTokenEndsAnnotation = false;
         private int annotationStack = 0;
@@ -112,22 +109,24 @@ public class JavaTokenizer implements Tokenizer {
         private boolean discardingAnnotations = false;
         private boolean ignoreAnnotations = false;
 
-        TokenDiscarder(boolean ignoreAnnotations) {
+        JavaTokenFilter(final TokenManager tokenManager, final boolean ignoreAnnotations) {
+            super(tokenManager);
             this.ignoreAnnotations = ignoreAnnotations;
         }
 
-        public void updateState(Token currentToken) {
-            detectAnnotations(currentToken);
+        @Override
+        protected void analyzeToken(final GenericToken currentToken) {
+            detectAnnotations((Token) currentToken);
 
-            skipSemicolon(currentToken);
-            skipPackageAndImport(currentToken);
-            skipCPDSuppression(currentToken);
+            skipSemicolon((Token) currentToken);
+            skipPackageAndImport((Token) currentToken);
+            skipAnnotationSuppression((Token) currentToken);
             if (ignoreAnnotations) {
                 skipAnnotations();
             }
         }
 
-        private void skipPackageAndImport(Token currentToken) {
+        private void skipPackageAndImport(final Token currentToken) {
             if (currentToken.kind == JavaParserConstants.PACKAGE || currentToken.kind == JavaParserConstants.IMPORT) {
                 discardingKeywords = true;
             } else if (discardingKeywords && currentToken.kind == JavaParserConstants.SEMICOLON) {
@@ -135,7 +134,7 @@ public class JavaTokenizer implements Tokenizer {
             }
         }
 
-        private void skipSemicolon(Token currentToken) {
+        private void skipSemicolon(final Token currentToken) {
             if (currentToken.kind == JavaParserConstants.SEMICOLON) {
                 discardingSemicolon = true;
             } else if (discardingSemicolon && currentToken.kind != JavaParserConstants.SEMICOLON) {
@@ -143,21 +142,7 @@ public class JavaTokenizer implements Tokenizer {
             }
         }
 
-        private void skipCPDSuppression(Token currentToken) {
-            // Check if a comment is altering the suppression state
-            Token st = currentToken.specialToken;
-            while (st != null) {
-                if (st.image.contains("CPD-OFF")) {
-                    discardingSuppressing = true;
-                    break;
-                }
-                if (st.image.contains("CPD-ON")) {
-                    discardingSuppressing = false;
-                    break;
-                }
-                st = st.specialToken;
-            }
-
+        private void skipAnnotationSuppression(final Token currentToken) {
             // if processing an annotation, look for a CPD-START or CPD-END
             if (isAnnotation) {
                 if (!discardingSuppressing && currentToken.kind == JavaParserConstants.STRING_LITERAL
@@ -178,7 +163,8 @@ public class JavaTokenizer implements Tokenizer {
             }
         }
 
-        public boolean isDiscarding() {
+        @Override
+        protected boolean isLanguageSpecificDiscarding() {
             return discardingSemicolon || discardingKeywords || discardingAnnotations || discardingSuppressing;
         }
 