[core] Refactor CPD token filtering

- Define a generic `TokenFilter` interface in pmd-core
- Provide a base, extension-friendly `JavaCCTokenFilter` to process and
  filter JavaCC token streams, honoring `CPD-OFF` and `CPD-ON` comments
- Refactor the `JavaTokenizer` to use `JavaCCTokenFilter` by extending
  it and adding custom Java-specific token filters
This commit is contained in:
Juan Martín Sotuyo Dodero
2018-04-11 01:49:40 -03:00
parent 379eaed2c8
commit d6bea21ed6
4 changed files with 141 additions and 50 deletions

View File

@ -0,0 +1,85 @@
/**
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.cpd.token;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.GenericToken;
/**
 * Base {@link TokenFilter} for JavaCC-based token managers.
 *
 * <p>Tokens are pulled from the wrapped {@link TokenManager} and handed out
 * only while no discard rule is active. Comments containing {@code CPD-OFF}
 * or {@code CPD-ON} switch comment-based suppression on and off, and
 * subclasses may contribute their own discard rules through
 * {@link #analyzeToken(GenericToken)} and
 * {@link #isLanguageSpecificDiscarding()}.</p>
 */
public class JavaCCTokenFilter implements TokenFilter {

    /** Source of the raw, unfiltered tokens. */
    private final TokenManager tokenManager;

    /** True while a {@code CPD-OFF} comment is in effect. */
    private boolean discardingSuppressing;

    /**
     * Creates a new JavaCCTokenFilter.
     *
     * @param tokenManager The token manager from which to retrieve tokens to be filtered
     */
    public JavaCCTokenFilter(final TokenManager tokenManager) {
        this.tokenManager = tokenManager;
    }

    /**
     * Pulls tokens from the token manager until one passes the filter.
     * Every token read is first offered to {@link #analyzeToken(GenericToken)}
     * and checked for suppression comments, whether or not it is discarded.
     *
     * @return The next token that is not discarded, or null once a token with
     *         an empty image is read (treated as the end of the stream)
     */
    @Override
    public final GenericToken getNextToken() {
        for (GenericToken candidate = (GenericToken) tokenManager.getNextToken();
                !candidate.getImage().isEmpty();
                candidate = (GenericToken) tokenManager.getNextToken()) {
            // State must be updated for every token, discarded or not
            analyzeToken(candidate);
            processCPDSuppression(candidate);

            if (!isDiscarding()) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Tells whether the token currently being processed has to be dropped,
     * either due to comment-based suppression or a language-specific rule.
     */
    private boolean isDiscarding() {
        if (discardingSuppressing) {
            return true;
        }
        return isLanguageSpecificDiscarding();
    }

    /**
     * Updates the comment-based suppression state from the comments attached
     * to the given token. The comment chain is walked through
     * {@code getPreviousComment()}; the first comment found that contains a
     * marker decides the new state, and {@code CPD-OFF} is checked before
     * {@code CPD-ON} within a single comment.
     *
     * @param currentToken The token whose comments are inspected
     */
    private void processCPDSuppression(final GenericToken currentToken) {
        for (GenericToken remark = currentToken.getPreviousComment();
                remark != null;
                remark = remark.getPreviousComment()) {
            final String text = remark.getImage();
            if (text.contains("CPD-OFF")) {
                discardingSuppressing = true;
                return;
            }
            if (text.contains("CPD-ON")) {
                discardingSuppressing = false;
                return;
            }
        }
    }

    /**
     * Extension point for subclasses to signal that tokens are to be filtered
     * out. The base implementation never discards.
     *
     * @return True if tokens should be filtered, false otherwise
     */
    protected boolean isLanguageSpecificDiscarding() {
        return false;
    }

    /**
     * Extension point for subclasses to inspect every token (before filtering)
     * and update internal state used to decide on custom discard rules.
     * The base implementation does nothing.
     *
     * @param currentToken The token to be analyzed
     * @see #isLanguageSpecificDiscarding()
     */
    protected void analyzeToken(final GenericToken currentToken) {
        // intentionally empty
    }
}

View File

@ -0,0 +1,19 @@
/**
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.cpd.token;
import net.sourceforge.pmd.lang.ast.GenericToken;
/**
* Defines a filter to be applied to the token stream during CPD analysis.
* Callers pull the tokens that pass the filter one at a time through
* {@link #getNextToken()}.
*/
public interface TokenFilter {
/**
* Retrieves the next token to pass the filter.
*
* @return The next token to pass the filter, or null if the end of the stream was reached
*/
GenericToken getNextToken();
}

View File

@ -8,6 +8,7 @@ package net.sourceforge.pmd.lang;
* Common interface for interacting with parser Token Managers.
*/
public interface TokenManager {
// TODO : Change the return to GenericToken in 7.0.0 - maybe even use generics TokenManager<T extends GenericToken>
Object getNextToken();
void setFileName(String fileName);

View File

@ -9,9 +9,11 @@ import java.util.Deque;
import java.util.LinkedList;
import java.util.Properties;
import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
import net.sourceforge.pmd.lang.LanguageRegistry;
import net.sourceforge.pmd.lang.LanguageVersionHandler;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.GenericToken;
import net.sourceforge.pmd.lang.java.JavaLanguageModule;
import net.sourceforge.pmd.lang.java.ast.JavaParserConstants;
import net.sourceforge.pmd.lang.java.ast.Token;
@ -31,34 +33,30 @@ public class JavaTokenizer implements Tokenizer {
ignoreIdentifiers = Boolean.parseBoolean(properties.getProperty(IGNORE_IDENTIFIERS, "false"));
}
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
StringBuilder stringBuilder = sourceCode.getCodeBuffer();
// Note that Java version is irrelevant for tokenizing
LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(JavaLanguageModule.NAME)
.getVersion("1.4").getLanguageVersionHandler();
String fileName = sourceCode.getFileName();
TokenManager tokenMgr = languageVersionHandler.getParser(languageVersionHandler.getDefaultParserOptions())
.getTokenManager(fileName, new StringReader(stringBuilder.toString()));
Token currentToken = (Token) tokenMgr.getNextToken();
TokenDiscarder discarder = new TokenDiscarder(ignoreAnnotations);
ConstructorDetector constructorDetector = new ConstructorDetector(ignoreIdentifiers);
while (currentToken.image.length() > 0) {
discarder.updateState(currentToken);
if (discarder.isDiscarding()) {
currentToken = (Token) tokenMgr.getNextToken();
continue;
}
final String fileName = sourceCode.getFileName();
final JavaTokenFilter tokenFilter = createTokenFilter(sourceCode);
final ConstructorDetector constructorDetector = new ConstructorDetector(ignoreIdentifiers);
Token currentToken = (Token) tokenFilter.getNextToken();
while (currentToken != null) {
processToken(tokenEntries, fileName, currentToken, constructorDetector);
currentToken = (Token) tokenMgr.getNextToken();
currentToken = (Token) tokenFilter.getNextToken();
}
tokenEntries.add(TokenEntry.getEOF());
}
/**
 * Builds the token filter for the given source: a Java token manager is
 * created over the source's code buffer and wrapped in a
 * {@link JavaTokenFilter} configured with the current annotation setting.
 *
 * @param sourceCode The source whose contents are to be tokenized
 * @return A filter yielding the tokens of {@code sourceCode}
 */
private JavaTokenFilter createTokenFilter(final SourceCode sourceCode) {
    final StringBuilder codeBuffer = sourceCode.getCodeBuffer();

    // Note that Java version is irrelevant for tokenizing
    final LanguageVersionHandler handler = LanguageRegistry
            .getLanguage(JavaLanguageModule.NAME)
            .getVersion("1.4")
            .getLanguageVersionHandler();

    final TokenManager javaTokens = handler
            .getParser(handler.getDefaultParserOptions())
            .getTokenManager(sourceCode.getFileName(), new StringReader(codeBuffer.toString()));

    return new JavaTokenFilter(javaTokens, ignoreAnnotations);
}
private void processToken(Tokens tokenEntries, String fileName, Token currentToken,
ConstructorDetector constructorDetector) {
String image = currentToken.image;
@ -93,15 +91,14 @@ public class JavaTokenizer implements Tokenizer {
}
/**
* The {@link TokenDiscarder} consumes token by token and maintains state.
* It can detect, whether the current token belongs to an annotation and
* whether the current token should be discarded by CPD.
* The {@link JavaTokenFilter} extends the {@link JavaCCTokenFilter} to discard
* Java-specific tokens.
* <p>
* By default, it discards semicolons, package and import statements, and
* enables CPD suppression. Optionally, all annotations can be ignored, too.
* enables annotation-based CPD suppression. Optionally, all annotations can be ignored, too.
* </p>
*/
private static class TokenDiscarder {
private static class JavaTokenFilter extends JavaCCTokenFilter {
private boolean isAnnotation = false;
private boolean nextTokenEndsAnnotation = false;
private int annotationStack = 0;
@ -112,22 +109,24 @@ public class JavaTokenizer implements Tokenizer {
private boolean discardingAnnotations = false;
private boolean ignoreAnnotations = false;
TokenDiscarder(boolean ignoreAnnotations) {
JavaTokenFilter(final TokenManager tokenManager, final boolean ignoreAnnotations) {
super(tokenManager);
this.ignoreAnnotations = ignoreAnnotations;
}
public void updateState(Token currentToken) {
detectAnnotations(currentToken);
@Override
protected void analyzeToken(final GenericToken currentToken) {
detectAnnotations((Token) currentToken);
skipSemicolon(currentToken);
skipPackageAndImport(currentToken);
skipCPDSuppression(currentToken);
skipSemicolon((Token) currentToken);
skipPackageAndImport((Token) currentToken);
skipAnnotationSuppression((Token) currentToken);
if (ignoreAnnotations) {
skipAnnotations();
}
}
private void skipPackageAndImport(Token currentToken) {
private void skipPackageAndImport(final Token currentToken) {
if (currentToken.kind == JavaParserConstants.PACKAGE || currentToken.kind == JavaParserConstants.IMPORT) {
discardingKeywords = true;
} else if (discardingKeywords && currentToken.kind == JavaParserConstants.SEMICOLON) {
@ -135,7 +134,7 @@ public class JavaTokenizer implements Tokenizer {
}
}
private void skipSemicolon(Token currentToken) {
private void skipSemicolon(final Token currentToken) {
if (currentToken.kind == JavaParserConstants.SEMICOLON) {
discardingSemicolon = true;
} else if (discardingSemicolon && currentToken.kind != JavaParserConstants.SEMICOLON) {
@ -143,21 +142,7 @@ public class JavaTokenizer implements Tokenizer {
}
}
private void skipCPDSuppression(Token currentToken) {
// Check if a comment is altering the suppression state
Token st = currentToken.specialToken;
while (st != null) {
if (st.image.contains("CPD-OFF")) {
discardingSuppressing = true;
break;
}
if (st.image.contains("CPD-ON")) {
discardingSuppressing = false;
break;
}
st = st.specialToken;
}
private void skipAnnotationSuppression(final Token currentToken) {
// if processing an annotation, look for a CPD-START or CPD-END
if (isAnnotation) {
if (!discardingSuppressing && currentToken.kind == JavaParserConstants.STRING_LITERAL
@ -178,7 +163,8 @@ public class JavaTokenizer implements Tokenizer {
}
}
public boolean isDiscarding() {
@Override
protected boolean isLanguageSpecificDiscarding() {
return discardingSemicolon || discardingKeywords || discardingAnnotations
|| discardingSuppressing;
}