[core] Refactor CPD token filtering
- Define a generic `TokenFilter` interface in pmd-core
- Provide a base, extension-friendly `JavaCCTokenFilter` to process and filter JavaCC token streams, honoring `CPD-OFF` and `CPD-ON` comments
- Refactor the `JavaTokenizer` to use `JavaCCTokenFilter` by extending it and adding custom Java-specific token filters
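
For context, the comment-based suppression honored by the new base filter looks like this in a scanned source file (illustrative snippet with made-up names, not part of this commit): every token after a comment containing CPD-OFF is dropped until a comment containing CPD-ON is encountered.

    public class GeneratedMapper {
        // CPD-OFF -- tokens from here on are discarded before duplication matching
        void copyAccountFields() { /* generated code, ignored by CPD */ }
        void copyCustomerFields() { /* generated code, ignored by CPD */ }
        // CPD-ON -- analysis resumes with the next token
        void handWrittenLogic() { /* analyzed again */ }
    }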
New file in pmd-core: net.sourceforge.pmd.cpd.token.JavaCCTokenFilter

@@ -0,0 +1,85 @@
/**
 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
 */

package net.sourceforge.pmd.cpd.token;

import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.GenericToken;

/**
 * A generic filter for JavaCC-based token managers that allows to use comments
 * to enable / disable analysis of parts of the stream
 */
public class JavaCCTokenFilter implements TokenFilter {

    private final TokenManager tokenManager;
    private boolean discardingSuppressing;

    /**
     * Creates a new JavaCCTokenFilter
     * @param tokenManager The token manager from which to retrieve tokens to be filtered
     */
    public JavaCCTokenFilter(final TokenManager tokenManager) {
        this.tokenManager = tokenManager;
    }

    @Override
    public final GenericToken getNextToken() {
        GenericToken currentToken = (GenericToken) tokenManager.getNextToken();
        while (!currentToken.getImage().isEmpty()) {
            analyzeToken(currentToken);
            processCPDSuppression(currentToken);

            if (isDiscarding()) {
                currentToken = (GenericToken) tokenManager.getNextToken();
                continue;
            }

            return currentToken;
        }

        return null;
    }

    private boolean isDiscarding() {
        return discardingSuppressing || isLanguageSpecificDiscarding();
    }

    private void processCPDSuppression(final GenericToken currentToken) {
        // Check if a comment is altering the suppression state
        GenericToken comment = currentToken.getPreviousComment();
        while (comment != null) {
            if (comment.getImage().contains("CPD-OFF")) {
                discardingSuppressing = true;
                break;
            }
            if (comment.getImage().contains("CPD-ON")) {
                discardingSuppressing = false;
                break;
            }
            comment = comment.getPreviousComment();
        }
    }

    /**
     * Extension point for subclasses to indicate tokens are to be filtered.
     *
     * @return True if tokens should be filtered, false otherwise
     */
    protected boolean isLanguageSpecificDiscarding() {
        return false;
    }

    /**
     * Extension point for subclasses to analyze all tokens (before filtering)
     * and update internal status to decide on custom discard rules.
     *
     * @param currentToken The token to be analyzed
     * @see #isLanguageSpecificDiscarding()
     */
    protected void analyzeToken(final GenericToken currentToken) {
        // noop
    }

}
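
A language tokenizer reuses this base class by overriding the two protected extension points. Below is a minimal sketch, not part of this commit and with a hypothetical subclass name; it drops every semicolon token in addition to the CPD-OFF / CPD-ON handling inherited from JavaCCTokenFilter.

    import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
    import net.sourceforge.pmd.lang.TokenManager;
    import net.sourceforge.pmd.lang.ast.GenericToken;

    public class SemicolonDiscardingTokenFilter extends JavaCCTokenFilter {

        private boolean discardingSemicolon;

        public SemicolonDiscardingTokenFilter(final TokenManager tokenManager) {
            super(tokenManager);
        }

        @Override
        protected void analyzeToken(final GenericToken currentToken) {
            // called for every token before filtering: update the discard state
            discardingSemicolon = ";".equals(currentToken.getImage());
        }

        @Override
        protected boolean isLanguageSpecificDiscarding() {
            // consulted by getNextToken(); returning true drops the current token
            return discardingSemicolon;
        }
    }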
New file in pmd-core: net.sourceforge.pmd.cpd.token.TokenFilter

@@ -0,0 +1,19 @@
/**
 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
 */

package net.sourceforge.pmd.cpd.token;

import net.sourceforge.pmd.lang.ast.GenericToken;

/**
 * Defines filter to be applied to the token stream during CPD analysis
 */
public interface TokenFilter {

    /**
     * Retrieves the next token to pass the filter
     * @return The next token to pass the filter, or null if the end of the stream was reached
     */
    GenericToken getNextToken();
}
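
A consumer simply drains the filter until getNextToken() returns null, which is exactly what the refactored JavaTokenizer.tokenize() further down does. A minimal sketch of the calling pattern (helper class and method names are illustrative, not part of this commit):

    import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
    import net.sourceforge.pmd.cpd.token.TokenFilter;
    import net.sourceforge.pmd.lang.TokenManager;
    import net.sourceforge.pmd.lang.ast.GenericToken;

    final class TokenFilterUsage {

        static void forwardTokens(final TokenManager tokenManager) {
            final TokenFilter filter = new JavaCCTokenFilter(tokenManager);
            GenericToken token = filter.getNextToken();
            while (token != null) {
                // a surviving token would be recorded for duplication matching here
                token = filter.getNextToken();
            }
        }
    }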
TokenManager.java (net.sourceforge.pmd.lang):

@@ -8,6 +8,7 @@ package net.sourceforge.pmd.lang;
  * Common interface for interacting with parser Token Managers.
  */
 public interface TokenManager {
+    // TODO : Change the return to GenericToken in 7.0.0 - maybe even use generics TokenManager<T extends GenericToken>
     Object getNextToken();
 
     void setFileName(String fileName);
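
The TODO above hints at generifying the interface; a purely speculative sketch of what that could look like in 7.0.0 (not part of this commit or of any released API):

    import net.sourceforge.pmd.lang.ast.GenericToken;

    public interface TokenManager<T extends GenericToken> {

        T getNextToken();

        void setFileName(String fileName);
    }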
JavaTokenizer.java:

@@ -9,9 +9,11 @@ import java.util.Deque;
 import java.util.LinkedList;
 import java.util.Properties;
 
+import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
 import net.sourceforge.pmd.lang.LanguageRegistry;
 import net.sourceforge.pmd.lang.LanguageVersionHandler;
 import net.sourceforge.pmd.lang.TokenManager;
+import net.sourceforge.pmd.lang.ast.GenericToken;
 import net.sourceforge.pmd.lang.java.JavaLanguageModule;
 import net.sourceforge.pmd.lang.java.ast.JavaParserConstants;
 import net.sourceforge.pmd.lang.java.ast.Token;
@@ -31,34 +33,30 @@ public class JavaTokenizer implements Tokenizer {
         ignoreIdentifiers = Boolean.parseBoolean(properties.getProperty(IGNORE_IDENTIFIERS, "false"));
     }
 
+    @Override
     public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
-        StringBuilder stringBuilder = sourceCode.getCodeBuffer();
-        // Note that Java version is irrelevant for tokenizing
-        LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(JavaLanguageModule.NAME)
-                .getVersion("1.4").getLanguageVersionHandler();
-        String fileName = sourceCode.getFileName();
-        TokenManager tokenMgr = languageVersionHandler.getParser(languageVersionHandler.getDefaultParserOptions())
-                .getTokenManager(fileName, new StringReader(stringBuilder.toString()));
-        Token currentToken = (Token) tokenMgr.getNextToken();
-
-        TokenDiscarder discarder = new TokenDiscarder(ignoreAnnotations);
-        ConstructorDetector constructorDetector = new ConstructorDetector(ignoreIdentifiers);
-
-        while (currentToken.image.length() > 0) {
-            discarder.updateState(currentToken);
-
-            if (discarder.isDiscarding()) {
-                currentToken = (Token) tokenMgr.getNextToken();
-                continue;
-            }
-
+        final String fileName = sourceCode.getFileName();
+        final JavaTokenFilter tokenFilter = createTokenFilter(sourceCode);
+        final ConstructorDetector constructorDetector = new ConstructorDetector(ignoreIdentifiers);
+
+        Token currentToken = (Token) tokenFilter.getNextToken();
+        while (currentToken != null) {
             processToken(tokenEntries, fileName, currentToken, constructorDetector);
-            currentToken = (Token) tokenMgr.getNextToken();
+            currentToken = (Token) tokenFilter.getNextToken();
         }
         tokenEntries.add(TokenEntry.getEOF());
     }
 
+    private JavaTokenFilter createTokenFilter(final SourceCode sourceCode) {
+        final StringBuilder stringBuilder = sourceCode.getCodeBuffer();
+        // Note that Java version is irrelevant for tokenizing
+        final LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(JavaLanguageModule.NAME)
+                .getVersion("1.4").getLanguageVersionHandler();
+        final TokenManager tokenMgr = languageVersionHandler.getParser(languageVersionHandler.getDefaultParserOptions())
+                .getTokenManager(sourceCode.getFileName(), new StringReader(stringBuilder.toString()));
+        return new JavaTokenFilter(tokenMgr, ignoreAnnotations);
+    }
+
     private void processToken(Tokens tokenEntries, String fileName, Token currentToken,
             ConstructorDetector constructorDetector) {
         String image = currentToken.image;
@@ -93,15 +91,14 @@ public class JavaTokenizer implements Tokenizer {
     }
 
     /**
-     * The {@link TokenDiscarder} consumes token by token and maintains state.
-     * It can detect, whether the current token belongs to an annotation and
-     * whether the current token should be discarded by CPD.
+     * The {@link JavaTokenFilter} extends the {@link JavaCCTokenFilter} to discard
+     * Java-specific tokens.
      * <p>
      * By default, it discards semicolons, package and import statements, and
-     * enables CPD suppression. Optionally, all annotations can be ignored, too.
+     * enables annotation-based CPD suppression. Optionally, all annotations can be ignored, too.
      * </p>
      */
-    private static class TokenDiscarder {
+    private static class JavaTokenFilter extends JavaCCTokenFilter {
         private boolean isAnnotation = false;
         private boolean nextTokenEndsAnnotation = false;
         private int annotationStack = 0;
@@ -112,22 +109,24 @@ public class JavaTokenizer implements Tokenizer {
         private boolean discardingAnnotations = false;
         private boolean ignoreAnnotations = false;
 
-        TokenDiscarder(boolean ignoreAnnotations) {
+        JavaTokenFilter(final TokenManager tokenManager, final boolean ignoreAnnotations) {
+            super(tokenManager);
             this.ignoreAnnotations = ignoreAnnotations;
         }
 
-        public void updateState(Token currentToken) {
-            detectAnnotations(currentToken);
+        @Override
+        protected void analyzeToken(final GenericToken currentToken) {
+            detectAnnotations((Token) currentToken);
 
-            skipSemicolon(currentToken);
-            skipPackageAndImport(currentToken);
-            skipCPDSuppression(currentToken);
+            skipSemicolon((Token) currentToken);
+            skipPackageAndImport((Token) currentToken);
+            skipAnnotationSuppression((Token) currentToken);
             if (ignoreAnnotations) {
                 skipAnnotations();
             }
         }
 
-        private void skipPackageAndImport(Token currentToken) {
+        private void skipPackageAndImport(final Token currentToken) {
             if (currentToken.kind == JavaParserConstants.PACKAGE || currentToken.kind == JavaParserConstants.IMPORT) {
                 discardingKeywords = true;
             } else if (discardingKeywords && currentToken.kind == JavaParserConstants.SEMICOLON) {
@@ -135,7 +134,7 @@ public class JavaTokenizer implements Tokenizer {
             }
         }
 
-        private void skipSemicolon(Token currentToken) {
+        private void skipSemicolon(final Token currentToken) {
             if (currentToken.kind == JavaParserConstants.SEMICOLON) {
                 discardingSemicolon = true;
             } else if (discardingSemicolon && currentToken.kind != JavaParserConstants.SEMICOLON) {
@@ -143,21 +142,7 @@ public class JavaTokenizer implements Tokenizer {
             }
         }
 
-        private void skipCPDSuppression(Token currentToken) {
-            // Check if a comment is altering the suppression state
-            Token st = currentToken.specialToken;
-            while (st != null) {
-                if (st.image.contains("CPD-OFF")) {
-                    discardingSuppressing = true;
-                    break;
-                }
-                if (st.image.contains("CPD-ON")) {
-                    discardingSuppressing = false;
-                    break;
-                }
-                st = st.specialToken;
-            }
-
+        private void skipAnnotationSuppression(final Token currentToken) {
             // if processing an annotation, look for a CPD-START or CPD-END
             if (isAnnotation) {
                 if (!discardingSuppressing && currentToken.kind == JavaParserConstants.STRING_LITERAL
@@ -178,7 +163,8 @@ public class JavaTokenizer implements Tokenizer {
             }
         }
 
-        public boolean isDiscarding() {
+        @Override
+        protected boolean isLanguageSpecificDiscarding() {
             return discardingSemicolon || discardingKeywords || discardingAnnotations
                     || discardingSuppressing;
         }
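
An end-to-end sketch of the refactored pipeline (not part of the commit; it assumes the existing CPD API, in particular SourceCode.StringCodeLoader and Tokens): tokens between the CPD-OFF and CPD-ON comments are filtered out before they can ever be matched as duplicates.

    import net.sourceforge.pmd.cpd.JavaTokenizer;
    import net.sourceforge.pmd.cpd.SourceCode;
    import net.sourceforge.pmd.cpd.Tokens;

    final class CpdSuppressionExample {

        static Tokens tokenizeSample() {
            final JavaTokenizer tokenizer = new JavaTokenizer();
            final SourceCode sourceCode = new SourceCode(new SourceCode.StringCodeLoader(
                    "class Foo {\n"
                    + "    // CPD-OFF\n"
                    + "    void generated() { int a = 1; }\n"
                    + "    // CPD-ON\n"
                    + "    void handWritten() { int b = 2; }\n"
                    + "}\n", "Foo.java"));
            final Tokens tokenEntries = new Tokens();
            tokenizer.tokenize(sourceCode, tokenEntries);
            // tokenEntries now ends with the EOF marker and contains the surviving tokens,
            // but none of the tokens between the CPD-OFF and CPD-ON comments
            return tokenEntries;
        }
    }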