Adjusted Kotlin tokenizer to skip over package and import statements.

This prevents CPD from finding duplications in package and import statements.
This commit is contained in:
Dennie Reniers
2018-11-01 11:00:37 +01:00
committed by Maikel Steneker
parent ff55a230d5
commit dc5b18b512

View File

@ -4,7 +4,6 @@
package net.sourceforge.pmd.cpd; package net.sourceforge.pmd.cpd;
import net.sourceforge.pmd.lang.kotlin.antlr4.Kotlin;
import org.antlr.v4.runtime.ANTLRInputStream; import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.BaseErrorListener; import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.Lexer; import org.antlr.v4.runtime.Lexer;
@ -14,33 +13,37 @@ import org.antlr.v4.runtime.Token;
import net.sourceforge.pmd.lang.ast.TokenMgrError; import net.sourceforge.pmd.lang.ast.TokenMgrError;
//import net.sourceforge.pmd.lang.kotlin.antlr4.KotlinLexer; //import net.sourceforge.pmd.lang.kotlin.antlr4.KotlinLexer;
import net.sourceforge.pmd.lang.kotlin.antlr4.Kotlin;
/** /**
* The Kotlin Tokenizer * The Kotlin Tokenizer
*/ */
public class KotlinTokenizer implements Tokenizer { public class KotlinTokenizer implements Tokenizer {
private boolean discardingPackageAndImport = false;
@Override @Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) { public void tokenize(final SourceCode sourceCode, final Tokens tokenEntries) {
StringBuilder buffer = sourceCode.getCodeBuffer(); final StringBuilder buffer = sourceCode.getCodeBuffer();
try { try {
ANTLRInputStream ais = new ANTLRInputStream(buffer.toString()); final ANTLRInputStream ais = new ANTLRInputStream(buffer.toString());
Kotlin lexer = new Kotlin(ais); final Kotlin lexer = new Kotlin(ais);
lexer.removeErrorListeners(); lexer.removeErrorListeners();
lexer.addErrorListener(new ErrorHandler()); lexer.addErrorListener(new ErrorHandler());
Token token = lexer.nextToken(); Token token = lexer.nextToken();
while (token.getType() != Token.EOF) { while (token.getType() != Token.EOF) {
if (token.getChannel() != Lexer.HIDDEN) { analyzeTokenStart(token);
TokenEntry tokenEntry = new TokenEntry(token.getText(), sourceCode.getFileName(), token.getLine()); if (token.getChannel() != Lexer.HIDDEN && token.getType() != Kotlin.NL && !isDiscarding()) {
final TokenEntry tokenEntry = new TokenEntry(token.getText(), sourceCode.getFileName(), token.getLine());
tokenEntries.add(tokenEntry); tokenEntries.add(tokenEntry);
} }
analyzeTokenEnd(token);
token = lexer.nextToken(); token = lexer.nextToken();
} }
} catch (ANTLRSyntaxError err) { } catch (final ANTLRSyntaxError err) {
// Wrap exceptions of the Kotlin tokenizer in a TokenMgrError, so // Wrap exceptions of the Kotlin tokenizer in a TokenMgrError, so
// they are correctly handled // they are correctly handled
// when CPD is executed with the '--skipLexicalErrors' command line // when CPD is executed with the '--skipLexicalErrors' command line
@ -53,10 +56,29 @@ public class KotlinTokenizer implements Tokenizer {
} }
} }
/**
 * Reports whether tokens are currently being skipped because the lexer is
 * inside a package or import statement.
 *
 * @return the current value of the {@code discardingPackageAndImport} flag
 */
private boolean isDiscarding() {
    return this.discardingPackageAndImport;
}
/**
 * Inspects a token before it is considered for output and switches on
 * discarding when the token opens a package or import statement.
 *
 * <p>The flag is only ever set here; it is never cleared by this method.
 *
 * @param currentToken the token most recently produced by the lexer
 */
private void analyzeTokenStart(final Token currentToken) {
    final int tokenType = currentToken.getType();
    final boolean opensHeaderStatement = tokenType == Kotlin.PACKAGE || tokenType == Kotlin.IMPORT;
    if (!opensHeaderStatement) {
        return;
    }
    // From this keyword on, tokens are skipped until the statement terminator is seen.
    discardingPackageAndImport = true;
}
/**
 * Inspects a token after it has been considered for output and switches off
 * discarding once the terminator of a package or import statement (a
 * semicolon or a newline token) has been reached.
 *
 * <p>NOTE(review): the flag is an instance field and is only cleared here, on
 * a SEMICOLON or NL token. If the input ends inside a package/import
 * statement without a trailing newline, the flag appears to stay set for the
 * next {@code tokenize} call on the same instance -- confirm and consider
 * resetting it at the start of {@code tokenize}.
 *
 * @param currentToken the token most recently produced by the lexer
 */
private void analyzeTokenEnd(final Token currentToken) {
    final int tokenType = currentToken.getType();
    if (!discardingPackageAndImport) {
        return;
    }
    final boolean terminatesStatement = tokenType == Kotlin.SEMICOLON || tokenType == Kotlin.NL;
    if (terminatesStatement) {
        discardingPackageAndImport = false;
    }
}
private static class ErrorHandler extends BaseErrorListener { private static class ErrorHandler extends BaseErrorListener {
@Override @Override
public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine, public void syntaxError(final Recognizer<?, ?> recognizer, final Object offendingSymbol, final int line, final int charPositionInLine,
String msg, RecognitionException ex) { final String msg, final RecognitionException ex) {
throw new ANTLRSyntaxError(msg, line, charPositionInLine, ex); throw new ANTLRSyntaxError(msg, line, charPositionInLine, ex);
} }
} }
@ -66,7 +88,7 @@ public class KotlinTokenizer implements Tokenizer {
private final int line; private final int line;
private final int column; private final int column;
ANTLRSyntaxError(String msg, int line, int column, RecognitionException cause) { ANTLRSyntaxError(final String msg, final int line, final int column, final RecognitionException cause) {
super(msg, cause); super(msg, cause);
this.line = line; this.line = line;
this.column = column; this.column = column;