Adjusted Kotlin tokenizer to skip over package and import statements.

This prevents CPD from finding duplications in package and import statements.
2018-11-01 11:00:37 +01:00
parent ff55a230d5
commit dc5b18b512
1 changed file with 34 additions and 12 deletions
--- a/pmd-kotlin/src/main/java/net/sourceforge/pmd/cpd/KotlinTokenizer.java
+++ b/pmd-kotlin/src/main/java/net/sourceforge/pmd/cpd/KotlinTokenizer.java
@ -4,7 +4,6 @@
 package net.sourceforge.pmd.cpd;
 import net.sourceforge.pmd.lang.kotlin.antlr4.Kotlin;
 import org.antlr.v4.runtime.ANTLRInputStream;
 import org.antlr.v4.runtime.BaseErrorListener;
 import org.antlr.v4.runtime.Lexer;
@ -14,33 +13,37 @@ import org.antlr.v4.runtime.Token;
 import net.sourceforge.pmd.lang.ast.TokenMgrError;
 //import net.sourceforge.pmd.lang.kotlin.antlr4.KotlinLexer;
 import net.sourceforge.pmd.lang.kotlin.antlr4.Kotlin;
 /**
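 * The Kotlin tokenizer for CPD. Tokens that belong to package and import
 * statements are skipped, so they cannot be reported as duplications.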
 */
 public class KotlinTokenizer implements Tokenizer {
    private boolean discardingPackageAndImport = false;
    @Override
-    public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
+    public void tokenize(final SourceCode sourceCode, final Tokens tokenEntries) {
-        StringBuilder buffer = sourceCode.getCodeBuffer();
+        final StringBuilder buffer = sourceCode.getCodeBuffer();
        try {
-            ANTLRInputStream ais = new ANTLRInputStream(buffer.toString());
+            final ANTLRInputStream ais = new ANTLRInputStream(buffer.toString());
-            Kotlin lexer = new Kotlin(ais);
+            final Kotlin lexer = new Kotlin(ais);
            lexer.removeErrorListeners();
            lexer.addErrorListener(new ErrorHandler());
            Token token = lexer.nextToken();
            while (token.getType() != Token.EOF) {
-                if (token.getChannel() != Lexer.HIDDEN) {
+                analyzeTokenStart(token);
-                    TokenEntry tokenEntry = new TokenEntry(token.getText(), sourceCode.getFileName(), token.getLine());
+                if (token.getChannel() != Lexer.HIDDEN && token.getType() != Kotlin.NL && !isDiscarding()) {
-
+                    final TokenEntry tokenEntry = new TokenEntry(token.getText(), sourceCode.getFileName(), token.getLine());
                    tokenEntries.add(tokenEntry);
                }
                analyzeTokenEnd(token);
                token = lexer.nextToken();
            }
-        } catch (ANTLRSyntaxError err) {
+        } catch (final ANTLRSyntaxError err) {
            // Wrap exceptions of the Kotlin tokenizer in a TokenMgrError, so
            // they are correctly handled
            // when CPD is executed with the '--skipLexicalErrors' command line
@ -53,10 +56,29 @@ public class KotlinTokenizer implements Tokenizer {
        }
    }
    /**
     * Tells whether tokens are currently being skipped because a package or
     * import statement has been entered and not yet terminated.
     *
     * @return the current value of the {@code discardingPackageAndImport} flag
     */
    private boolean isDiscarding() {
        return this.discardingPackageAndImport;
    }
    /**
     * Examines a token before it is considered for output and switches
     * discarding on when the token has type {@code Kotlin.PACKAGE} or
     * {@code Kotlin.IMPORT}. Any other token type leaves the flag untouched.
     *
     * @param currentToken the token most recently read from the lexer
     */
    private void analyzeTokenStart(final Token currentToken) {
        final int tokenType = currentToken.getType();
        final boolean opensSkippedStatement = tokenType == Kotlin.PACKAGE || tokenType == Kotlin.IMPORT;
        if (!opensSkippedStatement) {
            return;
        }
        discardingPackageAndImport = true;
    }
    /**
     * Examines a token after it has been processed and switches discarding
     * off again once a token of type {@code Kotlin.SEMICOLON} or
     * {@code Kotlin.NL} is seen while discarding is active.
     *
     * @param currentToken the token that has just been processed
     */
    private void analyzeTokenEnd(final Token currentToken) {
        final int tokenType = currentToken.getType();
        final boolean closesStatement = tokenType == Kotlin.SEMICOLON || tokenType == Kotlin.NL;
        if (closesStatement && discardingPackageAndImport) {
            discardingPackageAndImport = false;
        }
    }
    /**
     * Error listener that {@code tokenize} installs on the lexer after
     * removing the existing listeners. Every reported syntax error is turned
     * into a thrown {@code ANTLRSyntaxError} carrying the message, line,
     * column and the original recognition exception.
     */
    private static class ErrorHandler extends BaseErrorListener {
        @Override
-        public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine,
+        public void syntaxError(final Recognizer<?, ?> recognizer, final Object offendingSymbol, final int line, final int charPositionInLine,
-                String msg, RecognitionException ex) {
+                final String msg, final RecognitionException ex) {
            // Fail fast: surface the lexer error as an exception for tokenize() to catch.
            throw new ANTLRSyntaxError(msg, line, charPositionInLine, ex);
        }
    }
@ -66,7 +88,7 @@ public class KotlinTokenizer implements Tokenizer {
        private final int line;
        private final int column;
-        ANTLRSyntaxError(String msg, int line, int column, RecognitionException cause) {
+        ANTLRSyntaxError(final String msg, final int line, final int column, final RecognitionException cause) {
            super(msg, cause);
            this.line = line;
            this.column = column;