Abstraction for CPD with ANTLR

This commit is contained in:
lsoncini 2018-07-21 20:10:24 -03:00
parent 0f9bc63bfc
commit 32b7cba79a
5 changed files with 103 additions and 76 deletions

View File

@ -96,7 +96,10 @@
<artifactId>ant</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.antlr</groupId>
<artifactId>antlr4-runtime</artifactId>
</dependency>
<dependency>
<groupId>com.beust</groupId>
<artifactId>jcommander</artifactId>

View File

@ -58,12 +58,9 @@ public class RuleSet implements ChecksumAware {
/**
* Creates a new RuleSet with the given checksum.
*
* @param checksum
* A checksum of the ruleset, should change only if the ruleset
* was configured differently
* @param rules
* The rules to be applied as part of this ruleset
*
* @param builder
* A rule set builder.
*/
private RuleSet(final RuleSetBuilder builder) {
checksum = builder.checksum;

View File

@ -0,0 +1,91 @@
/**
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.cpd;
import java.util.Objects;

import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;
import org.antlr.v4.runtime.Token;

import net.sourceforge.pmd.lang.ast.TokenMgrError;
/**
 * Base tokenizer for CPD languages whose lexer is generated by ANTLR 4.
 *
 * <p>Subclasses supply a concrete {@link Lexer}; this class drives it over the
 * source text and converts every non-hidden ANTLR token into a CPD
 * {@link TokenEntry}. Lexical errors raised by the lexer are translated into
 * {@link TokenMgrError} so that CPD's {@code --skipLexicalErrors} option
 * applies to them.
 *
 * <p>Not thread-safe: the single lexer instance is reconfigured on every call
 * to {@link #tokenize(SourceCode, Tokens)}.
 */
public abstract class AntlrTokenizer implements Tokenizer {

    /** Shared listener instance; ErrorHandler is stateless, so one suffices for all calls. */
    private static final ErrorHandler ERROR_HANDLER = new ErrorHandler();

    /** The ANTLR lexer reused (with a fresh input stream) for each tokenized source. */
    private final Lexer lexer;

    /**
     * Constructor.
     *
     * @param lexer the ANTLR lexer to drive; its input stream is replaced on
     *              every call to {@link #tokenize(SourceCode, Tokens)}, so it
     *              may be constructed with a {@code null} input stream
     */
    public AntlrTokenizer(final Lexer lexer) {
        // Fail fast here rather than with an NPE on the first tokenize() call
        this.lexer = Objects.requireNonNull(lexer, "lexer");
    }

    @Override
    public void tokenize(final SourceCode sourceCode, final Tokens tokenEntries) {
        final StringBuilder buffer = sourceCode.getCodeBuffer();
        try {
            final CharStream charStream = CharStreams.fromString(buffer.toString());
            // Lexer.setInputStream() resets the lexer internally,
            // so no explicit lexer.reset() call is needed beforehand
            lexer.setInputStream(charStream);
            lexer.removeErrorListeners();
            lexer.addErrorListener(ERROR_HANDLER);

            Token token = lexer.nextToken();
            while (token.getType() != Token.EOF) {
                // Skip hidden-channel tokens (whitespace, comments, ...)
                if (token.getChannel() != Lexer.HIDDEN) {
                    tokenEntries.add(new TokenEntry(token.getText(), sourceCode.getFileName(), token.getLine()));
                }
                token = lexer.nextToken();
            }
        } catch (final ANTLRSyntaxError err) {
            // Wrap exceptions of the ANTLR tokenizer in a TokenMgrError, so
            // they are correctly handled
            // when CPD is executed with the '--skipLexicalErrors' command line
            // option
            throw new TokenMgrError("Lexical error in file " + sourceCode.getFileName() + " at line " + err.getLine()
                    + ", column " + err.getColumn() + ". Encountered: " + err.getMessage(),
                    TokenMgrError.LEXICAL_ERROR);
        } finally {
            // Always terminate the token stream, even after a lexical error
            tokenEntries.add(TokenEntry.getEOF());
        }
    }

    /** Rethrows ANTLR syntax errors as {@link ANTLRSyntaxError} so tokenize() can wrap them. */
    private static class ErrorHandler extends BaseErrorListener {
        @Override
        public void syntaxError(final Recognizer<?, ?> recognizer, final Object offendingSymbol, final int line,
                final int charPositionInLine, final String msg, final RecognitionException ex) {
            throw new ANTLRSyntaxError(msg, line, charPositionInLine, ex);
        }
    }

    /** Carries the position of a lexical error out of the ANTLR error listener. */
    private static class ANTLRSyntaxError extends RuntimeException {
        private static final long serialVersionUID = 1L;

        private final int line;
        private final int column;

        ANTLRSyntaxError(final String msg, final int line, final int column, final RecognitionException cause) {
            super(msg, cause);
            this.line = line;
            this.column = column;
        }

        public int getLine() {
            return line;
        }

        public int getColumn() {
            return column;
        }
    }
}

View File

@ -4,79 +4,14 @@
package net.sourceforge.pmd.cpd;
import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;
import org.antlr.v4.runtime.Token;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
import net.sourceforge.pmd.lang.swift.antlr4.SwiftLexer;
/**
* The Swift Tokenizer
* SwiftTokenizer
*/
public class SwiftTokenizer implements Tokenizer {
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
StringBuilder buffer = sourceCode.getCodeBuffer();
try {
ANTLRInputStream ais = new ANTLRInputStream(buffer.toString());
SwiftLexer lexer = new SwiftLexer(ais);
lexer.removeErrorListeners();
lexer.addErrorListener(new ErrorHandler());
Token token = lexer.nextToken();
while (token.getType() != Token.EOF) {
if (token.getChannel() != Lexer.HIDDEN) {
TokenEntry tokenEntry = new TokenEntry(token.getText(), sourceCode.getFileName(), token.getLine());
tokenEntries.add(tokenEntry);
}
token = lexer.nextToken();
}
} catch (ANTLRSyntaxError err) {
// Wrap exceptions of the Swift tokenizer in a TokenMgrError, so
// they are correctly handled
// when CPD is executed with the '--skipLexicalErrors' command line
// option
throw new TokenMgrError("Lexical error in file " + sourceCode.getFileName() + " at line " + err.getLine()
+ ", column " + err.getColumn() + ". Encountered: " + err.getMessage(),
TokenMgrError.LEXICAL_ERROR);
} finally {
tokenEntries.add(TokenEntry.getEOF());
}
}
private static class ErrorHandler extends BaseErrorListener {
@Override
public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine,
String msg, RecognitionException ex) {
throw new ANTLRSyntaxError(msg, line, charPositionInLine, ex);
}
}
private static class ANTLRSyntaxError extends RuntimeException {
private static final long serialVersionUID = 1L;
private final int line;
private final int column;
ANTLRSyntaxError(String msg, int line, int column, RecognitionException cause) {
super(msg, cause);
this.line = line;
this.column = column;
}
public int getLine() {
return line;
}
public int getColumn() {
return column;
}
/**
 * CPD tokenizer for Swift, delegating all lexing to {@link AntlrTokenizer}
 * with the ANTLR-generated {@link SwiftLexer}.
 */
public class SwiftTokenizer extends AntlrTokenizer {
    public SwiftTokenizer() {
        // The lexer is deliberately created without an input stream: the base
        // class installs a fresh CharStream per source before lexing.
        super(new SwiftLexer(null));
    }
}

View File

@ -286,6 +286,7 @@ Additionally it includes CPD, the copy-paste-detector. CPD finds duplicated code
<version>${antlr.version}</version>
<configuration>
<inputEncoding>${project.build.sourceEncoding}</inputEncoding>
<visitor>true</visitor>
</configuration>
<executions>
<execution>