diff --git a/pmd-core/pom.xml b/pmd-core/pom.xml
index fd7c96cda4..5140684ed5 100644
--- a/pmd-core/pom.xml
+++ b/pmd-core/pom.xml
@@ -96,7 +96,10 @@
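             <artifactId>ant</artifactId>
             <scope>provided</scope>
         </dependency>
-
+        <dependency>
+            <groupId>org.antlr</groupId>
+            <artifactId>antlr4-runtime</artifactId>
+        </dependency>
         <dependency>
             <groupId>com.beust</groupId>
             <artifactId>jcommander</artifactId>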
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/RuleSet.java b/pmd-core/src/main/java/net/sourceforge/pmd/RuleSet.java
index bbab5ec797..71d5bc24a5 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/RuleSet.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/RuleSet.java
@@ -58,12 +58,9 @@ public class RuleSet implements ChecksumAware {
/**
* Creates a new RuleSet with the given checksum.
- *
- * @param checksum
- * A checksum of the ruleset, should change only if the ruleset
- * was configured differently
- * @param rules
- * The rules to be applied as part of this ruleset
+ *
+ * @param builder
+ * A rule set builder.
*/
private RuleSet(final RuleSetBuilder builder) {
checksum = builder.checksum;
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AntlrTokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AntlrTokenizer.java
new file mode 100644
index 0000000000..86529a9275
--- /dev/null
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AntlrTokenizer.java
@@ -0,0 +1,91 @@
+/**
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.cpd;
+
+import org.antlr.v4.runtime.BaseErrorListener;
+import org.antlr.v4.runtime.CharStream;
+import org.antlr.v4.runtime.CharStreams;
+import org.antlr.v4.runtime.Lexer;
+import org.antlr.v4.runtime.RecognitionException;
+import org.antlr.v4.runtime.Recognizer;
+import org.antlr.v4.runtime.Token;
+
+import net.sourceforge.pmd.lang.ast.TokenMgrError;
+
+/** Base class for CPD tokenizers that delegate lexing to an ANTLR 4 {@link Lexer}. */
+public abstract class AntlrTokenizer implements Tokenizer {
+    private final Lexer lexer;
+
+    /**
+     * Creates a tokenizer around the given lexer, which every tokenize call reuses.
+     * @param lexer the lexer that will be fed each source's text
+     */
+    public AntlrTokenizer(Lexer lexer) {
+        this.lexer = lexer;
+    }
+
+    /**
+     * Lexes the source text and records one entry per token that is not on the hidden
+     * channel, followed by an EOF entry (appended even if lexing fails).
+     *
+     * @throws TokenMgrError if the lexer reports a syntax error
+     */
+    @Override
+    public void tokenize(final SourceCode sourceCode, final Tokens tokenEntries) {
+        StringBuilder buffer = sourceCode.getCodeBuffer();
+        try {
+            CharStream charStream = CharStreams.fromString(buffer.toString());
+            lexer.reset();
+            lexer.setInputStream(charStream);
+            lexer.removeErrorListeners();
+            lexer.addErrorListener(new ErrorHandler());
+            Token token = lexer.nextToken();
+            while (token.getType() != Token.EOF) {
+                if (token.getChannel() != Lexer.HIDDEN) {
+                    tokenEntries.add(new TokenEntry(token.getText(), sourceCode.getFileName(), token.getLine()));
+                }
+                token = lexer.nextToken();
+            }
+        } catch (ANTLRSyntaxError err) {
+            // Rethrow as TokenMgrError so that CPD's '--skipLexicalErrors' command line
+            // option handles ANTLR lexer failures correctly.
+            throw new TokenMgrError("Lexical error in file " + sourceCode.getFileName() + " at line " + err.getLine()
+                    + ", column " + err.getColumn() + ". Encountered: " + err.getMessage(),
+                    TokenMgrError.LEXICAL_ERROR);
+        } finally {
+            tokenEntries.add(TokenEntry.getEOF());
+        }
+    }
+
+    /** Error listener that aborts lexing by throwing {@link ANTLRSyntaxError}. */
+    private static class ErrorHandler extends BaseErrorListener {
+        @Override
+        public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine,
+                String msg, RecognitionException ex) {
+            throw new ANTLRSyntaxError(msg, line, charPositionInLine, ex);
+        }
+    }
+
+    /** Unchecked exception carrying the line and column of a lexical error reported by ANTLR. */
+    private static class ANTLRSyntaxError extends RuntimeException {
+        private static final long serialVersionUID = 1L;
+        private final int line;
+        private final int column;
+
+        ANTLRSyntaxError(String msg, int line, int column, RecognitionException cause) {
+            super(msg, cause);
+            this.line = line;
+            this.column = column;
+        }
+
+        public int getLine() {
+            return line;
+        }
+
+        public int getColumn() {
+            return column;
+        }
+    }
+}
diff --git a/pmd-swift/src/main/java/net/sourceforge/pmd/cpd/SwiftTokenizer.java b/pmd-swift/src/main/java/net/sourceforge/pmd/cpd/SwiftTokenizer.java
index d401eefd12..49e4661b23 100644
--- a/pmd-swift/src/main/java/net/sourceforge/pmd/cpd/SwiftTokenizer.java
+++ b/pmd-swift/src/main/java/net/sourceforge/pmd/cpd/SwiftTokenizer.java
@@ -4,79 +4,14 @@
package net.sourceforge.pmd.cpd;
-import org.antlr.v4.runtime.ANTLRInputStream;
-import org.antlr.v4.runtime.BaseErrorListener;
-import org.antlr.v4.runtime.Lexer;
-import org.antlr.v4.runtime.RecognitionException;
-import org.antlr.v4.runtime.Recognizer;
-import org.antlr.v4.runtime.Token;
-
-import net.sourceforge.pmd.lang.ast.TokenMgrError;
import net.sourceforge.pmd.lang.swift.antlr4.SwiftLexer;
/**
- * The Swift Tokenizer
+ * CPD tokenizer for Swift, backed by {@link SwiftLexer}.
*/
-public class SwiftTokenizer implements Tokenizer {
- @Override
- public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
- StringBuilder buffer = sourceCode.getCodeBuffer();
-
- try {
- ANTLRInputStream ais = new ANTLRInputStream(buffer.toString());
- SwiftLexer lexer = new SwiftLexer(ais);
-
- lexer.removeErrorListeners();
- lexer.addErrorListener(new ErrorHandler());
- Token token = lexer.nextToken();
-
- while (token.getType() != Token.EOF) {
- if (token.getChannel() != Lexer.HIDDEN) {
- TokenEntry tokenEntry = new TokenEntry(token.getText(), sourceCode.getFileName(), token.getLine());
-
- tokenEntries.add(tokenEntry);
- }
- token = lexer.nextToken();
- }
- } catch (ANTLRSyntaxError err) {
- // Wrap exceptions of the Swift tokenizer in a TokenMgrError, so
- // they are correctly handled
- // when CPD is executed with the '--skipLexicalErrors' command line
- // option
- throw new TokenMgrError("Lexical error in file " + sourceCode.getFileName() + " at line " + err.getLine()
- + ", column " + err.getColumn() + ". Encountered: " + err.getMessage(),
- TokenMgrError.LEXICAL_ERROR);
- } finally {
- tokenEntries.add(TokenEntry.getEOF());
- }
- }
-
- private static class ErrorHandler extends BaseErrorListener {
- @Override
- public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine,
- String msg, RecognitionException ex) {
- throw new ANTLRSyntaxError(msg, line, charPositionInLine, ex);
- }
- }
-
- private static class ANTLRSyntaxError extends RuntimeException {
- private static final long serialVersionUID = 1L;
- private final int line;
- private final int column;
-
- ANTLRSyntaxError(String msg, int line, int column, RecognitionException cause) {
- super(msg, cause);
- this.line = line;
- this.column = column;
- }
-
- public int getLine() {
- return line;
- }
-
- public int getColumn() {
- return column;
- }
+public class SwiftTokenizer extends AntlrTokenizer {
+ public SwiftTokenizer() {
+ super(new SwiftLexer(null)); // no input yet: AntlrTokenizer.tokenize sets the stream for each source
}
}
diff --git a/pom.xml b/pom.xml
index d754cabb39..d215750d9b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -286,6 +286,7 @@ Additionally it includes CPD, the copy-paste-detector. CPD finds duplicated code
${antlr.version}
${project.build.sourceEncoding}
+ true