From 32b7cba79a9a8350fb3b13cbedce804ec3d24157 Mon Sep 17 00:00:00 2001 From: lsoncini Date: Sat, 21 Jul 2018 20:10:24 -0300 Subject: [PATCH] abstraction for cpd with antlr --- pmd-core/pom.xml | 5 +- .../java/net/sourceforge/pmd/RuleSet.java | 9 +- .../sourceforge/pmd/cpd/AntlrTokenizer.java | 91 +++++++++++++++++++ .../sourceforge/pmd/cpd/SwiftTokenizer.java | 73 +-------------- pom.xml | 1 + 5 files changed, 103 insertions(+), 76 deletions(-) create mode 100644 pmd-core/src/main/java/net/sourceforge/pmd/cpd/AntlrTokenizer.java diff --git a/pmd-core/pom.xml b/pmd-core/pom.xml index fd7c96cda4..5140684ed5 100644 --- a/pmd-core/pom.xml +++ b/pmd-core/pom.xml @@ -96,7 +96,10 @@ ant provided - + + org.antlr + antlr4-runtime + com.beust jcommander diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/RuleSet.java b/pmd-core/src/main/java/net/sourceforge/pmd/RuleSet.java index bbab5ec797..71d5bc24a5 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/RuleSet.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/RuleSet.java @@ -58,12 +58,9 @@ public class RuleSet implements ChecksumAware { /** * Creates a new RuleSet with the given checksum. - * - * @param checksum - * A checksum of the ruleset, should change only if the ruleset - * was configured differently - * @param rules - * The rules to be applied as part of this ruleset + * + * @param builder + * A rule set builder. */ private RuleSet(final RuleSetBuilder builder) { checksum = builder.checksum; diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AntlrTokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AntlrTokenizer.java new file mode 100644 index 0000000000..86529a9275 --- /dev/null +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AntlrTokenizer.java @@ -0,0 +1,91 @@ +/** + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.cpd; + +import org.antlr.v4.runtime.BaseErrorListener; +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.CharStreams; +import org.antlr.v4.runtime.Lexer; +import org.antlr.v4.runtime.RecognitionException; +import org.antlr.v4.runtime.Recognizer; +import org.antlr.v4.runtime.Token; + +import net.sourceforge.pmd.lang.ast.TokenMgrError; + +public abstract class AntlrTokenizer implements Tokenizer { + + private final Lexer lexer; + + /** + * Constructor. + * + * @param lexer lexer. + */ + public AntlrTokenizer(Lexer lexer) { + this.lexer = lexer; + } + + @Override + public void tokenize(final SourceCode sourceCode, final Tokens tokenEntries) { + StringBuilder buffer = sourceCode.getCodeBuffer(); + + try { + CharStream charStream = CharStreams.fromString(buffer.toString()); + lexer.reset(); + lexer.setInputStream(charStream); + + lexer.removeErrorListeners(); + lexer.addErrorListener(new ErrorHandler()); + Token token = lexer.nextToken(); + + while (token.getType() != Token.EOF) { + if (token.getChannel() != Lexer.HIDDEN) { + TokenEntry tokenEntry = new TokenEntry(token.getText(), sourceCode.getFileName(), token.getLine()); + + tokenEntries.add(tokenEntry); + } + token = lexer.nextToken(); + } + } catch (ANTLRSyntaxError err) { + // Wrap exceptions of the ANTLR tokenizer in a TokenMgrError, so + // they are correctly handled + // when CPD is executed with the '--skipLexicalErrors' command line + // option + throw new TokenMgrError("Lexical error in file " + sourceCode.getFileName() + " at line " + err.getLine() + + ", column " + err.getColumn() + ". Encountered: " + err.getMessage(), + TokenMgrError.LEXICAL_ERROR); + } finally { + tokenEntries.add(TokenEntry.getEOF()); + } + } + + private static class ErrorHandler extends BaseErrorListener { + @Override + public void syntaxError(Recognizer recognizer, Object offendingSymbol, int line, int charPositionInLine, + String msg, RecognitionException ex) { + throw new ANTLRSyntaxError(msg, line, charPositionInLine, ex); + } + } + + private static class ANTLRSyntaxError extends RuntimeException { + private static final long serialVersionUID = 1L; + private final int line; + private final int column; + + ANTLRSyntaxError(String msg, int line, int column, RecognitionException cause) { + super(msg, cause); + this.line = line; + this.column = column; + } + + public int getLine() { + return line; + } + + public int getColumn() { + return column; + } + } +} diff --git a/pmd-swift/src/main/java/net/sourceforge/pmd/cpd/SwiftTokenizer.java b/pmd-swift/src/main/java/net/sourceforge/pmd/cpd/SwiftTokenizer.java index d401eefd12..49e4661b23 100644 --- a/pmd-swift/src/main/java/net/sourceforge/pmd/cpd/SwiftTokenizer.java +++ b/pmd-swift/src/main/java/net/sourceforge/pmd/cpd/SwiftTokenizer.java @@ -4,79 +4,14 @@ package net.sourceforge.pmd.cpd; -import org.antlr.v4.runtime.ANTLRInputStream; -import org.antlr.v4.runtime.BaseErrorListener; -import org.antlr.v4.runtime.Lexer; -import org.antlr.v4.runtime.RecognitionException; -import org.antlr.v4.runtime.Recognizer; -import org.antlr.v4.runtime.Token; - -import net.sourceforge.pmd.lang.ast.TokenMgrError; import net.sourceforge.pmd.lang.swift.antlr4.SwiftLexer; /** - * The Swift Tokenizer + * SwiftTokenizer */ -public class SwiftTokenizer implements Tokenizer { - @Override - public void tokenize(SourceCode sourceCode, Tokens tokenEntries) { - StringBuilder buffer = sourceCode.getCodeBuffer(); - - try { - ANTLRInputStream ais = new ANTLRInputStream(buffer.toString()); - SwiftLexer lexer = new SwiftLexer(ais); - - lexer.removeErrorListeners(); - lexer.addErrorListener(new ErrorHandler()); - Token token = lexer.nextToken(); - - while (token.getType() != Token.EOF) { - if (token.getChannel() != Lexer.HIDDEN) { - TokenEntry tokenEntry = new TokenEntry(token.getText(), sourceCode.getFileName(), token.getLine()); - - tokenEntries.add(tokenEntry); - } - token = lexer.nextToken(); - } - } catch (ANTLRSyntaxError err) { - // Wrap exceptions of the Swift tokenizer in a TokenMgrError, so - // they are correctly handled - // when CPD is executed with the '--skipLexicalErrors' command line - // option - throw new TokenMgrError("Lexical error in file " + sourceCode.getFileName() + " at line " + err.getLine() - + ", column " + err.getColumn() + ". Encountered: " + err.getMessage(), - TokenMgrError.LEXICAL_ERROR); - } finally { - tokenEntries.add(TokenEntry.getEOF()); - } - } - - private static class ErrorHandler extends BaseErrorListener { - @Override - public void syntaxError(Recognizer recognizer, Object offendingSymbol, int line, int charPositionInLine, - String msg, RecognitionException ex) { - throw new ANTLRSyntaxError(msg, line, charPositionInLine, ex); - } - } - - private static class ANTLRSyntaxError extends RuntimeException { - private static final long serialVersionUID = 1L; - private final int line; - private final int column; - - ANTLRSyntaxError(String msg, int line, int column, RecognitionException cause) { - super(msg, cause); - this.line = line; - this.column = column; - } - - public int getLine() { - return line; - } - - public int getColumn() { - return column; - } +public class SwiftTokenizer extends AntlrTokenizer { + public SwiftTokenizer() { + super(new SwiftLexer(null)); } } diff --git a/pom.xml b/pom.xml index d754cabb39..d215750d9b 100644 --- a/pom.xml +++ b/pom.xml @@ -286,6 +286,7 @@ Additionally it includes CPD, the copy-paste-detector. CPD finds duplicated code ${antlr.version} ${project.build.sourceEncoding} + true