forked from phoedos/pmd
abstraction for cpd with antlr
This commit is contained in:
parent
0f9bc63bfc
commit
32b7cba79a
@ -96,7 +96,10 @@
|
||||
<artifactId>ant</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.antlr</groupId>
|
||||
<artifactId>antlr4-runtime</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.beust</groupId>
|
||||
<artifactId>jcommander</artifactId>
|
||||
|
@ -58,12 +58,9 @@ public class RuleSet implements ChecksumAware {
|
||||
|
||||
/**
|
||||
* Creates a new RuleSet with the given checksum.
|
||||
*
|
||||
* @param checksum
|
||||
* A checksum of the ruleset, should change only if the ruleset
|
||||
* was configured differently
|
||||
* @param rules
|
||||
* The rules to be applied as part of this ruleset
|
||||
*
|
||||
* @param builder
|
||||
* A rule set builder.
|
||||
*/
|
||||
private RuleSet(final RuleSetBuilder builder) {
|
||||
checksum = builder.checksum;
|
||||
|
@ -0,0 +1,91 @@
|
||||
/**
|
||||
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
|
||||
*/
|
||||
|
||||
package net.sourceforge.pmd.cpd;
|
||||
|
||||
import org.antlr.v4.runtime.BaseErrorListener;
|
||||
import org.antlr.v4.runtime.CharStream;
|
||||
import org.antlr.v4.runtime.CharStreams;
|
||||
import org.antlr.v4.runtime.Lexer;
|
||||
import org.antlr.v4.runtime.RecognitionException;
|
||||
import org.antlr.v4.runtime.Recognizer;
|
||||
import org.antlr.v4.runtime.Token;
|
||||
|
||||
import net.sourceforge.pmd.lang.ast.TokenMgrError;
|
||||
|
||||
public abstract class AntlrTokenizer implements Tokenizer {
|
||||
|
||||
private final Lexer lexer;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param lexer lexer.
|
||||
*/
|
||||
public AntlrTokenizer(Lexer lexer) {
|
||||
this.lexer = lexer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tokenize(final SourceCode sourceCode, final Tokens tokenEntries) {
|
||||
StringBuilder buffer = sourceCode.getCodeBuffer();
|
||||
|
||||
try {
|
||||
CharStream charStream = CharStreams.fromString(buffer.toString());
|
||||
lexer.reset();
|
||||
lexer.setInputStream(charStream);
|
||||
|
||||
lexer.removeErrorListeners();
|
||||
lexer.addErrorListener(new ErrorHandler());
|
||||
Token token = lexer.nextToken();
|
||||
|
||||
while (token.getType() != Token.EOF) {
|
||||
if (token.getChannel() != Lexer.HIDDEN) {
|
||||
TokenEntry tokenEntry = new TokenEntry(token.getText(), sourceCode.getFileName(), token.getLine());
|
||||
|
||||
tokenEntries.add(tokenEntry);
|
||||
}
|
||||
token = lexer.nextToken();
|
||||
}
|
||||
} catch (ANTLRSyntaxError err) {
|
||||
// Wrap exceptions of the ANTLR tokenizer in a TokenMgrError, so
|
||||
// they are correctly handled
|
||||
// when CPD is executed with the '--skipLexicalErrors' command line
|
||||
// option
|
||||
throw new TokenMgrError("Lexical error in file " + sourceCode.getFileName() + " at line " + err.getLine()
|
||||
+ ", column " + err.getColumn() + ". Encountered: " + err.getMessage(),
|
||||
TokenMgrError.LEXICAL_ERROR);
|
||||
} finally {
|
||||
tokenEntries.add(TokenEntry.getEOF());
|
||||
}
|
||||
}
|
||||
|
||||
private static class ErrorHandler extends BaseErrorListener {
|
||||
@Override
|
||||
public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine,
|
||||
String msg, RecognitionException ex) {
|
||||
throw new ANTLRSyntaxError(msg, line, charPositionInLine, ex);
|
||||
}
|
||||
}
|
||||
|
||||
private static class ANTLRSyntaxError extends RuntimeException {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private final int line;
|
||||
private final int column;
|
||||
|
||||
ANTLRSyntaxError(String msg, int line, int column, RecognitionException cause) {
|
||||
super(msg, cause);
|
||||
this.line = line;
|
||||
this.column = column;
|
||||
}
|
||||
|
||||
public int getLine() {
|
||||
return line;
|
||||
}
|
||||
|
||||
public int getColumn() {
|
||||
return column;
|
||||
}
|
||||
}
|
||||
}
|
@ -4,79 +4,14 @@
|
||||
|
||||
package net.sourceforge.pmd.cpd;
|
||||
|
||||
import org.antlr.v4.runtime.ANTLRInputStream;
|
||||
import org.antlr.v4.runtime.BaseErrorListener;
|
||||
import org.antlr.v4.runtime.Lexer;
|
||||
import org.antlr.v4.runtime.RecognitionException;
|
||||
import org.antlr.v4.runtime.Recognizer;
|
||||
import org.antlr.v4.runtime.Token;
|
||||
|
||||
import net.sourceforge.pmd.lang.ast.TokenMgrError;
|
||||
import net.sourceforge.pmd.lang.swift.antlr4.SwiftLexer;
|
||||
|
||||
/**
|
||||
* The Swift Tokenizer
|
||||
* SwiftTokenizer
|
||||
*/
|
||||
public class SwiftTokenizer implements Tokenizer {
|
||||
|
||||
@Override
|
||||
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
|
||||
StringBuilder buffer = sourceCode.getCodeBuffer();
|
||||
|
||||
try {
|
||||
ANTLRInputStream ais = new ANTLRInputStream(buffer.toString());
|
||||
SwiftLexer lexer = new SwiftLexer(ais);
|
||||
|
||||
lexer.removeErrorListeners();
|
||||
lexer.addErrorListener(new ErrorHandler());
|
||||
Token token = lexer.nextToken();
|
||||
|
||||
while (token.getType() != Token.EOF) {
|
||||
if (token.getChannel() != Lexer.HIDDEN) {
|
||||
TokenEntry tokenEntry = new TokenEntry(token.getText(), sourceCode.getFileName(), token.getLine());
|
||||
|
||||
tokenEntries.add(tokenEntry);
|
||||
}
|
||||
token = lexer.nextToken();
|
||||
}
|
||||
} catch (ANTLRSyntaxError err) {
|
||||
// Wrap exceptions of the Swift tokenizer in a TokenMgrError, so
|
||||
// they are correctly handled
|
||||
// when CPD is executed with the '--skipLexicalErrors' command line
|
||||
// option
|
||||
throw new TokenMgrError("Lexical error in file " + sourceCode.getFileName() + " at line " + err.getLine()
|
||||
+ ", column " + err.getColumn() + ". Encountered: " + err.getMessage(),
|
||||
TokenMgrError.LEXICAL_ERROR);
|
||||
} finally {
|
||||
tokenEntries.add(TokenEntry.getEOF());
|
||||
}
|
||||
}
|
||||
|
||||
private static class ErrorHandler extends BaseErrorListener {
|
||||
@Override
|
||||
public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine,
|
||||
String msg, RecognitionException ex) {
|
||||
throw new ANTLRSyntaxError(msg, line, charPositionInLine, ex);
|
||||
}
|
||||
}
|
||||
|
||||
private static class ANTLRSyntaxError extends RuntimeException {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private final int line;
|
||||
private final int column;
|
||||
|
||||
ANTLRSyntaxError(String msg, int line, int column, RecognitionException cause) {
|
||||
super(msg, cause);
|
||||
this.line = line;
|
||||
this.column = column;
|
||||
}
|
||||
|
||||
public int getLine() {
|
||||
return line;
|
||||
}
|
||||
|
||||
public int getColumn() {
|
||||
return column;
|
||||
}
|
||||
public class SwiftTokenizer extends AntlrTokenizer {
|
||||
public SwiftTokenizer() {
|
||||
super(new SwiftLexer(null));
|
||||
}
|
||||
}
|
||||
|
1
pom.xml
1
pom.xml
@ -286,6 +286,7 @@ Additionally it includes CPD, the copy-paste-detector. CPD finds duplicated code
|
||||
<version>${antlr.version}</version>
|
||||
<configuration>
|
||||
<inputEncoding>${project.build.sourceEncoding}</inputEncoding>
|
||||
<visitor>true</visitor>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
|
Loading…
x
Reference in New Issue
Block a user