[core] Add a generic JavaCCTokenizer for CPD

This commit is contained in:
Andreas Dangel
2019-01-26 11:24:32 +01:00
parent a79d4aeb4f
commit 08b7dd49dd
13 changed files with 154 additions and 227 deletions

View File

@ -4,21 +4,21 @@
package net.sourceforge.pmd.cpd;
import java.io.IOException;
import java.io.StringReader;
import java.util.Deque;
import java.util.LinkedList;
import java.util.Properties;
import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
import net.sourceforge.pmd.lang.LanguageRegistry;
import net.sourceforge.pmd.lang.LanguageVersionHandler;
import net.sourceforge.pmd.cpd.token.TokenFilter;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.GenericToken;
import net.sourceforge.pmd.lang.java.JavaLanguageModule;
import net.sourceforge.pmd.lang.java.JavaTokenManager;
import net.sourceforge.pmd.lang.java.ast.JavaParserConstants;
import net.sourceforge.pmd.lang.java.ast.Token;
public class JavaTokenizer implements Tokenizer {
public class JavaTokenizer extends JavaCCTokenizer {
public static final String CPD_START = "\"CPD-START\"";
public static final String CPD_END = "\"CPD-END\"";
@ -27,6 +27,8 @@ public class JavaTokenizer implements Tokenizer {
private boolean ignoreLiterals;
private boolean ignoreIdentifiers;
private ConstructorDetector constructorDetector;
public void setProperties(Properties properties) {
ignoreAnnotations = Boolean.parseBoolean(properties.getProperty(IGNORE_ANNOTATIONS, "false"));
ignoreLiterals = Boolean.parseBoolean(properties.getProperty(IGNORE_LITERALS, "false"));
@ -34,48 +36,42 @@ public class JavaTokenizer implements Tokenizer {
}
// NOTE(review): this is a diff rendering without +/- markers; the first tokenize(...) signature and
// its loop appear to be the removed version, the second signature (throws IOException) the added one.
@Override
// Earlier version: fetches tokens from the JavaTokenFilter until getNextToken() returns null,
// passes each one to processToken together with a fresh ConstructorDetector, then appends
// the EOF entry to tokenEntries.
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
final String fileName = sourceCode.getFileName();
final JavaTokenFilter tokenFilter = createTokenFilter(sourceCode);
final ConstructorDetector constructorDetector = new ConstructorDetector(ignoreIdentifiers);
Token currentToken = (Token) tokenFilter.getNextToken();
while (currentToken != null) {
processToken(tokenEntries, fileName, currentToken, constructorDetector);
currentToken = (Token) tokenFilter.getNextToken();
}
tokenEntries.add(TokenEntry.getEOF());
// Later version: stores a new ConstructorDetector (configured with ignoreIdentifiers) in the
// constructorDetector field on every call, then delegates the actual work to super.tokenize.
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) throws IOException {
constructorDetector = new ConstructorDetector(ignoreIdentifiers);
super.tokenize(sourceCode, tokenEntries);
}
// NOTE(review): diff rendering without +/- markers. createTokenFilter(...) looks like the removed
// helper and getLexerForSource(...) like its replacement; the two bodies are interleaved below.
private JavaTokenFilter createTokenFilter(final SourceCode sourceCode) {
// Replacement hook: returns the token manager used to lex the given source.
@Override
protected TokenManager getLexerForSource(SourceCode sourceCode) {
final StringBuilder stringBuilder = sourceCode.getCodeBuffer();
// Note that Java version is irrelevant for tokenizing
// Earlier path: look up the Java language (version "1.4") in the LanguageRegistry, ask its
// handler for a parser, obtain the token manager from that parser and wrap it in a JavaTokenFilter.
final LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(JavaLanguageModule.NAME)
.getVersion("1.4").getLanguageVersionHandler();
final TokenManager tokenMgr = languageVersionHandler.getParser(languageVersionHandler.getDefaultParserOptions())
.getTokenManager(sourceCode.getFileName(), new StringReader(stringBuilder.toString()));
return new JavaTokenFilter(tokenMgr, ignoreAnnotations);
// Later path: build a JavaTokenManager directly over a StringReader of the buffered source text.
return new JavaTokenManager(new StringReader(stringBuilder.toString()));
}
// NOTE(review): diff rendering without +/- markers. The private void processToken(...) taking a
// ConstructorDetector appears to be the removed version; the protected TokenEntry processToken(...)
// taking a GenericToken appears to be the added one. Old and new statements alternate below.
private void processToken(Tokens tokenEntries, String fileName, Token currentToken,
ConstructorDetector constructorDetector) {
String image = currentToken.image;
// Added hook: wraps the given token manager in a JavaTokenFilter, passing on ignoreAnnotations.
@Override
protected TokenFilter getTokenFilter(TokenManager tokenManager) {
return new JavaTokenFilter(tokenManager, ignoreAnnotations);
}
constructorDetector.restoreConstructorToken(tokenEntries, currentToken);
// Later version: builds the TokenEntry for one token and returns it rather than adding it
// to tokenEntries itself.
@Override
protected TokenEntry processToken(Tokens tokenEntries, GenericToken currentToken, String fileName) {
String image = currentToken.getImage();
// Cast to the Java-specific Token so that its kind field can be inspected below.
Token javaToken = (Token) currentToken;
if (ignoreLiterals && (currentToken.kind == JavaParserConstants.STRING_LITERAL
|| currentToken.kind == JavaParserConstants.CHARACTER_LITERAL
|| currentToken.kind == JavaParserConstants.DECIMAL_LITERAL
|| currentToken.kind == JavaParserConstants.FLOATING_POINT_LITERAL)) {
image = String.valueOf(currentToken.kind);
// Runs before the image is computed; presumably re-labels already emitted constructor
// tokens - TODO confirm against ConstructorDetector.
constructorDetector.restoreConstructorToken(tokenEntries, javaToken);
// With ignoreLiterals set, string, character, decimal and floating point literals are
// represented by the string form of their token kind instead of their text.
if (ignoreLiterals && (javaToken.kind == JavaParserConstants.STRING_LITERAL
|| javaToken.kind == JavaParserConstants.CHARACTER_LITERAL
|| javaToken.kind == JavaParserConstants.DECIMAL_LITERAL
|| javaToken.kind == JavaParserConstants.FLOATING_POINT_LITERAL)) {
image = String.valueOf(javaToken.kind);
}
if (ignoreIdentifiers && currentToken.kind == JavaParserConstants.IDENTIFIER) {
image = String.valueOf(currentToken.kind);
// With ignoreIdentifiers set, identifiers are likewise replaced by their token kind.
if (ignoreIdentifiers && javaToken.kind == JavaParserConstants.IDENTIFIER) {
image = String.valueOf(javaToken.kind);
}
constructorDetector.processToken(currentToken);
constructorDetector.processToken(javaToken);
// Earlier version appended the entry to tokenEntries directly; the later one returns it.
tokenEntries.add(new TokenEntry(image, fileName, currentToken.beginLine));
return new TokenEntry(image, fileName, currentToken.getBeginLine());
}
public void setIgnoreLiterals(boolean ignore) {

View File

@ -88,7 +88,7 @@ public class JavaTokensTokenizerTest {
}
@Test
public void testIgnoreComments() {
public void testIgnoreComments() throws IOException {
JavaTokenizer t = new JavaTokenizer();
t.setIgnoreAnnotations(false);
SourceCode sourceCode = new SourceCode(new SourceCode.StringCodeLoader("package foo.bar.baz;" + PMD.EOL

View File

@ -7,6 +7,7 @@ package net.sourceforge.pmd.cpd;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
@ -32,7 +33,7 @@ public class MatchAlgorithmTest {
}
@Test
public void testSimple() {
public void testSimple() throws IOException {
JavaTokenizer tokenizer = new JavaTokenizer();
SourceCode sourceCode = new SourceCode(new SourceCode.StringCodeLoader(getSampleCode(), "Foo.java"));
Tokens tokens = new Tokens();
@ -63,7 +64,7 @@ public class MatchAlgorithmTest {
}
@Test
public void testIgnore() {
public void testIgnore() throws IOException {
JavaTokenizer tokenizer = new JavaTokenizer();
tokenizer.setIgnoreLiterals(true);
tokenizer.setIgnoreIdentifiers(true);