[core] Add a generic JavaCCTokenizer for CPD

This commit is contained in:
Andreas Dangel
2019-01-26 11:24:32 +01:00
parent a79d4aeb4f
commit 08b7dd49dd
13 changed files with 154 additions and 227 deletions

View File

@ -6,21 +6,18 @@ package net.sourceforge.pmd.cpd;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Properties;
import net.sourceforge.pmd.PMD;
import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
import net.sourceforge.pmd.cpd.token.TokenFilter;
import net.sourceforge.pmd.lang.ast.GenericToken;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.cpp.CppTokenManager;
import net.sourceforge.pmd.util.IOUtil;
/**
* The C++ tokenizer.
*/
public class CPPTokenizer implements Tokenizer {
public class CPPTokenizer extends JavaCCTokenizer {
private boolean skipBlocks = true;
private String skipBlocksStart;
@ -48,27 +45,6 @@ public class CPPTokenizer implements Tokenizer {
}
}
/**
 * Tokenizes the given C++ source and appends one {@link TokenEntry} per token
 * to {@code tokenEntries}.
 *
 * <p>The code buffer is run through {@code maybeSkipBlocks} and
 * {@code IOUtil.skipBOM} before being handed to a {@link CppTokenManager}.
 * An {@link IOException} is reported on {@code System.err} and not rethrown.
 * The EOF entry is appended in every case, including after a failure.
 *
 * @param sourceCode   the source whose code buffer and file name are read
 * @param tokenEntries the collection that receives the token entries
 */
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
    StringBuilder code = sourceCode.getCodeBuffer();
    try (Reader in = IOUtil.skipBOM(new StringReader(maybeSkipBlocks(code.toString())))) {
        CppTokenManager lexer = new CppTokenManager(in);
        lexer.setFileName(sourceCode.getFileName());
        final TokenFilter filter = new JavaCCTokenFilter(lexer);
        // The filter signals the end of the token stream by returning null.
        for (GenericToken token = filter.getNextToken(); token != null; token = filter.getNextToken()) {
            tokenEntries.add(new TokenEntry(token.getImage(), sourceCode.getFileName(), token.getBeginLine()));
        }
    } catch (IOException e) {
        e.printStackTrace();
        System.err.println("Error parsing " + sourceCode.getFileName());
    } finally {
        // Terminate the token list even when reading failed part-way through.
        tokenEntries.add(TokenEntry.getEOF());
    }
}
private String maybeSkipBlocks(String test) throws IOException {
if (!skipBlocks) {
return test;
@ -92,4 +68,14 @@ public class CPPTokenizer implements Tokenizer {
}
return filtered.toString();
}
/**
 * Creates the C++ token manager for the given source.
 *
 * <p>The code buffer is first passed through {@code maybeSkipBlocks} (which
 * returns its input unchanged when {@code skipBlocks} is {@code false}) and
 * then through {@code IOUtil.skipBOM}.
 *
 * <p>NOTE(review): this method does not call {@code setFileName} on the
 * returned manager; presumably the {@code JavaCCTokenizer} base class does
 * that - confirm.
 *
 * @param sourceCode the source whose code buffer is tokenized
 * @return a new {@link CppTokenManager} reading the prepared source text
 * @throws RuntimeException wrapping any {@link IOException} raised while
 *                          preparing the reader
 */
@Override
protected TokenManager getLexerForSource(SourceCode sourceCode) {
    try {
        final String rawCode = sourceCode.getCodeBuffer().toString();
        final String preparedCode = maybeSkipBlocks(rawCode);
        final StringReader codeReader = new StringReader(preparedCode);
        return new CppTokenManager(IOUtil.skipBOM(codeReader));
    } catch (IOException e) {
        // The overridden signature declares no checked exception, so wrap it.
        throw new RuntimeException(e);
    }
}
}

View File

@ -7,6 +7,7 @@ package net.sourceforge.pmd.cpd;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
@ -117,7 +118,7 @@ public class CPPTokenizerContinuationTest {
.getResourceAsStream("cpp/" + name), StandardCharsets.UTF_8);
}
private Tokens parse(String code) {
private Tokens parse(String code) throws IOException {
CPPTokenizer tokenizer = new CPPTokenizer();
tokenizer.setProperties(new Properties());
Tokens tokens = new Tokens();

View File

@ -8,6 +8,7 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotSame;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Properties;
@ -159,14 +160,18 @@ public class CPPTokenizerTest {
}
private Tokens parse(String snippet) {
return parse(snippet, false, new Tokens());
try {
return parse(snippet, false, new Tokens());
} catch (IOException e) {
throw new RuntimeException(e);
}
}
private Tokens parse(String snippet, boolean skipBlocks, Tokens tokens) {
private Tokens parse(String snippet, boolean skipBlocks, Tokens tokens) throws IOException {
return parse(snippet, skipBlocks, null, tokens);
}
private Tokens parse(String snippet, boolean skipBlocks, String skipPattern, Tokens tokens) {
private Tokens parse(String snippet, boolean skipBlocks, String skipPattern, Tokens tokens) throws IOException {
Properties properties = new Properties();
properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS, Boolean.toString(skipBlocks));
if (skipPattern != null) {