Merge branch 'pr-1598'

Juan Martín Sotuyo Dodero
2019-02-17 16:33:33 -03:00
15 changed files with 219 additions and 238 deletions

View File

@@ -28,6 +28,8 @@ This is a {{ site.pmd.release_type }} release.
### Fixed Issues
* all
* [#1559](https://github.com/pmd/pmd/issues/1559): \[core] CPD: Lexical error in file (no file name provided)
* java-bestpractices
* [#808](https://github.com/pmd/pmd/issues/808): \[java] AccessorMethodGeneration false positives with compile time constants
* [#1405](https://github.com/pmd/pmd/issues/1405): \[java] New Rule: UseTryWithResources - Replace close and IOUtils.closeQuietly with try-with-resources
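
For orientation on the UseTryWithResources rule referenced above: it targets resources that are closed by hand in a finally block, often via IOUtils.closeQuietly. A minimal sketch of the before/after pattern; Files, BufferedReader and commons-io IOUtils are standard APIs, the class and method names are hypothetical.

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.commons.io.IOUtils;

class TryWithResourcesSketch {
    // Before: manual cleanup in a finally block, the pattern the rule reports
    static String firstLineOld(Path path) throws IOException {
        BufferedReader reader = null;
        try {
            reader = Files.newBufferedReader(path);
            return reader.readLine();
        } finally {
            IOUtils.closeQuietly(reader);
        }
    }

    // After: try-with-resources closes the reader automatically, even on exception
    static String firstLineNew(Path path) throws IOException {
        try (BufferedReader reader = Files.newBufferedReader(path)) {
            return reader.readLine();
        }
    }
}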

View File

@@ -8,13 +8,18 @@ import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.Token;
import net.sourceforge.pmd.annotation.InternalApi;
import net.sourceforge.pmd.cpd.token.AntlrToken;
import net.sourceforge.pmd.lang.antlr.AntlrTokenManager;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
/**
* Generic implementation of a {@link Tokenizer} useful to any Antlr grammar.
*
* @deprecated This is an internal API.
*/
@Deprecated
@InternalApi
public abstract class AntlrTokenizer implements Tokenizer {
protected abstract AntlrTokenManager getLexerForSource(SourceCode sourceCode);
@@ -23,6 +28,7 @@ public abstract class AntlrTokenizer implements Tokenizer {
public void tokenize(final SourceCode sourceCode, final Tokens tokenEntries) {
AntlrTokenManager tokenManager = getLexerForSource(sourceCode);
tokenManager.setFileName(sourceCode.getFileName());
try {
AntlrToken token = (AntlrToken) tokenManager.getNextToken();
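
For comparison with the JavaCC base class introduced below, an Antlr-based language plugs into AntlrTokenizer by returning an AntlrTokenManager from getLexerForSource. A rough sketch only; the SomeLanguageLexer class and the AntlrTokenManager constructor arguments are assumptions for illustration, not taken from this diff.

public class SomeLanguageTokenizer extends AntlrTokenizer {

    @Override
    protected AntlrTokenManager getLexerForSource(SourceCode sourceCode) {
        // CharStreams.fromString is the Antlr 4 runtime factory imported above
        CharStream charStream = CharStreams.fromString(sourceCode.getCodeBuffer().toString());
        // Assumed constructor shape: wrap the generated lexer and keep the file name for error reporting
        return new AntlrTokenManager(new SomeLanguageLexer(charStream), sourceCode.getFileName());
    }
}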

View File

@@ -0,0 +1,46 @@
/**
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.cpd.internal;
import java.io.IOException;
import net.sourceforge.pmd.cpd.SourceCode;
import net.sourceforge.pmd.cpd.TokenEntry;
import net.sourceforge.pmd.cpd.Tokenizer;
import net.sourceforge.pmd.cpd.Tokens;
import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
import net.sourceforge.pmd.cpd.token.TokenFilter;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.GenericToken;
public abstract class JavaCCTokenizer implements Tokenizer {
protected abstract TokenManager getLexerForSource(SourceCode sourceCode);
protected TokenFilter getTokenFilter(TokenManager tokenManager) {
return new JavaCCTokenFilter(tokenManager);
}
protected TokenEntry processToken(Tokens tokenEntries, GenericToken currentToken, String filename) {
return new TokenEntry(currentToken.getImage(), filename, currentToken.getBeginLine());
}
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) throws IOException {
TokenManager tokenManager = getLexerForSource(sourceCode);
tokenManager.setFileName(sourceCode.getFileName());
try {
final TokenFilter tokenFilter = getTokenFilter(tokenManager);
GenericToken currentToken = tokenFilter.getNextToken();
while (currentToken != null) {
tokenEntries.add(processToken(tokenEntries, currentToken, sourceCode.getFileName()));
currentToken = tokenFilter.getNextToken();
}
} finally {
tokenEntries.add(TokenEntry.getEOF());
}
}
}
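
For orientation, the new base class above means a language tokenizer only has to supply a TokenManager; token filtering and the closing EOF entry are handled here. A minimal sketch of a subclass, assuming a hypothetical FooTokenManager generated by a JavaCC grammar (the concrete tokenizers further down follow this same shape):

import java.io.StringReader;

import net.sourceforge.pmd.cpd.SourceCode;
import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
import net.sourceforge.pmd.lang.TokenManager;

public class FooTokenizer extends JavaCCTokenizer {

    // The only mandatory hook: build the JavaCC token manager for the source text
    @Override
    protected TokenManager getLexerForSource(SourceCode sourceCode) {
        StringBuilder buffer = sourceCode.getCodeBuffer();
        return new FooTokenManager(new StringReader(buffer.toString()));
    }

    // getTokenFilter(...) and processToken(...) can additionally be overridden,
    // e.g. to ignore annotations or anonymize literals, as JavaTokenizer does below
}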

View File

@@ -6,22 +6,19 @@ package net.sourceforge.pmd.cpd;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Properties;
import net.sourceforge.pmd.PMD;
import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
import net.sourceforge.pmd.cpd.token.TokenFilter;
import net.sourceforge.pmd.lang.ast.GenericToken;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.cpp.CppTokenManager;
import net.sourceforge.pmd.util.IOUtil;
/**
* The C++ tokenizer.
*/
public class CPPTokenizer implements Tokenizer {
public class CPPTokenizer extends JavaCCTokenizer {
private boolean skipBlocks = true;
private String skipBlocksStart;
@@ -49,26 +46,6 @@ public class CPPTokenizer implements Tokenizer {
}
}
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
StringBuilder buffer = sourceCode.getCodeBuffer();
try (Reader reader = IOUtil.skipBOM(new StringReader(maybeSkipBlocks(buffer.toString())))) {
final TokenFilter tokenFilter = new JavaCCTokenFilter(new CppTokenManager(reader));
GenericToken currentToken = tokenFilter.getNextToken();
while (currentToken != null) {
tokenEntries.add(new TokenEntry(currentToken.getImage(), sourceCode.getFileName(), currentToken.getBeginLine()));
currentToken = tokenFilter.getNextToken();
}
tokenEntries.add(TokenEntry.getEOF());
System.err.println("Added " + sourceCode.getFileName());
} catch (TokenMgrError | IOException err) {
err.printStackTrace();
System.err.println("Skipping " + sourceCode.getFileName() + " due to parse error");
tokenEntries.add(TokenEntry.getEOF());
}
}
private String maybeSkipBlocks(String test) throws IOException {
if (!skipBlocks) {
return test;
@@ -92,4 +69,14 @@ public class CPPTokenizer implements Tokenizer {
}
return filtered.toString();
}
@Override
protected TokenManager getLexerForSource(SourceCode sourceCode) {
try {
StringBuilder buffer = sourceCode.getCodeBuffer();
return new CppTokenManager(IOUtil.skipBOM(new StringReader(maybeSkipBlocks(buffer.toString()))));
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}

View File

@@ -7,6 +7,7 @@ package net.sourceforge.pmd.cpd;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
@@ -117,7 +118,7 @@ public class CPPTokenizerContinuationTest {
.getResourceAsStream("cpp/" + name), StandardCharsets.UTF_8);
}
private Tokens parse(String code) {
private Tokens parse(String code) throws IOException {
CPPTokenizer tokenizer = new CPPTokenizer();
tokenizer.setProperties(new Properties());
Tokens tokens = new Tokens();

View File

@@ -8,16 +8,23 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotSame;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Properties;
import org.apache.commons.io.IOUtils;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import net.sourceforge.pmd.PMD;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
public class CPPTokenizerTest {
@Rule
public ExpectedException expectedException = ExpectedException.none();
@Test
public void testUTFwithBOM() {
Tokens tokens = parse("\ufeffint start()\n{ int ret = 1;\nreturn ret;\n}\n");
@@ -69,21 +76,31 @@ public class CPPTokenizerTest {
@Test
public void testTokenizerWithSkipBlocks() throws Exception {
String test = IOUtils.toString(CPPTokenizerTest.class.getResourceAsStream("cpp/cpp_with_asm.cpp"), StandardCharsets.UTF_8);
Tokens tokens = parse(test, true);
Tokens tokens = parse(test, true, new Tokens());
assertEquals(19, tokens.size());
}
@Test
public void testTokenizerWithSkipBlocksPattern() throws Exception {
String test = IOUtils.toString(CPPTokenizerTest.class.getResourceAsStream("cpp/cpp_with_asm.cpp"), StandardCharsets.UTF_8);
Tokens tokens = parse(test, true, "#if debug|#endif");
Tokens tokens = new Tokens();
try {
parse(test, true, "#if debug|#endif", tokens);
} catch (TokenMgrError ignored) {
// ignored
}
assertEquals(31, tokens.size());
}
@Test
public void testTokenizerWithoutSkipBlocks() throws Exception {
String test = IOUtils.toString(CPPTokenizerTest.class.getResourceAsStream("cpp/cpp_with_asm.cpp"), StandardCharsets.UTF_8);
Tokens tokens = parse(test, false);
Tokens tokens = new Tokens();
try {
parse(test, false, tokens);
} catch (TokenMgrError ignored) {
// ignored
}
assertEquals(37, tokens.size());
}
@@ -128,15 +145,33 @@ public class CPPTokenizerTest {
assertEquals(9, tokens.size());
}
@Test
public void testLexicalErrorFilename() throws Exception {
Properties properties = new Properties();
properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS, Boolean.toString(false));
String test = IOUtils.toString(CPPTokenizerTest.class.getResourceAsStream("cpp/issue-1559.cpp"), StandardCharsets.UTF_8);
SourceCode code = new SourceCode(new SourceCode.StringCodeLoader(test, "issue-1559.cpp"));
CPPTokenizer tokenizer = new CPPTokenizer();
tokenizer.setProperties(properties);
expectedException.expect(TokenMgrError.class);
expectedException.expectMessage("Lexical error in file issue-1559.cpp at");
tokenizer.tokenize(code, new Tokens());
}
private Tokens parse(String snippet) {
return parse(snippet, false);
try {
return parse(snippet, false, new Tokens());
} catch (IOException e) {
throw new RuntimeException(e);
}
}
private Tokens parse(String snippet, boolean skipBlocks) {
return parse(snippet, skipBlocks, null);
private Tokens parse(String snippet, boolean skipBlocks, Tokens tokens) throws IOException {
return parse(snippet, skipBlocks, null, tokens);
}
private Tokens parse(String snippet, boolean skipBlocks, String skipPattern) {
private Tokens parse(String snippet, boolean skipBlocks, String skipPattern, Tokens tokens) throws IOException {
Properties properties = new Properties();
properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS, Boolean.toString(skipBlocks));
if (skipPattern != null) {
@@ -147,7 +182,6 @@ public class CPPTokenizerTest {
tokenizer.setProperties(properties);
SourceCode code = new SourceCode(new SourceCode.StringCodeLoader(snippet));
Tokens tokens = new Tokens();
tokenizer.tokenize(code, tokens);
return tokens;
}

View File

@@ -0,0 +1,11 @@
namespace ABC
{
namespace DEF
{
#ifdef USE_QT
const char* perPixelQml = R"QML(
)QML";
}
}
#endif // USE_QT

View File

@@ -4,21 +4,22 @@
package net.sourceforge.pmd.cpd;
import java.io.IOException;
import java.io.StringReader;
import java.util.Deque;
import java.util.LinkedList;
import java.util.Properties;
import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
import net.sourceforge.pmd.lang.LanguageRegistry;
import net.sourceforge.pmd.lang.LanguageVersionHandler;
import net.sourceforge.pmd.cpd.token.TokenFilter;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.GenericToken;
import net.sourceforge.pmd.lang.java.JavaLanguageModule;
import net.sourceforge.pmd.lang.java.JavaTokenManager;
import net.sourceforge.pmd.lang.java.ast.JavaParserConstants;
import net.sourceforge.pmd.lang.java.ast.Token;
public class JavaTokenizer implements Tokenizer {
public class JavaTokenizer extends JavaCCTokenizer {
public static final String CPD_START = "\"CPD-START\"";
public static final String CPD_END = "\"CPD-END\"";
@@ -27,6 +28,8 @@ public class JavaTokenizer implements Tokenizer {
private boolean ignoreLiterals;
private boolean ignoreIdentifiers;
private ConstructorDetector constructorDetector;
public void setProperties(Properties properties) {
ignoreAnnotations = Boolean.parseBoolean(properties.getProperty(IGNORE_ANNOTATIONS, "false"));
ignoreLiterals = Boolean.parseBoolean(properties.getProperty(IGNORE_LITERALS, "false"));
@@ -34,48 +37,42 @@ public class JavaTokenizer implements Tokenizer {
}
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
final String fileName = sourceCode.getFileName();
final JavaTokenFilter tokenFilter = createTokenFilter(sourceCode);
final ConstructorDetector constructorDetector = new ConstructorDetector(ignoreIdentifiers);
Token currentToken = (Token) tokenFilter.getNextToken();
while (currentToken != null) {
processToken(tokenEntries, fileName, currentToken, constructorDetector);
currentToken = (Token) tokenFilter.getNextToken();
}
tokenEntries.add(TokenEntry.getEOF());
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) throws IOException {
constructorDetector = new ConstructorDetector(ignoreIdentifiers);
super.tokenize(sourceCode, tokenEntries);
}
private JavaTokenFilter createTokenFilter(final SourceCode sourceCode) {
@Override
protected TokenManager getLexerForSource(SourceCode sourceCode) {
final StringBuilder stringBuilder = sourceCode.getCodeBuffer();
// Note that Java version is irrelevant for tokenizing
final LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(JavaLanguageModule.NAME)
.getVersion("1.4").getLanguageVersionHandler();
final TokenManager tokenMgr = languageVersionHandler.getParser(languageVersionHandler.getDefaultParserOptions())
.getTokenManager(sourceCode.getFileName(), new StringReader(stringBuilder.toString()));
return new JavaTokenFilter(tokenMgr, ignoreAnnotations);
return new JavaTokenManager(new StringReader(stringBuilder.toString()));
}
private void processToken(Tokens tokenEntries, String fileName, Token currentToken,
ConstructorDetector constructorDetector) {
String image = currentToken.image;
@Override
protected TokenFilter getTokenFilter(TokenManager tokenManager) {
return new JavaTokenFilter(tokenManager, ignoreAnnotations);
}
constructorDetector.restoreConstructorToken(tokenEntries, currentToken);
@Override
protected TokenEntry processToken(Tokens tokenEntries, GenericToken currentToken, String fileName) {
String image = currentToken.getImage();
Token javaToken = (Token) currentToken;
if (ignoreLiterals && (currentToken.kind == JavaParserConstants.STRING_LITERAL
|| currentToken.kind == JavaParserConstants.CHARACTER_LITERAL
|| currentToken.kind == JavaParserConstants.DECIMAL_LITERAL
|| currentToken.kind == JavaParserConstants.FLOATING_POINT_LITERAL)) {
image = String.valueOf(currentToken.kind);
constructorDetector.restoreConstructorToken(tokenEntries, javaToken);
if (ignoreLiterals && (javaToken.kind == JavaParserConstants.STRING_LITERAL
|| javaToken.kind == JavaParserConstants.CHARACTER_LITERAL
|| javaToken.kind == JavaParserConstants.DECIMAL_LITERAL
|| javaToken.kind == JavaParserConstants.FLOATING_POINT_LITERAL)) {
image = String.valueOf(javaToken.kind);
}
if (ignoreIdentifiers && currentToken.kind == JavaParserConstants.IDENTIFIER) {
image = String.valueOf(currentToken.kind);
if (ignoreIdentifiers && javaToken.kind == JavaParserConstants.IDENTIFIER) {
image = String.valueOf(javaToken.kind);
}
constructorDetector.processToken(currentToken);
constructorDetector.processToken(javaToken);
tokenEntries.add(new TokenEntry(image, fileName, currentToken.beginLine));
return new TokenEntry(image, fileName, currentToken.getBeginLine());
}
public void setIgnoreLiterals(boolean ignore) {

View File

@@ -88,7 +88,7 @@ public class JavaTokensTokenizerTest {
}
@Test
public void testIgnoreComments() {
public void testIgnoreComments() throws IOException {
JavaTokenizer t = new JavaTokenizer();
t.setIgnoreAnnotations(false);
SourceCode sourceCode = new SourceCode(new SourceCode.StringCodeLoader("package foo.bar.baz;" + PMD.EOL

View File

@@ -7,6 +7,7 @@ package net.sourceforge.pmd.cpd;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
@@ -32,7 +33,7 @@ public class MatchAlgorithmTest {
}
@Test
public void testSimple() {
public void testSimple() throws IOException {
JavaTokenizer tokenizer = new JavaTokenizer();
SourceCode sourceCode = new SourceCode(new SourceCode.StringCodeLoader(getSampleCode(), "Foo.java"));
Tokens tokens = new Tokens();
@@ -63,7 +64,7 @@ public class MatchAlgorithmTest {
}
@Test
public void testIgnore() {
public void testIgnore() throws IOException {
JavaTokenizer tokenizer = new JavaTokenizer();
tokenizer.setIgnoreLiterals(true);
tokenizer.setIgnoreIdentifiers(true);

View File

@@ -4,56 +4,39 @@
package net.sourceforge.pmd.cpd;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
import net.sourceforge.pmd.cpd.token.TokenFilter;
import net.sourceforge.pmd.lang.LanguageRegistry;
import net.sourceforge.pmd.lang.LanguageVersionHandler;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
import net.sourceforge.pmd.lang.ecmascript.EcmascriptLanguageModule;
import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.GenericToken;
import net.sourceforge.pmd.lang.ecmascript5.Ecmascript5TokenManager;
import net.sourceforge.pmd.lang.ecmascript5.ast.Ecmascript5ParserConstants;
import net.sourceforge.pmd.lang.ecmascript5.ast.Token;
import net.sourceforge.pmd.util.IOUtil;
/**
* The Ecmascript Tokenizer
*/
public class EcmascriptTokenizer implements Tokenizer {
public class EcmascriptTokenizer extends JavaCCTokenizer {
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
protected TokenManager getLexerForSource(SourceCode sourceCode) {
StringBuilder buffer = sourceCode.getCodeBuffer();
try (Reader reader = new StringReader(buffer.toString())) {
LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(EcmascriptLanguageModule.NAME)
.getDefaultVersion().getLanguageVersionHandler();
TokenFilter tokenFilter = new JavaCCTokenFilter(languageVersionHandler
.getParser(languageVersionHandler.getDefaultParserOptions())
.getTokenManager(sourceCode.getFileName(), reader));
Token currentToken = (Token) tokenFilter.getNextToken();
while (currentToken != null) {
tokenEntries.add(
new TokenEntry(getTokenImage(currentToken), sourceCode.getFileName(), currentToken.beginLine));
currentToken = (Token) tokenFilter.getNextToken();
}
tokenEntries.add(TokenEntry.getEOF());
System.err.println("Added " + sourceCode.getFileName());
} catch (TokenMgrError err) {
err.printStackTrace();
System.err.println("Skipping " + sourceCode.getFileName() + " due to parse error");
tokenEntries.add(TokenEntry.getEOF());
} catch (IOException e) {
e.printStackTrace();
}
return new Ecmascript5TokenManager(IOUtil.skipBOM(new StringReader(buffer.toString())));
}
private String getTokenImage(Token token) {
@Override
protected TokenEntry processToken(Tokens tokenEntries, GenericToken currentToken, String filename) {
return new TokenEntry(getTokenImage(currentToken), filename, currentToken.getBeginLine());
}
private String getTokenImage(GenericToken token) {
Token jsToken = (Token) token;
// Remove line continuation characters from string literals
if (token.kind == Ecmascript5ParserConstants.STRING_LITERAL
|| token.kind == Ecmascript5ParserConstants.UNTERMINATED_STRING_LITERAL) {
return token.image.replaceAll("(?<!\\\\)\\\\(\\r\\n|\\r|\\n)", "");
if (jsToken.kind == Ecmascript5ParserConstants.STRING_LITERAL
|| jsToken.kind == Ecmascript5ParserConstants.UNTERMINATED_STRING_LITERAL) {
return token.getImage().replaceAll("(?<!\\\\)\\\\(\\r\\n|\\r|\\n)", "");
}
return token.image;
return token.getImage();
}
}

View File

@@ -4,38 +4,21 @@
package net.sourceforge.pmd.cpd;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
import net.sourceforge.pmd.cpd.token.TokenFilter;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.matlab.MatlabTokenManager;
import net.sourceforge.pmd.lang.matlab.ast.Token;
import net.sourceforge.pmd.util.IOUtil;
/**
* The Matlab Tokenizer.
*/
public class MatlabTokenizer implements Tokenizer {
public class MatlabTokenizer extends JavaCCTokenizer {
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
protected TokenManager getLexerForSource(SourceCode sourceCode) {
StringBuilder buffer = sourceCode.getCodeBuffer();
try (Reader reader = IOUtil.skipBOM(new StringReader(buffer.toString()))) {
final TokenFilter tokenFilter = new JavaCCTokenFilter(new MatlabTokenManager(reader));
Token currentToken = (Token) tokenFilter.getNextToken();
while (currentToken != null) {
tokenEntries.add(new TokenEntry(currentToken.image, sourceCode.getFileName(), currentToken.beginLine));
currentToken = (Token) tokenFilter.getNextToken();
}
tokenEntries.add(TokenEntry.getEOF());
System.err.println("Added " + sourceCode.getFileName());
} catch (TokenMgrError | IOException err) {
err.printStackTrace();
System.err.println("Skipping " + sourceCode.getFileName() + " due to parse error");
tokenEntries.add(TokenEntry.getEOF());
}
return new MatlabTokenManager(IOUtil.skipBOM(new StringReader(buffer.toString())));
}
}

View File

@@ -4,39 +4,21 @@
package net.sourceforge.pmd.cpd;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
import net.sourceforge.pmd.cpd.token.TokenFilter;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.objectivec.ObjectiveCTokenManager;
import net.sourceforge.pmd.lang.objectivec.ast.Token;
import net.sourceforge.pmd.util.IOUtil;
/**
* The Objective-C Tokenizer
*/
public class ObjectiveCTokenizer implements Tokenizer {
public class ObjectiveCTokenizer extends JavaCCTokenizer {
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
protected TokenManager getLexerForSource(SourceCode sourceCode) {
StringBuilder buffer = sourceCode.getCodeBuffer();
try (Reader reader = new StringReader(buffer.toString())) {
final TokenFilter tokenFilter = new JavaCCTokenFilter(new ObjectiveCTokenManager(reader));
Token currentToken = (Token) tokenFilter.getNextToken();
while (currentToken != null) {
tokenEntries.add(new TokenEntry(currentToken.image, sourceCode.getFileName(), currentToken.beginLine));
currentToken = (Token) tokenFilter.getNextToken();
}
tokenEntries.add(TokenEntry.getEOF());
System.err.println("Added " + sourceCode.getFileName());
} catch (TokenMgrError err) {
err.printStackTrace();
System.err.println("Skipping " + sourceCode.getFileName() + " due to parse error");
tokenEntries.add(TokenEntry.getEOF());
} catch (IOException e) {
e.printStackTrace();
}
return new ObjectiveCTokenManager(IOUtil.skipBOM(new StringReader(buffer.toString())));
}
}

View File

@@ -6,18 +6,16 @@ package net.sourceforge.pmd.cpd;
import java.io.StringReader;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;
import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
import net.sourceforge.pmd.cpd.token.TokenFilter;
import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.GenericToken;
import net.sourceforge.pmd.lang.plsql.PLSQLTokenManager;
import net.sourceforge.pmd.lang.plsql.ast.PLSQLParserConstants;
import net.sourceforge.pmd.lang.plsql.ast.Token;
import net.sourceforge.pmd.util.IOUtil;
public class PLSQLTokenizer implements Tokenizer {
private static final Logger LOGGER = Logger.getLogger(PLSQLTokenizer.class.getName());
public class PLSQLTokenizer extends JavaCCTokenizer {
// This is actually useless, the comments are special tokens, never taken into account by CPD
@Deprecated
public static final String IGNORE_COMMENTS = "ignore_comments";
@@ -51,58 +49,31 @@ public class PLSQLTokenizer implements Tokenizer {
this.ignoreIdentifiers = ignore;
}
/**
* Read Reader from SourceCode and output an ordered tree of PLSQL tokens.
*
* @param sourceCode
* PLSQL source in file, string or database (any suitable object
* that can return a Reader).
* @param tokenEntries
* Derived based on PLSQL Abstract Syntax Tree (derived from
* PLDOc parser.)
*/
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
long encounteredTokens = 0;
long addedTokens = 0;
protected TokenEntry processToken(Tokens tokenEntries, GenericToken currentToken, String fileName) {
String image = currentToken.getImage();
if (LOGGER.isLoggable(Level.FINE)) {
LOGGER.fine("PLSQLTokenizer: ignoreIdentifiers==" + ignoreIdentifiers);
LOGGER.fine("PLSQLTokenizer: ignoreLiterals==" + ignoreLiterals);
Token plsqlToken = (Token) currentToken;
if (ignoreIdentifiers && plsqlToken.kind == PLSQLParserConstants.IDENTIFIER) {
image = String.valueOf(plsqlToken.kind);
}
String fileName = sourceCode.getFileName();
StringBuilder sb = sourceCode.getCodeBuffer();
TokenFilter tokenFilter = new JavaCCTokenFilter(new PLSQLTokenManager(new StringReader(sb.toString())));
Token currentToken = (Token) tokenFilter.getNextToken();
while (currentToken != null) {
String image = currentToken.image;
encounteredTokens++;
if (ignoreIdentifiers && currentToken.kind == PLSQLParserConstants.IDENTIFIER) {
image = String.valueOf(currentToken.kind);
}
if (ignoreLiterals && (currentToken.kind == PLSQLParserConstants.UNSIGNED_NUMERIC_LITERAL
|| currentToken.kind == PLSQLParserConstants.FLOAT_LITERAL
|| currentToken.kind == PLSQLParserConstants.INTEGER_LITERAL
|| currentToken.kind == PLSQLParserConstants.CHARACTER_LITERAL
|| currentToken.kind == PLSQLParserConstants.STRING_LITERAL
|| currentToken.kind == PLSQLParserConstants.QUOTED_LITERAL)) {
image = String.valueOf(currentToken.kind);
}
tokenEntries.add(new TokenEntry(image, fileName, currentToken.beginLine));
addedTokens++;
currentToken = (Token) tokenFilter.getNextToken();
}
tokenEntries.add(TokenEntry.getEOF());
if (LOGGER.isLoggable(Level.FINE)) {
LOGGER.fine(sourceCode.getFileName() + ": encountered " + encounteredTokens + " tokens;" + " added "
+ addedTokens + " tokens");
if (ignoreLiterals && (plsqlToken.kind == PLSQLParserConstants.UNSIGNED_NUMERIC_LITERAL
|| plsqlToken.kind == PLSQLParserConstants.FLOAT_LITERAL
|| plsqlToken.kind == PLSQLParserConstants.INTEGER_LITERAL
|| plsqlToken.kind == PLSQLParserConstants.CHARACTER_LITERAL
|| plsqlToken.kind == PLSQLParserConstants.STRING_LITERAL
|| plsqlToken.kind == PLSQLParserConstants.QUOTED_LITERAL)) {
image = String.valueOf(plsqlToken.kind);
}
return new TokenEntry(image, fileName, currentToken.getBeginLine());
}
@Override
protected TokenManager getLexerForSource(SourceCode sourceCode) {
StringBuilder stringBuilder = sourceCode.getCodeBuffer();
return new PLSQLTokenManager(IOUtil.skipBOM(new StringReader(stringBuilder.toString())));
}
}

View File

@ -4,44 +4,21 @@
package net.sourceforge.pmd.cpd;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
import net.sourceforge.pmd.cpd.token.TokenFilter;
import net.sourceforge.pmd.lang.LanguageRegistry;
import net.sourceforge.pmd.lang.LanguageVersionHandler;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
import net.sourceforge.pmd.lang.python.PythonLanguageModule;
import net.sourceforge.pmd.lang.python.ast.Token;
import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.python.PythonTokenManager;
import net.sourceforge.pmd.util.IOUtil;
/**
* The Python tokenizer.
*/
public class PythonTokenizer implements Tokenizer {
public class PythonTokenizer extends JavaCCTokenizer {
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
protected TokenManager getLexerForSource(SourceCode sourceCode) {
StringBuilder buffer = sourceCode.getCodeBuffer();
try (Reader reader = IOUtil.skipBOM(new StringReader(buffer.toString()))) {
LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(PythonLanguageModule.NAME)
.getDefaultVersion().getLanguageVersionHandler();
TokenFilter tokenFilter = new JavaCCTokenFilter(languageVersionHandler
.getParser(languageVersionHandler.getDefaultParserOptions())
.getTokenManager(sourceCode.getFileName(), reader));
Token currentToken = (Token) tokenFilter.getNextToken();
while (currentToken != null) {
tokenEntries.add(new TokenEntry(currentToken.image, sourceCode.getFileName(), currentToken.beginLine));
currentToken = (Token) tokenFilter.getNextToken();
}
tokenEntries.add(TokenEntry.getEOF());
System.err.println("Added " + sourceCode);
} catch (TokenMgrError | IOException err) {
err.printStackTrace();
System.err.println("Skipping " + sourceCode + " due to parse error");
tokenEntries.add(TokenEntry.getEOF());
}
return new PythonTokenManager(IOUtil.skipBOM(new StringReader(buffer.toString())));
}
}