[core] Add a generic JavaCCTokenizer for CPD
This commit is contained in:
@ -4,51 +4,38 @@
|
||||
|
||||
package net.sourceforge.pmd.cpd;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
|
||||
import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
|
||||
import net.sourceforge.pmd.cpd.token.TokenFilter;
|
||||
import net.sourceforge.pmd.lang.LanguageRegistry;
|
||||
import net.sourceforge.pmd.lang.LanguageVersionHandler;
|
||||
import net.sourceforge.pmd.lang.ecmascript.EcmascriptLanguageModule;
|
||||
import net.sourceforge.pmd.lang.TokenManager;
|
||||
import net.sourceforge.pmd.lang.ast.GenericToken;
|
||||
import net.sourceforge.pmd.lang.ecmascript5.Ecmascript5TokenManager;
|
||||
import net.sourceforge.pmd.lang.ecmascript5.ast.Ecmascript5ParserConstants;
|
||||
import net.sourceforge.pmd.lang.ecmascript5.ast.Token;
|
||||
import net.sourceforge.pmd.util.IOUtil;
|
||||
|
||||
/**
|
||||
* The Ecmascript Tokenizer
|
||||
*/
|
||||
public class EcmascriptTokenizer implements Tokenizer {
|
||||
public class EcmascriptTokenizer extends JavaCCTokenizer {
|
||||
|
||||
@Override
|
||||
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
|
||||
protected TokenManager getLexerForSource(SourceCode sourceCode) {
|
||||
StringBuilder buffer = sourceCode.getCodeBuffer();
|
||||
try (Reader reader = new StringReader(buffer.toString())) {
|
||||
LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(EcmascriptLanguageModule.NAME)
|
||||
.getDefaultVersion().getLanguageVersionHandler();
|
||||
TokenFilter tokenFilter = new JavaCCTokenFilter(languageVersionHandler
|
||||
.getParser(languageVersionHandler.getDefaultParserOptions())
|
||||
.getTokenManager(sourceCode.getFileName(), reader));
|
||||
Token currentToken = (Token) tokenFilter.getNextToken();
|
||||
while (currentToken != null) {
|
||||
tokenEntries.add(
|
||||
new TokenEntry(getTokenImage(currentToken), sourceCode.getFileName(), currentToken.beginLine));
|
||||
currentToken = (Token) tokenFilter.getNextToken();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
tokenEntries.add(TokenEntry.getEOF());
|
||||
}
|
||||
return new Ecmascript5TokenManager(IOUtil.skipBOM(new StringReader(buffer.toString())));
|
||||
}
|
||||
|
||||
private String getTokenImage(Token token) {
|
||||
@Override
|
||||
protected TokenEntry processToken(Tokens tokenEntries, GenericToken currentToken, String filename) {
|
||||
return new TokenEntry(getTokenImage(currentToken), filename, currentToken.getBeginLine());
|
||||
}
|
||||
|
||||
private String getTokenImage(GenericToken token) {
|
||||
Token jsToken = (Token) token;
|
||||
// Remove line continuation characters from string literals
|
||||
if (token.kind == Ecmascript5ParserConstants.STRING_LITERAL
|
||||
|| token.kind == Ecmascript5ParserConstants.UNTERMINATED_STRING_LITERAL) {
|
||||
return token.image.replaceAll("(?<!\\\\)\\\\(\\r\\n|\\r|\\n)", "");
|
||||
if (jsToken.kind == Ecmascript5ParserConstants.STRING_LITERAL
|
||||
|| jsToken.kind == Ecmascript5ParserConstants.UNTERMINATED_STRING_LITERAL) {
|
||||
return token.getImage().replaceAll("(?<!\\\\)\\\\(\\r\\n|\\r|\\n)", "");
|
||||
}
|
||||
return token.image;
|
||||
return token.getImage();
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user