From 166e17f483bc3df38f2be547ba63aa30e29d9f02 Mon Sep 17 00:00:00 2001 From: Maikel Steneker Date: Mon, 12 Nov 2018 14:35:59 +0100 Subject: [PATCH] Simplified Kotlin tokenizer. --- .../sourceforge/pmd/cpd/KotlinTokenizer.java | 92 +++++-------------- 1 file changed, 25 insertions(+), 67 deletions(-) diff --git a/pmd-kotlin/src/main/java/net/sourceforge/pmd/cpd/KotlinTokenizer.java b/pmd-kotlin/src/main/java/net/sourceforge/pmd/cpd/KotlinTokenizer.java index 8ff8349c1f..17da5ec802 100644 --- a/pmd-kotlin/src/main/java/net/sourceforge/pmd/cpd/KotlinTokenizer.java +++ b/pmd-kotlin/src/main/java/net/sourceforge/pmd/cpd/KotlinTokenizer.java @@ -4,101 +4,59 @@ package net.sourceforge.pmd.cpd; -import org.antlr.v4.runtime.ANTLRInputStream; -import org.antlr.v4.runtime.BaseErrorListener; +import org.antlr.v4.runtime.CharStream; import org.antlr.v4.runtime.Lexer; -import org.antlr.v4.runtime.RecognitionException; -import org.antlr.v4.runtime.Recognizer; import org.antlr.v4.runtime.Token; -import net.sourceforge.pmd.lang.ast.TokenMgrError; +import net.sourceforge.pmd.cpd.token.AntlrToken; +import net.sourceforge.pmd.lang.antlr.AntlrTokenManager; import net.sourceforge.pmd.lang.kotlin.antlr4.Kotlin; /** * The Kotlin Tokenizer */ -public class KotlinTokenizer implements Tokenizer { +public class KotlinTokenizer extends AntlrTokenizer { private boolean discardingPackageAndImport = false; @Override - public void tokenize(final SourceCode sourceCode, final Tokens tokenEntries) { - final StringBuilder buffer = sourceCode.getCodeBuffer(); - - try { - final ANTLRInputStream ais = new ANTLRInputStream(buffer.toString()); - final Kotlin lexer = new Kotlin(ais); - - lexer.removeErrorListeners(); - lexer.addErrorListener(new ErrorHandler()); - Token token = lexer.nextToken(); - - while (token.getType() != Token.EOF) { - analyzeTokenStart(token); - if (token.getChannel() != Lexer.HIDDEN && token.getType() != Kotlin.NL && !isDiscarding()) { - final TokenEntry tokenEntry = new TokenEntry(token.getText(), sourceCode.getFileName(), token.getLine()); - tokenEntries.add(tokenEntry); - } - analyzeTokenEnd(token); - token = lexer.nextToken(); + protected AntlrTokenManager getLexerForSource(SourceCode sourceCode) { + CharStream charStream = AntlrTokenizer.getCharStreamFromSourceCode(sourceCode); + final Lexer lexer = new Kotlin(charStream); + final AntlrTokenManager tokenManager = new AntlrTokenManager(lexer, sourceCode.getFileName()) { + @Override + public Object getNextToken() { + AntlrToken nextToken; + boolean done = false; + do { + nextToken = (AntlrToken) super.getNextToken(); + analyzeTokenStart(nextToken); + if (!nextToken.isHidden() && nextToken.getType() != Kotlin.NL && !isDiscarding()) { + done = true; + } + analyzeTokenEnd(nextToken); + } while (!done && nextToken.getType() != Token.EOF); + return nextToken; } - } catch (final ANTLRSyntaxError err) { - // Wrap exceptions of the Kotlin tokenizer in a TokenMgrError, so - // they are correctly handled - // when CPD is executed with the '--skipLexicalErrors' command line - // option - throw new TokenMgrError("Lexical error in file " + sourceCode.getFileName() + " at line " + err.getLine() - + ", column " + err.getColumn() + ". Encountered: " + err.getMessage(), - TokenMgrError.LEXICAL_ERROR); - } finally { - tokenEntries.add(TokenEntry.getEOF()); - } + }; + return tokenManager; } private boolean isDiscarding() { return discardingPackageAndImport; } - private void analyzeTokenStart(final Token currentToken) { + private void analyzeTokenStart(final AntlrToken currentToken) { final int type = currentToken.getType(); if (type == Kotlin.PACKAGE || type == Kotlin.IMPORT) { discardingPackageAndImport = true; } } - private void analyzeTokenEnd(final Token currentToken) { + private void analyzeTokenEnd(final AntlrToken currentToken) { final int type = currentToken.getType(); if (discardingPackageAndImport && (type == Kotlin.SEMICOLON || type == Kotlin.NL)) { discardingPackageAndImport = false; } } - - - private static class ErrorHandler extends BaseErrorListener { - @Override - public void syntaxError(final Recognizer recognizer, final Object offendingSymbol, final int line, final int charPositionInLine, - final String msg, final RecognitionException ex) { - throw new ANTLRSyntaxError(msg, line, charPositionInLine, ex); - } - } - - private static class ANTLRSyntaxError extends RuntimeException { - private static final long serialVersionUID = 1L; - private final int line; - private final int column; - - ANTLRSyntaxError(final String msg, final int line, final int column, final RecognitionException cause) { - super(msg, cause); - this.line = line; - this.column = column; - } - - public int getLine() { - return line; - } - - public int getColumn() { - return column; - } - } }