From b745f331b8ca490e0046b566b5a94a2eb10aca84 Mon Sep 17 00:00:00 2001 From: Andreas Dangel Date: Sun, 20 Jan 2019 09:59:59 +0100 Subject: [PATCH] CPD: Fix error handling for lexical errors * TokenMgrError must not be caught by the tokenizer. This is handled by CPD itself * The token managers need to know the filename for proper error messages --- .../pmd/cpd/EcmascriptTokenizer.java | 9 ++------- .../sourceforge/pmd/cpd/MatlabTokenizer.java | 11 +++++----- .../pmd/cpd/ObjectiveCTokenizer.java | 13 +++++------- .../sourceforge/pmd/cpd/PLSQLTokenizer.java | 17 +++++++++++++--- .../sourceforge/pmd/cpd/PythonTokenizer.java | 20 +++++++------------ 5 files changed, 33 insertions(+), 37 deletions(-) diff --git a/pmd-javascript/src/main/java/net/sourceforge/pmd/cpd/EcmascriptTokenizer.java b/pmd-javascript/src/main/java/net/sourceforge/pmd/cpd/EcmascriptTokenizer.java index 3c236c57e4..bafbde5a9f 100644 --- a/pmd-javascript/src/main/java/net/sourceforge/pmd/cpd/EcmascriptTokenizer.java +++ b/pmd-javascript/src/main/java/net/sourceforge/pmd/cpd/EcmascriptTokenizer.java @@ -12,7 +12,6 @@ import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter; import net.sourceforge.pmd.cpd.token.TokenFilter; import net.sourceforge.pmd.lang.LanguageRegistry; import net.sourceforge.pmd.lang.LanguageVersionHandler; -import net.sourceforge.pmd.lang.ast.TokenMgrError; import net.sourceforge.pmd.lang.ecmascript.EcmascriptLanguageModule; import net.sourceforge.pmd.lang.ecmascript5.ast.Ecmascript5ParserConstants; import net.sourceforge.pmd.lang.ecmascript5.ast.Token; @@ -37,14 +36,10 @@ public class EcmascriptTokenizer implements Tokenizer { new TokenEntry(getTokenImage(currentToken), sourceCode.getFileName(), currentToken.beginLine)); currentToken = (Token) tokenFilter.getNextToken(); } - tokenEntries.add(TokenEntry.getEOF()); - System.err.println("Added " + sourceCode.getFileName()); - } catch (TokenMgrError err) { - err.printStackTrace(); - System.err.println("Skipping " + sourceCode.getFileName() + " due to parse error"); - tokenEntries.add(TokenEntry.getEOF()); } catch (IOException e) { e.printStackTrace(); + } finally { + tokenEntries.add(TokenEntry.getEOF()); } } diff --git a/pmd-matlab/src/main/java/net/sourceforge/pmd/cpd/MatlabTokenizer.java b/pmd-matlab/src/main/java/net/sourceforge/pmd/cpd/MatlabTokenizer.java index b0eb103f4e..7879052b99 100644 --- a/pmd-matlab/src/main/java/net/sourceforge/pmd/cpd/MatlabTokenizer.java +++ b/pmd-matlab/src/main/java/net/sourceforge/pmd/cpd/MatlabTokenizer.java @@ -10,7 +10,6 @@ import java.io.StringReader; import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter; import net.sourceforge.pmd.cpd.token.TokenFilter; -import net.sourceforge.pmd.lang.ast.TokenMgrError; import net.sourceforge.pmd.lang.matlab.MatlabTokenManager; import net.sourceforge.pmd.lang.matlab.ast.Token; import net.sourceforge.pmd.util.IOUtil; @@ -24,17 +23,17 @@ public class MatlabTokenizer implements Tokenizer { public void tokenize(SourceCode sourceCode, Tokens tokenEntries) { StringBuilder buffer = sourceCode.getCodeBuffer(); try (Reader reader = IOUtil.skipBOM(new StringReader(buffer.toString()))) { - final TokenFilter tokenFilter = new JavaCCTokenFilter(new MatlabTokenManager(reader)); + MatlabTokenManager tokenManager = new MatlabTokenManager(reader); + tokenManager.setFileName(sourceCode.getFileName()); + final TokenFilter tokenFilter = new JavaCCTokenFilter(tokenManager); Token currentToken = (Token) tokenFilter.getNextToken(); while (currentToken != null) { tokenEntries.add(new TokenEntry(currentToken.image, sourceCode.getFileName(), currentToken.beginLine)); currentToken = (Token) tokenFilter.getNextToken(); } - tokenEntries.add(TokenEntry.getEOF()); - System.err.println("Added " + sourceCode.getFileName()); - } catch (TokenMgrError | IOException err) { + } catch (IOException err) { err.printStackTrace(); - System.err.println("Skipping " + sourceCode.getFileName() + " due to parse error"); + } finally { tokenEntries.add(TokenEntry.getEOF()); } } diff --git a/pmd-objectivec/src/main/java/net/sourceforge/pmd/cpd/ObjectiveCTokenizer.java b/pmd-objectivec/src/main/java/net/sourceforge/pmd/cpd/ObjectiveCTokenizer.java index 2bb7e7deac..d224ef211f 100644 --- a/pmd-objectivec/src/main/java/net/sourceforge/pmd/cpd/ObjectiveCTokenizer.java +++ b/pmd-objectivec/src/main/java/net/sourceforge/pmd/cpd/ObjectiveCTokenizer.java @@ -10,7 +10,6 @@ import java.io.StringReader; import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter; import net.sourceforge.pmd.cpd.token.TokenFilter; -import net.sourceforge.pmd.lang.ast.TokenMgrError; import net.sourceforge.pmd.lang.objectivec.ObjectiveCTokenManager; import net.sourceforge.pmd.lang.objectivec.ast.Token; @@ -23,20 +22,18 @@ public class ObjectiveCTokenizer implements Tokenizer { public void tokenize(SourceCode sourceCode, Tokens tokenEntries) { StringBuilder buffer = sourceCode.getCodeBuffer(); try (Reader reader = new StringReader(buffer.toString())) { - final TokenFilter tokenFilter = new JavaCCTokenFilter(new ObjectiveCTokenManager(reader)); + ObjectiveCTokenManager tokenManager = new ObjectiveCTokenManager(reader); + tokenManager.setFileName(sourceCode.getFileName()); + final TokenFilter tokenFilter = new JavaCCTokenFilter(tokenManager); Token currentToken = (Token) tokenFilter.getNextToken(); while (currentToken != null) { tokenEntries.add(new TokenEntry(currentToken.image, sourceCode.getFileName(), currentToken.beginLine)); currentToken = (Token) tokenFilter.getNextToken(); } - tokenEntries.add(TokenEntry.getEOF()); - System.err.println("Added " + sourceCode.getFileName()); - } catch (TokenMgrError err) { - err.printStackTrace(); - System.err.println("Skipping " + sourceCode.getFileName() + " due to parse error"); - tokenEntries.add(TokenEntry.getEOF()); } catch (IOException e) { e.printStackTrace(); + } finally { + tokenEntries.add(TokenEntry.getEOF()); } } } diff --git a/pmd-plsql/src/main/java/net/sourceforge/pmd/cpd/PLSQLTokenizer.java b/pmd-plsql/src/main/java/net/sourceforge/pmd/cpd/PLSQLTokenizer.java index 74cfb32a41..ab090acac1 100644 --- a/pmd-plsql/src/main/java/net/sourceforge/pmd/cpd/PLSQLTokenizer.java +++ b/pmd-plsql/src/main/java/net/sourceforge/pmd/cpd/PLSQLTokenizer.java @@ -11,7 +11,10 @@ import java.util.logging.Logger; import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter; import net.sourceforge.pmd.cpd.token.TokenFilter; -import net.sourceforge.pmd.lang.plsql.PLSQLTokenManager; +import net.sourceforge.pmd.lang.LanguageRegistry; +import net.sourceforge.pmd.lang.LanguageVersionHandler; +import net.sourceforge.pmd.lang.TokenManager; +import net.sourceforge.pmd.lang.plsql.PLSQLLanguageModule; import net.sourceforge.pmd.lang.plsql.ast.PLSQLParserConstants; import net.sourceforge.pmd.lang.plsql.ast.Token; @@ -72,9 +75,8 @@ public class PLSQLTokenizer implements Tokenizer { } String fileName = sourceCode.getFileName(); - StringBuilder sb = sourceCode.getCodeBuffer(); - TokenFilter tokenFilter = new JavaCCTokenFilter(new PLSQLTokenManager(new StringReader(sb.toString()))); + TokenFilter tokenFilter = createTokenFilter(sourceCode); Token currentToken = (Token) tokenFilter.getNextToken(); while (currentToken != null) { String image = currentToken.image; @@ -105,4 +107,13 @@ public class PLSQLTokenizer implements Tokenizer { } } + private JavaCCTokenFilter createTokenFilter(final SourceCode sourceCode) { + final StringBuilder stringBuilder = sourceCode.getCodeBuffer(); + final LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(PLSQLLanguageModule.NAME) + .getDefaultVersion().getLanguageVersionHandler(); + final TokenManager tokenMgr = languageVersionHandler.getParser(languageVersionHandler.getDefaultParserOptions()) + .getTokenManager(sourceCode.getFileName(), new StringReader(stringBuilder.toString())); + return new JavaCCTokenFilter(tokenMgr); + } + } diff --git a/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java b/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java index 9996d61985..6b518c2061 100644 --- a/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java +++ b/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java @@ -10,10 +10,7 @@ import java.io.StringReader; import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter; import net.sourceforge.pmd.cpd.token.TokenFilter; -import net.sourceforge.pmd.lang.LanguageRegistry; -import net.sourceforge.pmd.lang.LanguageVersionHandler; -import net.sourceforge.pmd.lang.ast.TokenMgrError; -import net.sourceforge.pmd.lang.python.PythonLanguageModule; +import net.sourceforge.pmd.lang.python.PythonTokenManager; import net.sourceforge.pmd.lang.python.ast.Token; import net.sourceforge.pmd.util.IOUtil; @@ -26,21 +23,18 @@ public class PythonTokenizer implements Tokenizer { public void tokenize(SourceCode sourceCode, Tokens tokenEntries) { StringBuilder buffer = sourceCode.getCodeBuffer(); try (Reader reader = IOUtil.skipBOM(new StringReader(buffer.toString()))) { - LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(PythonLanguageModule.NAME) - .getDefaultVersion().getLanguageVersionHandler(); - TokenFilter tokenFilter = new JavaCCTokenFilter(languageVersionHandler - .getParser(languageVersionHandler.getDefaultParserOptions()) - .getTokenManager(sourceCode.getFileName(), reader)); + PythonTokenManager tokenManager = new PythonTokenManager(reader); + tokenManager.setFileName(sourceCode.getFileName()); + + TokenFilter tokenFilter = new JavaCCTokenFilter(tokenManager); Token currentToken = (Token) tokenFilter.getNextToken(); while (currentToken != null) { tokenEntries.add(new TokenEntry(currentToken.image, sourceCode.getFileName(), currentToken.beginLine)); currentToken = (Token) tokenFilter.getNextToken(); } - tokenEntries.add(TokenEntry.getEOF()); - System.err.println("Added " + sourceCode); - } catch (TokenMgrError | IOException err) { + } catch (IOException err) { err.printStackTrace(); - System.err.println("Skipping " + sourceCode + " due to parse error"); + } finally { tokenEntries.add(TokenEntry.getEOF()); } }