From d4c05d1fb5073f5375ce81a2b53926dea95c56e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Fournier?= Date: Fri, 10 Feb 2023 16:24:11 +0100 Subject: [PATCH] Make pmd language have a hook to launch CPD This allows removing the CPD language instances, sharing more logic between CPD and PMD, and using language properties to configure CPD and PMD uniformly. --- .../sourceforge/pmd/cpd/ApexTokenizer.java | 3 +- .../pmd/cpd/AbstractTokenizer.java | 4 +- .../net/sourceforge/pmd/cpd/AnyTokenizer.java | 3 +- .../net/sourceforge/pmd/cpd/Tokenizer.java | 38 ++++++++++++- .../pmd/cpd/internal/AntlrTokenizer.java | 46 +++------------ .../pmd/cpd/internal/JavaCCTokenizer.java | 55 +----------------- .../pmd/cpd/internal/TokenizerBase.java | 42 ++++++++++++++ .../pmd/cpd/token/TokenFilter.java | 4 +- .../cpd/token/internal/BaseTokenFilter.java | 2 +- .../pmd/lang/CpdOnlyLanguageModuleBase.java | 28 +++++++++ .../net/sourceforge/pmd/lang/Language.java | 34 ++++++++++- .../pmd/lang/LanguageRegistry.java | 1 + .../pmd/lang/ast/impl/javacc/CharStream.java | 9 +++ .../lang/impl/SimpleLanguageModuleBase.java | 7 ++- .../net/sourceforge/pmd/cpd/CPPLanguage.java | 32 ----------- .../net/sourceforge/pmd/cpd/CPPTokenizer.java | 57 +++++++------------ .../pmd/lang/cpp/CppLanguageModule.java | 56 ++++++++++++++++++ .../services/net.sourceforge.pmd.cpd.Language | 1 - .../net.sourceforge.pmd.lang.Language | 1 + .../pmd/cpd/CppCharStreamTest.java | 12 ++-- .../net/sourceforge/pmd/cpd/CsLanguage.java | 28 --------- .../net/sourceforge/pmd/cpd/CsTokenizer.java | 38 +++++-------- .../pmd/lang/cs/CsLanguageModule.java | 42 ++++++++++++++ .../services/net.sourceforge.pmd.cpd.Language | 1 - .../net.sourceforge.pmd.lang.Language | 1 + .../sourceforge/pmd/cpd/DartTokenizer.java | 9 +-- .../sourceforge/pmd/cpd/GroovyTokenizer.java | 3 +- .../pmd/lang/html/ast/HtmlTokenizer.java | 3 +- .../sourceforge/pmd/cpd/JavaTokenizer.java | 16 ++---- .../pmd/cpd/EcmascriptTokenizer.java | 5 +- 
.../net/sourceforge/pmd/cpd/JSPTokenizer.java | 11 +--- .../sourceforge/pmd/cpd/MatlabTokenizer.java | 6 +- .../pmd/cpd/ModelicaTokenizer.java | 8 +-- .../pmd/cpd/ObjectiveCTokenizer.java | 6 +- .../net/sourceforge/pmd/cpd/PHPTokenizer.java | 4 +- .../sourceforge/pmd/cpd/PLSQLTokenizer.java | 6 +- .../sourceforge/pmd/cpd/PythonTokenizer.java | 11 +--- .../sourceforge/pmd/cpd/ScalaTokenizer.java | 2 +- .../net/sourceforge/pmd/cpd/VfTokenizer.java | 5 +- 39 files changed, 357 insertions(+), 283 deletions(-) create mode 100644 pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/TokenizerBase.java create mode 100644 pmd-core/src/main/java/net/sourceforge/pmd/lang/CpdOnlyLanguageModuleBase.java delete mode 100644 pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPLanguage.java create mode 100644 pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/CppLanguageModule.java delete mode 100644 pmd-cpp/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language create mode 100644 pmd-cpp/src/main/resources/META-INF/services/net.sourceforge.pmd.lang.Language delete mode 100644 pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsLanguage.java create mode 100644 pmd-cs/src/main/java/net/sourceforge/pmd/lang/cs/CsLanguageModule.java delete mode 100644 pmd-cs/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language create mode 100644 pmd-cs/src/main/resources/META-INF/services/net.sourceforge.pmd.lang.Language diff --git a/pmd-apex/src/main/java/net/sourceforge/pmd/cpd/ApexTokenizer.java b/pmd-apex/src/main/java/net/sourceforge/pmd/cpd/ApexTokenizer.java index afe8db0b3f..d2a01abb01 100644 --- a/pmd-apex/src/main/java/net/sourceforge/pmd/cpd/ApexTokenizer.java +++ b/pmd-apex/src/main/java/net/sourceforge/pmd/cpd/ApexTokenizer.java @@ -13,6 +13,7 @@ import org.antlr.runtime.Token; import net.sourceforge.pmd.lang.apex.ApexJorjeLogging; import net.sourceforge.pmd.lang.ast.TokenMgrError; +import net.sourceforge.pmd.lang.document.TextDocument; import 
apex.jorje.parser.impl.ApexLexer; @@ -35,7 +36,7 @@ public class ApexTokenizer implements Tokenizer { } @Override - public void tokenize(SourceCode sourceCode, Tokens tokenEntries) { + public void tokenize(TextDocument sourceCode, Tokens tokenEntries) { StringBuilder code = sourceCode.getCodeBuffer(); ANTLRStringStream ass = new ANTLRStringStream(code.toString()); diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AbstractTokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AbstractTokenizer.java index aa21a4db28..5db9827346 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AbstractTokenizer.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AbstractTokenizer.java @@ -7,6 +7,8 @@ package net.sourceforge.pmd.cpd; import java.util.List; import java.util.Locale; +import net.sourceforge.pmd.lang.document.TextDocument; + /** * * @author Zev Blut zb@ubit.com @@ -48,7 +50,7 @@ public abstract class AbstractTokenizer implements Tokenizer { private boolean downcaseString = true; @Override - public void tokenize(SourceCode tokens, Tokens tokenEntries) { + public void tokenize(TextDocument tokens, Tokens tokenEntries) { code = tokens.getCode(); for (lineNumber = 0; lineNumber < code.size(); lineNumber++) { diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AnyTokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AnyTokenizer.java index e53f29e533..6e02dda6b8 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AnyTokenizer.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AnyTokenizer.java @@ -9,6 +9,7 @@ import java.util.regex.Pattern; import org.apache.commons.lang3.StringUtils; +import net.sourceforge.pmd.lang.document.TextDocument; import net.sourceforge.pmd.util.StringUtil; /** @@ -60,7 +61,7 @@ public class AnyTokenizer implements Tokenizer { } @Override - public void tokenize(SourceCode sourceCode, Tokens tokenEntries) { + public void tokenize(TextDocument sourceCode, Tokens tokenEntries) { 
CharSequence text = sourceCode.getCodeBuffer(); Matcher matcher = pattern.matcher(text); int lineNo = 1; diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java index e6876fb960..2e0d77f770 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java @@ -6,7 +6,43 @@ package net.sourceforge.pmd.cpd; import java.io.IOException; +import net.sourceforge.pmd.lang.document.TextDocument; +import net.sourceforge.pmd.properties.PropertyDescriptor; +import net.sourceforge.pmd.properties.PropertyFactory; + public interface Tokenizer { + + PropertyDescriptor CPD_IGNORE_LITERAL_SEQUENCES = + PropertyFactory.booleanProperty("cpdIgnoreLiteralSequences") + .defaultValue(false) + .desc("Ignore sequences of literals, eg `0, 0, 0, 0`") + .build(); + + PropertyDescriptor CPD_ANONYMiZE_LITERALS = + PropertyFactory.booleanProperty("cpdAnonymizeLiterals") + .defaultValue(false) + .desc("Anonymize literals. They are still part of the token stream but all literals appear to have the same value.") + .build(); + PropertyDescriptor CPD_ANONYMIZE_IDENTIFIERS = + PropertyFactory.booleanProperty("cpdAnonymizeIdentifiers") + .defaultValue(false) + .desc("Anonymize identifiers. 
They are still part of the token stream but all literals appear to have the same value.") + .build(); + + + PropertyDescriptor CPD_IGNORE_IMPORTS = + PropertyFactory.booleanProperty("cpdIgnoreImports") + .defaultValue(true) + .desc("Ignore import statements and equivalent (eg using statements in C#).") + .build(); + + PropertyDescriptor CPD_IGNORE_METADATA = + PropertyFactory.booleanProperty("cpdIgnoreMetadata") + .defaultValue(false) + .desc("Ignore metadata such as Java annotations or C# attributes.") + .build(); + + String IGNORE_LITERALS = "ignore_literals"; String IGNORE_IDENTIFIERS = "ignore_identifiers"; String IGNORE_ANNOTATIONS = "ignore_annotations"; @@ -39,5 +75,5 @@ public interface Tokenizer { String DEFAULT_SKIP_BLOCKS_PATTERN = "#if 0|#endif"; - void tokenize(SourceCode sourceCode, Tokens tokenEntries) throws IOException; + void tokenize(TextDocument sourceCode, Tokens tokenEntries) throws IOException; } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/AntlrTokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/AntlrTokenizer.java index b09703881a..d5a3472281 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/AntlrTokenizer.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/AntlrTokenizer.java @@ -4,58 +4,26 @@ package net.sourceforge.pmd.cpd.internal; -import java.io.IOException; -import java.io.UncheckedIOException; - import org.antlr.v4.runtime.CharStream; import org.antlr.v4.runtime.CharStreams; import org.antlr.v4.runtime.Lexer; -import net.sourceforge.pmd.cpd.SourceCode; -import net.sourceforge.pmd.cpd.TokenEntry; import net.sourceforge.pmd.cpd.Tokenizer; -import net.sourceforge.pmd.cpd.Tokens; -import net.sourceforge.pmd.cpd.token.AntlrTokenFilter; +import net.sourceforge.pmd.lang.TokenManager; import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrToken; import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrTokenManager; -import net.sourceforge.pmd.lang.document.CpdCompat; 
import net.sourceforge.pmd.lang.document.TextDocument; /** * Generic implementation of a {@link Tokenizer} useful to any Antlr grammar. */ -public abstract class AntlrTokenizer implements Tokenizer { +public abstract class AntlrTokenizer extends TokenizerBase { + @Override + protected final TokenManager makeLexerImpl(TextDocument doc) { + CharStream charStream = CharStreams.fromString(doc.getText().toString(), doc.getDisplayName()); + return new AntlrTokenManager(getLexerForSource(charStream), doc); + } protected abstract Lexer getLexerForSource(CharStream charStream); - @Override - public void tokenize(final SourceCode sourceCode, final Tokens tokenEntries) { - try (TextDocument textDoc = TextDocument.create(CpdCompat.cpdCompat(sourceCode))) { - - CharStream charStream = CharStreams.fromString(textDoc.getText().toString(), textDoc.getDisplayName()); - - final AntlrTokenManager tokenManager = new AntlrTokenManager(getLexerForSource(charStream), textDoc); - final AntlrTokenFilter tokenFilter = getTokenFilter(tokenManager); - - AntlrToken currentToken = tokenFilter.getNextToken(); - while (currentToken != null) { - processToken(tokenEntries, currentToken); - currentToken = tokenFilter.getNextToken(); - } - - } catch (IOException e) { - throw new UncheckedIOException(e); - } finally { - tokenEntries.add(TokenEntry.getEOF()); - } - } - - protected AntlrTokenFilter getTokenFilter(final AntlrTokenManager tokenManager) { - return new AntlrTokenFilter(tokenManager); - } - - private void processToken(final Tokens tokenEntries, final AntlrToken token) { - final TokenEntry tokenEntry = new TokenEntry(token.getImage(), token.getReportLocation()); - tokenEntries.add(tokenEntry); - } } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/JavaCCTokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/JavaCCTokenizer.java index 3c45b96033..3a629d5af4 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/JavaCCTokenizer.java +++ 
b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/JavaCCTokenizer.java @@ -4,61 +4,8 @@ package net.sourceforge.pmd.cpd.internal; -import java.io.IOException; - -import net.sourceforge.pmd.cpd.SourceCode; -import net.sourceforge.pmd.cpd.TokenEntry; -import net.sourceforge.pmd.cpd.Tokenizer; -import net.sourceforge.pmd.cpd.Tokens; -import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter; -import net.sourceforge.pmd.cpd.token.TokenFilter; -import net.sourceforge.pmd.lang.TokenManager; -import net.sourceforge.pmd.lang.ast.FileAnalysisException; -import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken; -import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument.TokenDocumentBehavior; -import net.sourceforge.pmd.lang.document.CpdCompat; -import net.sourceforge.pmd.lang.document.TextDocument; -public abstract class JavaCCTokenizer implements Tokenizer { +public abstract class JavaCCTokenizer extends TokenizerBase { - @SuppressWarnings("PMD.CloseResource") - protected TokenManager getLexerForSource(TextDocument sourceCode) throws IOException { - return makeLexerImpl(CharStream.create(sourceCode, tokenBehavior())); - } - - protected TokenDocumentBehavior tokenBehavior() { - return TokenDocumentBehavior.DEFAULT; - } - - protected abstract TokenManager makeLexerImpl(CharStream sourceCode); - - protected TokenFilter getTokenFilter(TokenManager tokenManager) { - return new JavaCCTokenFilter(tokenManager); - } - - protected TokenEntry processToken(Tokens tokenEntries, JavaccToken currentToken) { - return new TokenEntry(getImage(currentToken), currentToken.getReportLocation()); - } - - protected String getImage(JavaccToken token) { - return token.getImage(); - } - - @Override - public void tokenize(SourceCode sourceCode, Tokens tokenEntries) throws IOException { - try (TextDocument textDoc = TextDocument.create(CpdCompat.cpdCompat(sourceCode))) { - TokenManager tokenManager = 
getLexerForSource(textDoc); - final TokenFilter tokenFilter = getTokenFilter(tokenManager); - JavaccToken currentToken = tokenFilter.getNextToken(); - while (currentToken != null) { - tokenEntries.add(processToken(tokenEntries, currentToken)); - currentToken = tokenFilter.getNextToken(); - } - } catch (FileAnalysisException e) { - throw e.setFileName(sourceCode.getFileName()); - } finally { - tokenEntries.add(TokenEntry.getEOF()); - } - } } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/TokenizerBase.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/TokenizerBase.java new file mode 100644 index 0000000000..07d6e9894f --- /dev/null +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/TokenizerBase.java @@ -0,0 +1,42 @@ +/** + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.cpd.internal; + +import java.io.IOException; + +import net.sourceforge.pmd.cpd.TokenEntry; +import net.sourceforge.pmd.cpd.Tokenizer; +import net.sourceforge.pmd.cpd.Tokens; +import net.sourceforge.pmd.cpd.token.internal.BaseTokenFilter; +import net.sourceforge.pmd.lang.TokenManager; +import net.sourceforge.pmd.lang.ast.GenericToken; +import net.sourceforge.pmd.lang.document.TextDocument; + +public abstract class TokenizerBase> implements Tokenizer { + + protected abstract TokenManager makeLexerImpl(TextDocument doc); + + protected TokenManager filterTokenStream(TokenManager tokenManager) { + return new BaseTokenFilter<>(tokenManager); + } + + protected TokenEntry processToken(Tokens tokenEntries, T currentToken) { + return new TokenEntry(getImage(currentToken), currentToken.getReportLocation()); + } + + protected String getImage(T token) { + return token.getImage(); + } + + @Override + public void tokenize(TextDocument document, Tokens tokenEntries) throws IOException { + TokenManager tokenManager = filterTokenStream(makeLexerImpl(document)); + T currentToken = 
tokenManager.getNextToken(); + while (currentToken != null) { + tokenEntries.add(processToken(tokenEntries, currentToken)); + currentToken = tokenManager.getNextToken(); + } + } +} diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/TokenFilter.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/TokenFilter.java index 3671f109db..469b33d89f 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/TokenFilter.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/TokenFilter.java @@ -4,16 +4,18 @@ package net.sourceforge.pmd.cpd.token; +import net.sourceforge.pmd.lang.TokenManager; import net.sourceforge.pmd.lang.ast.GenericToken; /** * Defines filter to be applied to the token stream during CPD analysis */ -public interface TokenFilter> { +public interface TokenFilter> extends TokenManager { /** * Retrieves the next token to pass the filter * @return The next token to pass the filter, or null if the end of the stream was reached */ + @Override T getNextToken(); } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/internal/BaseTokenFilter.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/internal/BaseTokenFilter.java index 6d980ea41e..d4d6e7c90b 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/internal/BaseTokenFilter.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/internal/BaseTokenFilter.java @@ -18,7 +18,7 @@ import net.sourceforge.pmd.lang.ast.GenericToken; * A generic filter for PMD token managers that allows to use comments * to enable / disable analysis of parts of the stream */ -public abstract class BaseTokenFilter> implements TokenFilter { +public class BaseTokenFilter> implements TokenFilter { private final TokenManager tokenManager; private final LinkedList unprocessedTokens; // NOPMD - used both as Queue and List diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/CpdOnlyLanguageModuleBase.java 
b/pmd-core/src/main/java/net/sourceforge/pmd/lang/CpdOnlyLanguageModuleBase.java new file mode 100644 index 0000000000..ade537f08e --- /dev/null +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/CpdOnlyLanguageModuleBase.java @@ -0,0 +1,28 @@ +/* + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.lang; + +/** + * Base class for language modules that only support CPD and not PMD. + * + * @author Clément Fournier + */ +public abstract class CpdOnlyLanguageModuleBase extends LanguageModuleBase { + + /** + * Construct a module instance using the given metadata. The metadata must + * be properly constructed. + * + * @throws IllegalStateException If the metadata is invalid (eg missing extensions or name) + */ + protected CpdOnlyLanguageModuleBase(LanguageMetadata metadata) { + super(metadata); + } + + @Override + public boolean supportsParsing() { + return false; + } +} diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/Language.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/Language.java index 4d10b41e4d..045dd2c9a6 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/Language.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/Language.java @@ -8,6 +8,8 @@ import java.util.List; import java.util.ServiceLoader; import java.util.Set; +import net.sourceforge.pmd.cpd.Tokenizer; + /** * Represents a language module, and provides access to language-specific * functionality. You can get a language instance from a {@link LanguageRegistry}. @@ -156,6 +158,14 @@ public interface Language extends Comparable { return new LanguagePropertyBundle(this); } + /** + * Return true if this language supports parsing files into an AST. + * In that case {@link #createProcessor(LanguagePropertyBundle)} should + * also be implemented. 
+ */ + default boolean supportsParsing() { + return false; + } /** * Create a new {@link LanguageProcessor} for this language, given @@ -167,8 +177,30 @@ public interface Language extends Comparable { * @param bundle A bundle of properties created by this instance. * * @return A new language processor + * + * @throws UnsupportedOperationException if this language does not support PMD */ - LanguageProcessor createProcessor(LanguagePropertyBundle bundle); + default LanguageProcessor createProcessor(LanguagePropertyBundle bundle) { + throw new UnsupportedOperationException(this + " does not support running a PMD analysis."); + } + + + /** + * Create a new {@link Tokenizer} for this language, given + * a property bundle with configuration. The bundle was created by + * this instance using {@link #newPropertyBundle()}. It can be assumed + * that the bundle will never be mutated anymore, and this method + * takes ownership of it. + * + * @param bundle A bundle of properties created by this instance. + * + * @return A new tokenizer + * + * @throws UnsupportedOperationException if this language does not support CPD + */ + default Tokenizer createCpdTokenizer(LanguagePropertyBundle bundle) { + throw new UnsupportedOperationException(this + " does not support running a CPD analysis."); + } /** diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/LanguageRegistry.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/LanguageRegistry.java index 3adf16c8fb..5cfd9f9baf 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/LanguageRegistry.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/LanguageRegistry.java @@ -41,6 +41,7 @@ public final class LanguageRegistry implements Iterable { * of the classloader of this class. This can be used as a "default" registry. 
*/ public static final LanguageRegistry PMD = loadLanguages(LanguageRegistry.class.getClassLoader()); + public static final LanguageRegistry CPD = loadLanguages(LanguageRegistry.class.getClassLoader()); // todo private final Set languages; diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStream.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStream.java index d598bff263..9635cf784b 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStream.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStream.java @@ -42,6 +42,15 @@ public final class CharStream { return new CharStream(new JavaccTokenDocument(translated, behavior)); } + /** + * Create a new char stream for the given document with the default token + * document behavior. This may create a new {@link TextDocument} view + * over the original, which reflects its character escapes. + */ + public static CharStream create(TextDocument doc) throws MalformedSourceException { + return create(doc, TokenDocumentBehavior.DEFAULT); + } + /** * Returns the next character from the input. After a {@link #backup(int)}, * some of the already read chars must be spit out again. 
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/impl/SimpleLanguageModuleBase.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/impl/SimpleLanguageModuleBase.java index 6f907780b3..23cf5ee7b1 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/impl/SimpleLanguageModuleBase.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/impl/SimpleLanguageModuleBase.java @@ -20,7 +20,7 @@ import net.sourceforge.pmd.lang.LanguageVersionHandler; * @author Clément Fournier * @since 7.0.0 */ -public class SimpleLanguageModuleBase extends LanguageModuleBase { +public abstract class SimpleLanguageModuleBase extends LanguageModuleBase { private final Function handler; @@ -33,6 +33,11 @@ public class SimpleLanguageModuleBase extends LanguageModuleBase { this.handler = makeHandler; } + @Override + public boolean supportsParsing() { + return true; + } + @Override public LanguageProcessor createProcessor(LanguagePropertyBundle bundle) { LanguageVersionHandler services = handler.apply(bundle); diff --git a/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPLanguage.java b/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPLanguage.java deleted file mode 100644 index a3dfce0c96..0000000000 --- a/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPLanguage.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * BSD-style license; for more info see http://pmd.sourceforge.net/license.html - */ - -package net.sourceforge.pmd.cpd; - -import java.util.Properties; - -/** - * Defines the Language module for C/C++ - */ -public class CPPLanguage extends AbstractLanguage { - - /** - * Creates a new instance of {@link CPPLanguage} with the default extensions - * for c/c++ files. 
- */ - public CPPLanguage() { - this(System.getProperties()); - } - - public CPPLanguage(Properties properties) { - super("C++", "cpp", new CPPTokenizer(), ".h", ".hpp", ".hxx", ".c", ".cpp", ".cxx", ".cc", ".C"); - setProperties(properties); - } - - @Override - public void setProperties(Properties properties) { - super.setProperties(properties); - ((CPPTokenizer) getTokenizer()).setProperties(properties); - } -} diff --git a/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java b/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java index 0728e9afbf..bb9f5c2811 100644 --- a/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java +++ b/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java @@ -4,46 +4,37 @@ package net.sourceforge.pmd.cpd; -import java.util.Properties; import java.util.regex.Pattern; -import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer; +import org.apache.commons.lang3.StringUtils; + +import net.sourceforge.pmd.cpd.internal.TokenizerBase; import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter; -import net.sourceforge.pmd.cpd.token.TokenFilter; +import net.sourceforge.pmd.lang.LanguagePropertyBundle; import net.sourceforge.pmd.lang.TokenManager; import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument.TokenDocumentBehavior; import net.sourceforge.pmd.lang.ast.impl.javacc.MalformedSourceException; +import net.sourceforge.pmd.lang.cpp.CppLanguageModule; import net.sourceforge.pmd.lang.cpp.ast.CppTokenKinds; import net.sourceforge.pmd.lang.document.TextDocument; /** * The C++ tokenizer. 
*/ -public class CPPTokenizer extends JavaCCTokenizer { +public class CPPTokenizer extends TokenizerBase { private boolean skipBlocks; private Pattern skipBlocksStart; private Pattern skipBlocksEnd; - private boolean ignoreLiteralSequences = false; + private final boolean ignoreLiteralSequences; - public CPPTokenizer() { - setProperties(new Properties()); // set the defaults - } - - /** - * Sets the possible options for the C++ tokenizer. - * - * @param properties the properties - * @see #OPTION_SKIP_BLOCKS - * @see #OPTION_SKIP_BLOCKS_PATTERN - * @see #OPTION_IGNORE_LITERAL_SEQUENCES - */ - public void setProperties(Properties properties) { - skipBlocks = Boolean.parseBoolean(properties.getProperty(OPTION_SKIP_BLOCKS, Boolean.TRUE.toString())); - if (skipBlocks) { - String skipBlocksPattern = properties.getProperty(OPTION_SKIP_BLOCKS_PATTERN, DEFAULT_SKIP_BLOCKS_PATTERN); + public CPPTokenizer(LanguagePropertyBundle cppProperties) { + ignoreLiteralSequences = cppProperties.getProperty(Tokenizer.CPD_IGNORE_LITERAL_SEQUENCES); + String skipBlocksPattern = cppProperties.getProperty(CppLanguageModule.CPD_SKIP_BLOCKS); + if (StringUtils.isNotBlank(skipBlocksPattern)) { + skipBlocks = true; String[] split = skipBlocksPattern.split("\\|", 2); skipBlocksStart = CppBlockSkipper.compileSkipMarker(split[0]); if (split.length == 1) { @@ -52,14 +43,15 @@ public class CPPTokenizer extends JavaCCTokenizer { skipBlocksEnd = CppBlockSkipper.compileSkipMarker(split[1]); } } - ignoreLiteralSequences = Boolean.parseBoolean(properties.getProperty(OPTION_IGNORE_LITERAL_SEQUENCES, - Boolean.FALSE.toString())); } - @Override - protected TokenDocumentBehavior tokenBehavior() { - return new TokenDocumentBehavior(CppTokenKinds.TOKEN_NAMES) { + protected TokenManager makeLexerImpl(TextDocument doc) { + return CppTokenKinds.newTokenManager(newCharStream(doc)); + } + + CharStream newCharStream(TextDocument doc) { + return CharStream.create(doc, new 
TokenDocumentBehavior(CppTokenKinds.TOKEN_NAMES) { @Override public TextDocument translate(TextDocument text) throws MalformedSourceException { @@ -68,20 +60,16 @@ public class CPPTokenizer extends JavaCCTokenizer { } return new CppEscapeTranslator(text).translateDocument(); } - }; + }); } @Override - protected TokenManager makeLexerImpl(CharStream sourceCode) { - return CppTokenKinds.newTokenManager(sourceCode); - } - - @Override - protected TokenFilter getTokenFilter(final TokenManager tokenManager) { + protected TokenManager filterTokenStream(final TokenManager tokenManager) { return new CppTokenFilter(tokenManager, ignoreLiteralSequences); } private static class CppTokenFilter extends JavaCCTokenFilter { + private final boolean ignoreLiteralSequences; private JavaccToken discardingLiteralsUntil = null; private boolean discardCurrent = false; @@ -106,8 +94,7 @@ public class CPPTokenizer extends JavaCCTokenizer { discardCurrent = true; } } else if (kind == CppTokenKinds.LCURLYBRACE) { - final JavaccToken finalToken = findEndOfSequenceOfLiterals(remainingTokens); - discardingLiteralsUntil = finalToken; + discardingLiteralsUntil = findEndOfSequenceOfLiterals(remainingTokens); } } } diff --git a/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/CppLanguageModule.java b/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/CppLanguageModule.java new file mode 100644 index 0000000000..eadecc970f --- /dev/null +++ b/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/CppLanguageModule.java @@ -0,0 +1,56 @@ +/* + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.lang.cpp; + +import net.sourceforge.pmd.cpd.CPPTokenizer; +import net.sourceforge.pmd.cpd.Tokenizer; +import net.sourceforge.pmd.lang.CpdOnlyLanguageModuleBase; +import net.sourceforge.pmd.lang.LanguagePropertyBundle; +import net.sourceforge.pmd.lang.LanguageRegistry; +import net.sourceforge.pmd.properties.PropertyDescriptor; +import 
net.sourceforge.pmd.properties.PropertyFactory; + +/** + * Defines the Language module for C/C++ + */ +public class CppLanguageModule extends CpdOnlyLanguageModuleBase { + + + public static final PropertyDescriptor CPD_SKIP_BLOCKS = + PropertyFactory.stringProperty("cpdSkipBlocksPattern") + .defaultValue("#if 0|#endif") + .desc("Specifies a start and end delimiter for CPD to completely ignore. " + + "The delimiters are separated by a pipe |. The default skips code " + + " that is conditionally compiled out. Set this property to empty to disable this.") + .build(); + + /** + * Creates a new instance of {@link CppLanguageModule} with the default extensions + * for c/c++ files. + */ + public CppLanguageModule() { + super(LanguageMetadata.withId("cpp") + .name("C++") + .addDefaultVersion("any") + .extensions("h", "hpp", "hxx", "c", "cpp", "cxx", "cc", "C")); + } + + public static CppLanguageModule getInstance() { + return (CppLanguageModule) LanguageRegistry.CPD.getLanguageById("cpp"); + } + + @Override + public LanguagePropertyBundle newPropertyBundle() { + LanguagePropertyBundle bundle = super.newPropertyBundle(); + bundle.definePropertyDescriptor(Tokenizer.CPD_IGNORE_LITERAL_SEQUENCES); + bundle.definePropertyDescriptor(CPD_SKIP_BLOCKS); + return bundle; + } + + @Override + public Tokenizer createCpdTokenizer(LanguagePropertyBundle bundle) { + return new CPPTokenizer(bundle); + } +} diff --git a/pmd-cpp/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language b/pmd-cpp/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language deleted file mode 100644 index 2170e55e7f..0000000000 --- a/pmd-cpp/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language +++ /dev/null @@ -1 +0,0 @@ -net.sourceforge.pmd.cpd.CPPLanguage diff --git a/pmd-cpp/src/main/resources/META-INF/services/net.sourceforge.pmd.lang.Language b/pmd-cpp/src/main/resources/META-INF/services/net.sourceforge.pmd.lang.Language new file mode 100644 index 
0000000000..ecb3ec91fa --- /dev/null +++ b/pmd-cpp/src/main/resources/META-INF/services/net.sourceforge.pmd.lang.Language @@ -0,0 +1 @@ +net.sourceforge.pmd.lang.cpp.CppLanguageModule diff --git a/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CppCharStreamTest.java b/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CppCharStreamTest.java index 3546db9e6c..a44e7cf970 100644 --- a/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CppCharStreamTest.java +++ b/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CppCharStreamTest.java @@ -8,20 +8,20 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import java.io.IOException; -import org.checkerframework.checker.nullness.qual.NonNull; import org.junit.jupiter.api.Test; import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream; -import net.sourceforge.pmd.lang.document.CpdCompat; +import net.sourceforge.pmd.lang.cpp.CppLanguageModule; import net.sourceforge.pmd.lang.document.TextDocument; import net.sourceforge.pmd.lang.document.TextFile; class CppCharStreamTest { - @NonNull - public CharStream charStreamFor(String source) throws IOException { - TextDocument textDoc = TextDocument.readOnlyString(source, TextFile.UNKNOWN_FILENAME, CpdCompat.dummyVersion()); - return CharStream.create(textDoc, new CPPTokenizer().tokenBehavior()); + public CharStream charStreamFor(String source) { + CppLanguageModule cpp = CppLanguageModule.getInstance(); + TextDocument textDoc = TextDocument.readOnlyString(source, TextFile.UNKNOWN_FILENAME, cpp.getDefaultVersion()); + CPPTokenizer tokenizer = new CPPTokenizer(cpp.newPropertyBundle()); + return tokenizer.newCharStream(textDoc); } @Test diff --git a/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsLanguage.java b/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsLanguage.java deleted file mode 100644 index e54edcddbe..0000000000 --- a/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsLanguage.java +++ /dev/null @@ -1,28 +0,0 @@ -/** - * BSD-style license; for more info see 
http://pmd.sourceforge.net/license.html - */ - -package net.sourceforge.pmd.cpd; - -import java.util.Properties; - -/** - * Language implementation for C# - */ -public class CsLanguage extends AbstractLanguage { - - public CsLanguage() { - this(System.getProperties()); - } - - public CsLanguage(Properties properties) { - super("C#", "cs", new CsTokenizer(), ".cs"); - setProperties(properties); - } - - @Override - public final void setProperties(Properties properties) { - CsTokenizer tokenizer = (CsTokenizer) getTokenizer(); - tokenizer.setProperties(properties); - } -} diff --git a/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsTokenizer.java b/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsTokenizer.java index 64822f1a38..d58ccdb0d7 100644 --- a/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsTokenizer.java +++ b/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsTokenizer.java @@ -4,15 +4,15 @@ package net.sourceforge.pmd.cpd; -import java.util.Properties; - import org.antlr.v4.runtime.CharStream; import org.antlr.v4.runtime.Lexer; import net.sourceforge.pmd.cpd.internal.AntlrTokenizer; import net.sourceforge.pmd.cpd.token.AntlrTokenFilter; +import net.sourceforge.pmd.cpd.token.internal.BaseTokenFilter; +import net.sourceforge.pmd.lang.LanguagePropertyBundle; +import net.sourceforge.pmd.lang.TokenManager; import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrToken; -import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrTokenManager; import net.sourceforge.pmd.lang.cs.ast.CSharpLexer; /** @@ -20,26 +20,14 @@ import net.sourceforge.pmd.lang.cs.ast.CSharpLexer; */ public class CsTokenizer extends AntlrTokenizer { - private boolean ignoreUsings = false; - private boolean ignoreLiteralSequences = false; - private boolean ignoreAttributes = false; + private final boolean ignoreUsings; + private final boolean ignoreLiteralSequences; + private final boolean ignoreAttributes; - /** - * Sets the possible options for the C# tokenizer. 
- * - * @param properties the properties - * @see #IGNORE_USINGS - * @see #OPTION_IGNORE_LITERAL_SEQUENCES - * @see #IGNORE_ANNOTATIONS - */ - public void setProperties(Properties properties) { - ignoreUsings = getBooleanProperty(properties, IGNORE_USINGS); - ignoreLiteralSequences = getBooleanProperty(properties, OPTION_IGNORE_LITERAL_SEQUENCES); - ignoreAttributes = getBooleanProperty(properties, IGNORE_ANNOTATIONS); - } - - private boolean getBooleanProperty(final Properties properties, final String property) { - return Boolean.parseBoolean(properties.getProperty(property, Boolean.FALSE.toString())); + public CsTokenizer(LanguagePropertyBundle properties) { + ignoreUsings = properties.getProperty(Tokenizer.CPD_IGNORE_IMPORTS); + ignoreLiteralSequences = properties.getProperty(Tokenizer.CPD_IGNORE_LITERAL_SEQUENCES); + ignoreAttributes = properties.getProperty(Tokenizer.CPD_IGNORE_METADATA); } @Override @@ -48,7 +36,7 @@ public class CsTokenizer extends AntlrTokenizer { } @Override - protected AntlrTokenFilter getTokenFilter(final AntlrTokenManager tokenManager) { + protected TokenManager filterTokenStream(TokenManager tokenManager) { return new CsTokenFilter(tokenManager, ignoreUsings, ignoreLiteralSequences, ignoreAttributes); } @@ -60,7 +48,7 @@ public class CsTokenizer extends AntlrTokenizer { * If the --ignoreUsings flag is provided, using directives are filtered out. *

*/ - private static class CsTokenFilter extends AntlrTokenFilter { + private static class CsTokenFilter extends BaseTokenFilter { private enum UsingState { KEYWORD, // just encountered the using keyword IDENTIFIER, // just encountered an identifier or var keyword @@ -75,7 +63,7 @@ public class CsTokenizer extends AntlrTokenizer { private AntlrToken discardingLiteralsUntil = null; private boolean discardCurrent = false; - CsTokenFilter(final AntlrTokenManager tokenManager, boolean ignoreUsings, boolean ignoreLiteralSequences, boolean ignoreAttributes) { + CsTokenFilter(final TokenManager tokenManager, boolean ignoreUsings, boolean ignoreLiteralSequences, boolean ignoreAttributes) { super(tokenManager); this.ignoreUsings = ignoreUsings; this.ignoreLiteralSequences = ignoreLiteralSequences; diff --git a/pmd-cs/src/main/java/net/sourceforge/pmd/lang/cs/CsLanguageModule.java b/pmd-cs/src/main/java/net/sourceforge/pmd/lang/cs/CsLanguageModule.java new file mode 100644 index 0000000000..5f122b8e35 --- /dev/null +++ b/pmd-cs/src/main/java/net/sourceforge/pmd/lang/cs/CsLanguageModule.java @@ -0,0 +1,42 @@ +/* + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.lang.cs; + +import net.sourceforge.pmd.cpd.CsTokenizer; +import net.sourceforge.pmd.cpd.Tokenizer; +import net.sourceforge.pmd.lang.CpdOnlyLanguageModuleBase; +import net.sourceforge.pmd.lang.LanguagePropertyBundle; +import net.sourceforge.pmd.lang.LanguageRegistry; + +/** + * Defines the Language module for C#. 
+ */ +public class CsLanguageModule extends CpdOnlyLanguageModuleBase { + + public CsLanguageModule() { + super(LanguageMetadata.withId("cs") + .name("C#") + .addDefaultVersion("any") + .extensions("cs")); + } + + public static CsLanguageModule getInstance() { + return (CsLanguageModule) LanguageRegistry.CPD.getLanguageById("cs"); + } + + @Override + public LanguagePropertyBundle newPropertyBundle() { + LanguagePropertyBundle bundle = super.newPropertyBundle(); + bundle.definePropertyDescriptor(Tokenizer.CPD_IGNORE_LITERAL_SEQUENCES); + bundle.definePropertyDescriptor(Tokenizer.CPD_IGNORE_IMPORTS); + bundle.definePropertyDescriptor(Tokenizer.CPD_IGNORE_METADATA); + return bundle; + } + + @Override + public Tokenizer createCpdTokenizer(LanguagePropertyBundle bundle) { + return new CsTokenizer(bundle); + } +} diff --git a/pmd-cs/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language b/pmd-cs/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language deleted file mode 100644 index 76459b4741..0000000000 --- a/pmd-cs/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language +++ /dev/null @@ -1 +0,0 @@ -net.sourceforge.pmd.cpd.CsLanguage diff --git a/pmd-cs/src/main/resources/META-INF/services/net.sourceforge.pmd.lang.Language b/pmd-cs/src/main/resources/META-INF/services/net.sourceforge.pmd.lang.Language new file mode 100644 index 0000000000..1b979f896f --- /dev/null +++ b/pmd-cs/src/main/resources/META-INF/services/net.sourceforge.pmd.lang.Language @@ -0,0 +1 @@ +net.sourceforge.pmd.lang.cs.CsLanguageModule diff --git a/pmd-dart/src/main/java/net/sourceforge/pmd/cpd/DartTokenizer.java b/pmd-dart/src/main/java/net/sourceforge/pmd/cpd/DartTokenizer.java index 6c847754a2..06a2527d9e 100644 --- a/pmd-dart/src/main/java/net/sourceforge/pmd/cpd/DartTokenizer.java +++ b/pmd-dart/src/main/java/net/sourceforge/pmd/cpd/DartTokenizer.java @@ -9,8 +9,9 @@ import org.antlr.v4.runtime.Lexer; import 
net.sourceforge.pmd.cpd.internal.AntlrTokenizer; import net.sourceforge.pmd.cpd.token.AntlrTokenFilter; +import net.sourceforge.pmd.cpd.token.internal.BaseTokenFilter; +import net.sourceforge.pmd.lang.TokenManager; import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrToken; -import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrTokenManager; import net.sourceforge.pmd.lang.dart.ast.DartLexer; /** @@ -24,7 +25,7 @@ public class DartTokenizer extends AntlrTokenizer { } @Override - protected AntlrTokenFilter getTokenFilter(final AntlrTokenManager tokenManager) { + protected TokenManager filterTokenStream(TokenManager tokenManager) { return new DartTokenFilter(tokenManager); } @@ -36,12 +37,12 @@ public class DartTokenizer extends AntlrTokenizer { * enables comment-based CPD suppression. *

*/ - private static class DartTokenFilter extends AntlrTokenFilter { + private static class DartTokenFilter extends BaseTokenFilter { private boolean discardingLibraryAndImport = false; private boolean discardingNL = false; private boolean discardingSemicolon = false; - /* default */ DartTokenFilter(final AntlrTokenManager tokenManager) { + /* default */ DartTokenFilter(final TokenManager tokenManager) { super(tokenManager); } diff --git a/pmd-groovy/src/main/java/net/sourceforge/pmd/cpd/GroovyTokenizer.java b/pmd-groovy/src/main/java/net/sourceforge/pmd/cpd/GroovyTokenizer.java index 654342e2b6..79ecf7b6b5 100644 --- a/pmd-groovy/src/main/java/net/sourceforge/pmd/cpd/GroovyTokenizer.java +++ b/pmd-groovy/src/main/java/net/sourceforge/pmd/cpd/GroovyTokenizer.java @@ -10,6 +10,7 @@ import org.codehaus.groovy.antlr.SourceInfo; import org.codehaus.groovy.antlr.parser.GroovyLexer; import net.sourceforge.pmd.lang.ast.TokenMgrError; +import net.sourceforge.pmd.lang.document.TextDocument; import groovyjarjarantlr.Token; import groovyjarjarantlr.TokenStream; @@ -21,7 +22,7 @@ import groovyjarjarantlr.TokenStreamException; public class GroovyTokenizer implements Tokenizer { @Override - public void tokenize(SourceCode sourceCode, Tokens tokenEntries) { + public void tokenize(TextDocument sourceCode, Tokens tokenEntries) { StringBuilder buffer = sourceCode.getCodeBuffer(); GroovyLexer lexer = new GroovyLexer(new StringReader(buffer.toString())); diff --git a/pmd-html/src/main/java/net/sourceforge/pmd/lang/html/ast/HtmlTokenizer.java b/pmd-html/src/main/java/net/sourceforge/pmd/lang/html/ast/HtmlTokenizer.java index e7fad1cbe9..8f70dc69f0 100644 --- a/pmd-html/src/main/java/net/sourceforge/pmd/lang/html/ast/HtmlTokenizer.java +++ b/pmd-html/src/main/java/net/sourceforge/pmd/lang/html/ast/HtmlTokenizer.java @@ -7,7 +7,6 @@ package net.sourceforge.pmd.lang.html.ast; import java.io.IOException; import java.io.UncheckedIOException; -import net.sourceforge.pmd.cpd.SourceCode; 
import net.sourceforge.pmd.cpd.TokenEntry; import net.sourceforge.pmd.cpd.Tokenizer; import net.sourceforge.pmd.cpd.Tokens; @@ -22,7 +21,7 @@ import net.sourceforge.pmd.lang.html.HtmlLanguageModule; public class HtmlTokenizer implements Tokenizer { @Override - public void tokenize(SourceCode sourceCode, Tokens tokenEntries) { + public void tokenize(TextDocument sourceCode, Tokens tokenEntries) { HtmlLanguageModule html = HtmlLanguageModule.getInstance(); try (LanguageProcessor processor = html.createProcessor(html.newPropertyBundle()); diff --git a/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java b/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java index 525d1731b2..8a54eca671 100644 --- a/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java +++ b/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java @@ -11,11 +11,10 @@ import java.util.Properties; import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer; import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter; -import net.sourceforge.pmd.cpd.token.TokenFilter; import net.sourceforge.pmd.lang.TokenManager; import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken; -import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument; +import net.sourceforge.pmd.lang.document.TextDocument; import net.sourceforge.pmd.lang.java.ast.InternalApiBridge; import net.sourceforge.pmd.lang.java.ast.JavaTokenKinds; @@ -37,23 +36,18 @@ public class JavaTokenizer extends JavaCCTokenizer { } @Override - public void tokenize(SourceCode sourceCode, Tokens tokenEntries) throws IOException { + public void tokenize(TextDocument sourceCode, Tokens tokenEntries) throws IOException { constructorDetector = new ConstructorDetector(ignoreIdentifiers); super.tokenize(sourceCode, tokenEntries); } @Override - protected JavaccTokenDocument.TokenDocumentBehavior tokenBehavior() { - return InternalApiBridge.javaTokenDoc(); + 
protected TokenManager makeLexerImpl(TextDocument doc) { + return JavaTokenKinds.newTokenManager(CharStream.create(doc, InternalApiBridge.javaTokenDoc())); } @Override - protected TokenManager makeLexerImpl(CharStream sourceCode) { - return JavaTokenKinds.newTokenManager(sourceCode); - } - - @Override - protected TokenFilter getTokenFilter(TokenManager tokenManager) { + protected TokenManager filterTokenStream(TokenManager tokenManager) { return new JavaTokenFilter(tokenManager, ignoreAnnotations); } diff --git a/pmd-javascript/src/main/java/net/sourceforge/pmd/cpd/EcmascriptTokenizer.java b/pmd-javascript/src/main/java/net/sourceforge/pmd/cpd/EcmascriptTokenizer.java index d66d74949f..1a2de570ec 100644 --- a/pmd-javascript/src/main/java/net/sourceforge/pmd/cpd/EcmascriptTokenizer.java +++ b/pmd-javascript/src/main/java/net/sourceforge/pmd/cpd/EcmascriptTokenizer.java @@ -8,6 +8,7 @@ import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer; import net.sourceforge.pmd.lang.TokenManager; import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken; +import net.sourceforge.pmd.lang.document.TextDocument; import net.sourceforge.pmd.lang.ecmascript5.ast.Ecmascript5TokenKinds; /** @@ -16,8 +17,8 @@ import net.sourceforge.pmd.lang.ecmascript5.ast.Ecmascript5TokenKinds; public class EcmascriptTokenizer extends JavaCCTokenizer { @Override - protected TokenManager makeLexerImpl(CharStream sourceCode) { - return Ecmascript5TokenKinds.newTokenManager(sourceCode); + protected TokenManager makeLexerImpl(TextDocument doc) { + return Ecmascript5TokenKinds.newTokenManager(CharStream.create(doc)); } @Override diff --git a/pmd-jsp/src/main/java/net/sourceforge/pmd/cpd/JSPTokenizer.java b/pmd-jsp/src/main/java/net/sourceforge/pmd/cpd/JSPTokenizer.java index 5617484d1b..d32b96973e 100644 --- a/pmd-jsp/src/main/java/net/sourceforge/pmd/cpd/JSPTokenizer.java +++ b/pmd-jsp/src/main/java/net/sourceforge/pmd/cpd/JSPTokenizer.java @@ 
-8,20 +8,15 @@ import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer; import net.sourceforge.pmd.lang.TokenManager; import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken; -import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument; +import net.sourceforge.pmd.lang.document.TextDocument; import net.sourceforge.pmd.lang.jsp.ast.JspParser; import net.sourceforge.pmd.lang.jsp.ast.JspTokenKinds; public class JSPTokenizer extends JavaCCTokenizer { @Override - protected TokenManager makeLexerImpl(CharStream sourceCode) { - return JspTokenKinds.newTokenManager(sourceCode); - } - - @Override - protected JavaccTokenDocument.TokenDocumentBehavior tokenBehavior() { - return JspParser.getTokenBehavior(); + protected TokenManager makeLexerImpl(TextDocument doc) { + return JspTokenKinds.newTokenManager(CharStream.create(doc, JspParser.getTokenBehavior())); } } diff --git a/pmd-matlab/src/main/java/net/sourceforge/pmd/cpd/MatlabTokenizer.java b/pmd-matlab/src/main/java/net/sourceforge/pmd/cpd/MatlabTokenizer.java index 9459c44696..b2233923b3 100644 --- a/pmd-matlab/src/main/java/net/sourceforge/pmd/cpd/MatlabTokenizer.java +++ b/pmd-matlab/src/main/java/net/sourceforge/pmd/cpd/MatlabTokenizer.java @@ -6,8 +6,8 @@ package net.sourceforge.pmd.cpd; import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer; import net.sourceforge.pmd.lang.TokenManager; -import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken; +import net.sourceforge.pmd.lang.document.TextDocument; import net.sourceforge.pmd.lang.matlab.ast.MatlabTokenKinds; /** @@ -16,7 +16,7 @@ import net.sourceforge.pmd.lang.matlab.ast.MatlabTokenKinds; public class MatlabTokenizer extends JavaCCTokenizer { @Override - protected TokenManager makeLexerImpl(CharStream sourceCode) { - return MatlabTokenKinds.newTokenManager(sourceCode); + protected TokenManager makeLexerImpl(TextDocument doc) { + 
return MatlabTokenKinds.newTokenManager(doc); } } diff --git a/pmd-modelica/src/main/java/net/sourceforge/pmd/cpd/ModelicaTokenizer.java b/pmd-modelica/src/main/java/net/sourceforge/pmd/cpd/ModelicaTokenizer.java index 3258a3cda7..61bbad3226 100644 --- a/pmd-modelica/src/main/java/net/sourceforge/pmd/cpd/ModelicaTokenizer.java +++ b/pmd-modelica/src/main/java/net/sourceforge/pmd/cpd/ModelicaTokenizer.java @@ -7,20 +7,20 @@ package net.sourceforge.pmd.cpd; import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer; import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter; import net.sourceforge.pmd.lang.TokenManager; -import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken; +import net.sourceforge.pmd.lang.document.TextDocument; import net.sourceforge.pmd.lang.modelica.ast.ModelicaTokenKinds; public class ModelicaTokenizer extends JavaCCTokenizer { @Override - protected TokenManager makeLexerImpl(CharStream sourceCode) { - return ModelicaTokenKinds.newTokenManager(sourceCode); + protected TokenManager makeLexerImpl(TextDocument doc) { + return ModelicaTokenKinds.newTokenManager(doc); } @Override - protected JavaCCTokenFilter getTokenFilter(TokenManager tokenManager) { + protected TokenManager filterTokenStream(TokenManager tokenManager) { return new ModelicaTokenFilter(tokenManager); } diff --git a/pmd-objectivec/src/main/java/net/sourceforge/pmd/cpd/ObjectiveCTokenizer.java b/pmd-objectivec/src/main/java/net/sourceforge/pmd/cpd/ObjectiveCTokenizer.java index acccfcd24a..6c338b4067 100644 --- a/pmd-objectivec/src/main/java/net/sourceforge/pmd/cpd/ObjectiveCTokenizer.java +++ b/pmd-objectivec/src/main/java/net/sourceforge/pmd/cpd/ObjectiveCTokenizer.java @@ -6,8 +6,8 @@ package net.sourceforge.pmd.cpd; import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer; import net.sourceforge.pmd.lang.TokenManager; -import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream; import 
net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken; +import net.sourceforge.pmd.lang.document.TextDocument; import net.sourceforge.pmd.lang.objectivec.ast.ObjectiveCTokenKinds; /** @@ -16,7 +16,7 @@ import net.sourceforge.pmd.lang.objectivec.ast.ObjectiveCTokenKinds; public class ObjectiveCTokenizer extends JavaCCTokenizer { @Override - protected TokenManager makeLexerImpl(CharStream sourceCode) { - return ObjectiveCTokenKinds.newTokenManager(sourceCode); + protected TokenManager makeLexerImpl(TextDocument doc) { + return ObjectiveCTokenKinds.newTokenManager(doc); } } diff --git a/pmd-php/src/main/java/net/sourceforge/pmd/cpd/PHPTokenizer.java b/pmd-php/src/main/java/net/sourceforge/pmd/cpd/PHPTokenizer.java index 974a32c1e5..b63cfecd0e 100644 --- a/pmd-php/src/main/java/net/sourceforge/pmd/cpd/PHPTokenizer.java +++ b/pmd-php/src/main/java/net/sourceforge/pmd/cpd/PHPTokenizer.java @@ -6,13 +6,15 @@ package net.sourceforge.pmd.cpd; import java.util.List; +import net.sourceforge.pmd.lang.document.TextDocument; + /** * Simple tokenizer for PHP. 
*/ public class PHPTokenizer implements Tokenizer { @Override - public void tokenize(SourceCode tokens, Tokens tokenEntries) { + public void tokenize(TextDocument tokens, Tokens tokenEntries) { List code = tokens.getCode(); for (int i = 0; i < code.size(); i++) { String currentLine = code.get(i); diff --git a/pmd-plsql/src/main/java/net/sourceforge/pmd/cpd/PLSQLTokenizer.java b/pmd-plsql/src/main/java/net/sourceforge/pmd/cpd/PLSQLTokenizer.java index 77abbf8794..4d66cf3089 100644 --- a/pmd-plsql/src/main/java/net/sourceforge/pmd/cpd/PLSQLTokenizer.java +++ b/pmd-plsql/src/main/java/net/sourceforge/pmd/cpd/PLSQLTokenizer.java @@ -8,8 +8,8 @@ import java.util.Properties; import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer; import net.sourceforge.pmd.lang.TokenManager; -import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken; +import net.sourceforge.pmd.lang.document.TextDocument; import net.sourceforge.pmd.lang.plsql.ast.PLSQLTokenKinds; public class PLSQLTokenizer extends JavaCCTokenizer { @@ -66,7 +66,7 @@ public class PLSQLTokenizer extends JavaCCTokenizer { } @Override - protected TokenManager makeLexerImpl(CharStream sourceCode) { - return PLSQLTokenKinds.newTokenManager(sourceCode); + protected TokenManager makeLexerImpl(TextDocument doc) { + return PLSQLTokenKinds.newTokenManager(doc); } } diff --git a/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java b/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java index c80d572f67..89f8dce9ae 100644 --- a/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java +++ b/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java @@ -10,8 +10,8 @@ import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer; import net.sourceforge.pmd.lang.TokenManager; import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken; -import 
net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument.TokenDocumentBehavior; +import net.sourceforge.pmd.lang.document.TextDocument; import net.sourceforge.pmd.lang.python.ast.PythonTokenKinds; /** @@ -24,13 +24,8 @@ public class PythonTokenizer extends JavaCCTokenizer { private static final TokenDocumentBehavior TOKEN_BEHAVIOR = new TokenDocumentBehavior(PythonTokenKinds.TOKEN_NAMES); @Override - protected TokenManager makeLexerImpl(CharStream sourceCode) { - return PythonTokenKinds.newTokenManager(sourceCode); - } - - @Override - protected JavaccTokenDocument.TokenDocumentBehavior tokenBehavior() { - return TOKEN_BEHAVIOR; + protected TokenManager makeLexerImpl(TextDocument doc) { + return PythonTokenKinds.newTokenManager(CharStream.create(doc, TOKEN_BEHAVIOR)); } @Override diff --git a/pmd-scala-modules/pmd-scala-common/src/main/java/net/sourceforge/pmd/cpd/ScalaTokenizer.java b/pmd-scala-modules/pmd-scala-common/src/main/java/net/sourceforge/pmd/cpd/ScalaTokenizer.java index 060f24cbd4..5c2b98892b 100644 --- a/pmd-scala-modules/pmd-scala-common/src/main/java/net/sourceforge/pmd/cpd/ScalaTokenizer.java +++ b/pmd-scala-modules/pmd-scala-common/src/main/java/net/sourceforge/pmd/cpd/ScalaTokenizer.java @@ -62,7 +62,7 @@ public class ScalaTokenizer implements Tokenizer { } @Override - public void tokenize(SourceCode sourceCode, Tokens tokenEntries) throws IOException { + public void tokenize(TextDocument sourceCode, Tokens tokenEntries) throws IOException { try (TextDocument textDoc = TextDocument.create(CpdCompat.cpdCompat(sourceCode))) { diff --git a/pmd-visualforce/src/main/java/net/sourceforge/pmd/cpd/VfTokenizer.java b/pmd-visualforce/src/main/java/net/sourceforge/pmd/cpd/VfTokenizer.java index 17926160b0..d831ee0f7d 100644 --- a/pmd-visualforce/src/main/java/net/sourceforge/pmd/cpd/VfTokenizer.java +++ b/pmd-visualforce/src/main/java/net/sourceforge/pmd/cpd/VfTokenizer.java @@ -6,7 
+6,6 @@ package net.sourceforge.pmd.cpd; import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer; import net.sourceforge.pmd.lang.TokenManager; -import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaEscapeTranslator; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument; @@ -21,8 +20,8 @@ import net.sourceforge.pmd.lang.vf.ast.VfTokenKinds; public class VfTokenizer extends JavaCCTokenizer { @Override - protected TokenManager makeLexerImpl(CharStream sourceCode) { - return VfTokenKinds.newTokenManager(sourceCode); + protected TokenManager makeLexerImpl(TextDocument doc) { + return VfTokenKinds.newTokenManager(doc); } @Override