From d4c05d1fb5073f5375ce81a2b53926dea95c56e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Fournier?=
Date: Fri, 10 Feb 2023 16:24:11 +0100
Subject: [PATCH] Make pmd language have a hook to launch CPD
This allows removing the CPD language instances,
sharing more logic between CPD and PMD, and using
language properties to configure CPD and PMD uniformly.
---
.../sourceforge/pmd/cpd/ApexTokenizer.java | 3 +-
.../pmd/cpd/AbstractTokenizer.java | 4 +-
.../net/sourceforge/pmd/cpd/AnyTokenizer.java | 3 +-
.../net/sourceforge/pmd/cpd/Tokenizer.java | 38 ++++++++++++-
.../pmd/cpd/internal/AntlrTokenizer.java | 46 +++------------
.../pmd/cpd/internal/JavaCCTokenizer.java | 55 +-----------------
.../pmd/cpd/internal/TokenizerBase.java | 42 ++++++++++++++
.../pmd/cpd/token/TokenFilter.java | 4 +-
.../cpd/token/internal/BaseTokenFilter.java | 2 +-
.../pmd/lang/CpdOnlyLanguageModuleBase.java | 28 +++++++++
.../net/sourceforge/pmd/lang/Language.java | 34 ++++++++++-
.../pmd/lang/LanguageRegistry.java | 1 +
.../pmd/lang/ast/impl/javacc/CharStream.java | 9 +++
.../lang/impl/SimpleLanguageModuleBase.java | 7 ++-
.../net/sourceforge/pmd/cpd/CPPLanguage.java | 32 -----------
.../net/sourceforge/pmd/cpd/CPPTokenizer.java | 57 +++++++------------
.../pmd/lang/cpp/CppLanguageModule.java | 56 ++++++++++++++++++
.../services/net.sourceforge.pmd.cpd.Language | 1 -
.../net.sourceforge.pmd.lang.Language | 1 +
.../pmd/cpd/CppCharStreamTest.java | 12 ++--
.../net/sourceforge/pmd/cpd/CsLanguage.java | 28 ---------
.../net/sourceforge/pmd/cpd/CsTokenizer.java | 38 +++++--------
.../pmd/lang/cs/CsLanguageModule.java | 42 ++++++++++++++
.../services/net.sourceforge.pmd.cpd.Language | 1 -
.../net.sourceforge.pmd.lang.Language | 1 +
.../sourceforge/pmd/cpd/DartTokenizer.java | 9 +--
.../sourceforge/pmd/cpd/GroovyTokenizer.java | 3 +-
.../pmd/lang/html/ast/HtmlTokenizer.java | 3 +-
.../sourceforge/pmd/cpd/JavaTokenizer.java | 16 ++----
.../pmd/cpd/EcmascriptTokenizer.java | 5 +-
.../net/sourceforge/pmd/cpd/JSPTokenizer.java | 11 +---
.../sourceforge/pmd/cpd/MatlabTokenizer.java | 6 +-
.../pmd/cpd/ModelicaTokenizer.java | 8 +--
.../pmd/cpd/ObjectiveCTokenizer.java | 6 +-
.../net/sourceforge/pmd/cpd/PHPTokenizer.java | 4 +-
.../sourceforge/pmd/cpd/PLSQLTokenizer.java | 6 +-
.../sourceforge/pmd/cpd/PythonTokenizer.java | 11 +---
.../sourceforge/pmd/cpd/ScalaTokenizer.java | 2 +-
.../net/sourceforge/pmd/cpd/VfTokenizer.java | 5 +-
39 files changed, 357 insertions(+), 283 deletions(-)
create mode 100644 pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/TokenizerBase.java
create mode 100644 pmd-core/src/main/java/net/sourceforge/pmd/lang/CpdOnlyLanguageModuleBase.java
delete mode 100644 pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPLanguage.java
create mode 100644 pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/CppLanguageModule.java
delete mode 100644 pmd-cpp/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language
create mode 100644 pmd-cpp/src/main/resources/META-INF/services/net.sourceforge.pmd.lang.Language
delete mode 100644 pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsLanguage.java
create mode 100644 pmd-cs/src/main/java/net/sourceforge/pmd/lang/cs/CsLanguageModule.java
delete mode 100644 pmd-cs/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language
create mode 100644 pmd-cs/src/main/resources/META-INF/services/net.sourceforge.pmd.lang.Language
diff --git a/pmd-apex/src/main/java/net/sourceforge/pmd/cpd/ApexTokenizer.java b/pmd-apex/src/main/java/net/sourceforge/pmd/cpd/ApexTokenizer.java
index afe8db0b3f..d2a01abb01 100644
--- a/pmd-apex/src/main/java/net/sourceforge/pmd/cpd/ApexTokenizer.java
+++ b/pmd-apex/src/main/java/net/sourceforge/pmd/cpd/ApexTokenizer.java
@@ -13,6 +13,7 @@ import org.antlr.runtime.Token;
import net.sourceforge.pmd.lang.apex.ApexJorjeLogging;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
+import net.sourceforge.pmd.lang.document.TextDocument;
import apex.jorje.parser.impl.ApexLexer;
@@ -35,7 +36,7 @@ public class ApexTokenizer implements Tokenizer {
}
@Override
- public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
+ public void tokenize(TextDocument sourceCode, Tokens tokenEntries) {
StringBuilder code = sourceCode.getCodeBuffer();
ANTLRStringStream ass = new ANTLRStringStream(code.toString());
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AbstractTokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AbstractTokenizer.java
index aa21a4db28..5db9827346 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AbstractTokenizer.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AbstractTokenizer.java
@@ -7,6 +7,8 @@ package net.sourceforge.pmd.cpd;
import java.util.List;
import java.util.Locale;
+import net.sourceforge.pmd.lang.document.TextDocument;
+
/**
*
* @author Zev Blut zb@ubit.com
@@ -48,7 +50,7 @@ public abstract class AbstractTokenizer implements Tokenizer {
private boolean downcaseString = true;
@Override
- public void tokenize(SourceCode tokens, Tokens tokenEntries) {
+ public void tokenize(TextDocument tokens, Tokens tokenEntries) {
code = tokens.getCode();
for (lineNumber = 0; lineNumber < code.size(); lineNumber++) {
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AnyTokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AnyTokenizer.java
index e53f29e533..6e02dda6b8 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AnyTokenizer.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AnyTokenizer.java
@@ -9,6 +9,7 @@ import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
+import net.sourceforge.pmd.lang.document.TextDocument;
import net.sourceforge.pmd.util.StringUtil;
/**
@@ -60,7 +61,7 @@ public class AnyTokenizer implements Tokenizer {
}
@Override
- public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
+ public void tokenize(TextDocument sourceCode, Tokens tokenEntries) {
CharSequence text = sourceCode.getCodeBuffer();
Matcher matcher = pattern.matcher(text);
int lineNo = 1;
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java
index e6876fb960..2e0d77f770 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java
@@ -6,7 +6,43 @@ package net.sourceforge.pmd.cpd;
import java.io.IOException;
+import net.sourceforge.pmd.lang.document.TextDocument;
+import net.sourceforge.pmd.properties.PropertyDescriptor;
+import net.sourceforge.pmd.properties.PropertyFactory;
+
public interface Tokenizer {
+
+ PropertyDescriptor CPD_IGNORE_LITERAL_SEQUENCES =
+ PropertyFactory.booleanProperty("cpdIgnoreLiteralSequences")
+ .defaultValue(false)
+ .desc("Ignore sequences of literals, eg `0, 0, 0, 0`")
+ .build();
+
+ PropertyDescriptor CPD_ANONYMiZE_LITERALS =
+ PropertyFactory.booleanProperty("cpdAnonymizeLiterals")
+ .defaultValue(false)
+ .desc("Anonymize literals. They are still part of the token stream but all literals appear to have the same value.")
+ .build();
+ PropertyDescriptor CPD_ANONYMIZE_IDENTIFIERS =
+ PropertyFactory.booleanProperty("cpdAnonymizeIdentifiers")
+ .defaultValue(false)
+ .desc("Anonymize identifiers. They are still part of the token stream but all identifiers appear to have the same value.")
+ .build();
+
+
+ PropertyDescriptor CPD_IGNORE_IMPORTS =
+ PropertyFactory.booleanProperty("cpdIgnoreImports")
+ .defaultValue(true)
+ .desc("Ignore import statements and equivalent (eg using statements in C#).")
+ .build();
+
+ PropertyDescriptor CPD_IGNORE_METADATA =
+ PropertyFactory.booleanProperty("cpdIgnoreMetadata")
+ .defaultValue(false)
+ .desc("Ignore metadata such as Java annotations or C# attributes.")
+ .build();
+
+
String IGNORE_LITERALS = "ignore_literals";
String IGNORE_IDENTIFIERS = "ignore_identifiers";
String IGNORE_ANNOTATIONS = "ignore_annotations";
@@ -39,5 +75,5 @@ public interface Tokenizer {
String DEFAULT_SKIP_BLOCKS_PATTERN = "#if 0|#endif";
- void tokenize(SourceCode sourceCode, Tokens tokenEntries) throws IOException;
+ void tokenize(TextDocument sourceCode, Tokens tokenEntries) throws IOException;
}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/AntlrTokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/AntlrTokenizer.java
index b09703881a..d5a3472281 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/AntlrTokenizer.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/AntlrTokenizer.java
@@ -4,58 +4,26 @@
package net.sourceforge.pmd.cpd.internal;
-import java.io.IOException;
-import java.io.UncheckedIOException;
-
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.Lexer;
-import net.sourceforge.pmd.cpd.SourceCode;
-import net.sourceforge.pmd.cpd.TokenEntry;
import net.sourceforge.pmd.cpd.Tokenizer;
-import net.sourceforge.pmd.cpd.Tokens;
-import net.sourceforge.pmd.cpd.token.AntlrTokenFilter;
+import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrToken;
import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrTokenManager;
-import net.sourceforge.pmd.lang.document.CpdCompat;
import net.sourceforge.pmd.lang.document.TextDocument;
/**
* Generic implementation of a {@link Tokenizer} useful to any Antlr grammar.
*/
-public abstract class AntlrTokenizer implements Tokenizer {
+public abstract class AntlrTokenizer extends TokenizerBase {
+ @Override
+ protected final TokenManager makeLexerImpl(TextDocument doc) {
+ CharStream charStream = CharStreams.fromString(doc.getText().toString(), doc.getDisplayName());
+ return new AntlrTokenManager(getLexerForSource(charStream), doc);
+ }
protected abstract Lexer getLexerForSource(CharStream charStream);
- @Override
- public void tokenize(final SourceCode sourceCode, final Tokens tokenEntries) {
- try (TextDocument textDoc = TextDocument.create(CpdCompat.cpdCompat(sourceCode))) {
-
- CharStream charStream = CharStreams.fromString(textDoc.getText().toString(), textDoc.getDisplayName());
-
- final AntlrTokenManager tokenManager = new AntlrTokenManager(getLexerForSource(charStream), textDoc);
- final AntlrTokenFilter tokenFilter = getTokenFilter(tokenManager);
-
- AntlrToken currentToken = tokenFilter.getNextToken();
- while (currentToken != null) {
- processToken(tokenEntries, currentToken);
- currentToken = tokenFilter.getNextToken();
- }
-
- } catch (IOException e) {
- throw new UncheckedIOException(e);
- } finally {
- tokenEntries.add(TokenEntry.getEOF());
- }
- }
-
- protected AntlrTokenFilter getTokenFilter(final AntlrTokenManager tokenManager) {
- return new AntlrTokenFilter(tokenManager);
- }
-
- private void processToken(final Tokens tokenEntries, final AntlrToken token) {
- final TokenEntry tokenEntry = new TokenEntry(token.getImage(), token.getReportLocation());
- tokenEntries.add(tokenEntry);
- }
}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/JavaCCTokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/JavaCCTokenizer.java
index 3c45b96033..3a629d5af4 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/JavaCCTokenizer.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/JavaCCTokenizer.java
@@ -4,61 +4,8 @@
package net.sourceforge.pmd.cpd.internal;
-import java.io.IOException;
-
-import net.sourceforge.pmd.cpd.SourceCode;
-import net.sourceforge.pmd.cpd.TokenEntry;
-import net.sourceforge.pmd.cpd.Tokenizer;
-import net.sourceforge.pmd.cpd.Tokens;
-import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
-import net.sourceforge.pmd.cpd.token.TokenFilter;
-import net.sourceforge.pmd.lang.TokenManager;
-import net.sourceforge.pmd.lang.ast.FileAnalysisException;
-import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
-import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument.TokenDocumentBehavior;
-import net.sourceforge.pmd.lang.document.CpdCompat;
-import net.sourceforge.pmd.lang.document.TextDocument;
-public abstract class JavaCCTokenizer implements Tokenizer {
+public abstract class JavaCCTokenizer extends TokenizerBase {
- @SuppressWarnings("PMD.CloseResource")
- protected TokenManager getLexerForSource(TextDocument sourceCode) throws IOException {
- return makeLexerImpl(CharStream.create(sourceCode, tokenBehavior()));
- }
-
- protected TokenDocumentBehavior tokenBehavior() {
- return TokenDocumentBehavior.DEFAULT;
- }
-
- protected abstract TokenManager makeLexerImpl(CharStream sourceCode);
-
- protected TokenFilter getTokenFilter(TokenManager tokenManager) {
- return new JavaCCTokenFilter(tokenManager);
- }
-
- protected TokenEntry processToken(Tokens tokenEntries, JavaccToken currentToken) {
- return new TokenEntry(getImage(currentToken), currentToken.getReportLocation());
- }
-
- protected String getImage(JavaccToken token) {
- return token.getImage();
- }
-
- @Override
- public void tokenize(SourceCode sourceCode, Tokens tokenEntries) throws IOException {
- try (TextDocument textDoc = TextDocument.create(CpdCompat.cpdCompat(sourceCode))) {
- TokenManager tokenManager = getLexerForSource(textDoc);
- final TokenFilter tokenFilter = getTokenFilter(tokenManager);
- JavaccToken currentToken = tokenFilter.getNextToken();
- while (currentToken != null) {
- tokenEntries.add(processToken(tokenEntries, currentToken));
- currentToken = tokenFilter.getNextToken();
- }
- } catch (FileAnalysisException e) {
- throw e.setFileName(sourceCode.getFileName());
- } finally {
- tokenEntries.add(TokenEntry.getEOF());
- }
- }
}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/TokenizerBase.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/TokenizerBase.java
new file mode 100644
index 0000000000..07d6e9894f
--- /dev/null
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/TokenizerBase.java
@@ -0,0 +1,42 @@
+/**
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.cpd.internal;
+
+import java.io.IOException;
+
+import net.sourceforge.pmd.cpd.TokenEntry;
+import net.sourceforge.pmd.cpd.Tokenizer;
+import net.sourceforge.pmd.cpd.Tokens;
+import net.sourceforge.pmd.cpd.token.internal.BaseTokenFilter;
+import net.sourceforge.pmd.lang.TokenManager;
+import net.sourceforge.pmd.lang.ast.GenericToken;
+import net.sourceforge.pmd.lang.document.TextDocument;
+
+public abstract class TokenizerBase<T extends GenericToken<T>> implements Tokenizer {
+
+ protected abstract TokenManager makeLexerImpl(TextDocument doc);
+
+ protected TokenManager filterTokenStream(TokenManager tokenManager) {
+ return new BaseTokenFilter<>(tokenManager);
+ }
+
+ protected TokenEntry processToken(Tokens tokenEntries, T currentToken) {
+ return new TokenEntry(getImage(currentToken), currentToken.getReportLocation());
+ }
+
+ protected String getImage(T token) {
+ return token.getImage();
+ }
+
+ @Override
+ public void tokenize(TextDocument document, Tokens tokenEntries) throws IOException {
+ TokenManager tokenManager = filterTokenStream(makeLexerImpl(document));
+ T currentToken = tokenManager.getNextToken();
+ while (currentToken != null) {
+ tokenEntries.add(processToken(tokenEntries, currentToken));
+ currentToken = tokenManager.getNextToken();
+ }
+ }
+}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/TokenFilter.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/TokenFilter.java
index 3671f109db..469b33d89f 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/TokenFilter.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/TokenFilter.java
@@ -4,16 +4,18 @@
package net.sourceforge.pmd.cpd.token;
+import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.GenericToken;
/**
* Defines filter to be applied to the token stream during CPD analysis
*/
-public interface TokenFilter<T extends GenericToken<T>> {
+public interface TokenFilter<T extends GenericToken<T>> extends TokenManager<T> {
/**
* Retrieves the next token to pass the filter
* @return The next token to pass the filter, or null if the end of the stream was reached
*/
+ @Override
T getNextToken();
}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/internal/BaseTokenFilter.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/internal/BaseTokenFilter.java
index 6d980ea41e..d4d6e7c90b 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/internal/BaseTokenFilter.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/internal/BaseTokenFilter.java
@@ -18,7 +18,7 @@ import net.sourceforge.pmd.lang.ast.GenericToken;
* A generic filter for PMD token managers that allows to use comments
* to enable / disable analysis of parts of the stream
*/
-public abstract class BaseTokenFilter<T extends GenericToken<T>> implements TokenFilter<T> {
+public class BaseTokenFilter<T extends GenericToken<T>> implements TokenFilter<T> {
private final TokenManager tokenManager;
private final LinkedList unprocessedTokens; // NOPMD - used both as Queue and List
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/CpdOnlyLanguageModuleBase.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/CpdOnlyLanguageModuleBase.java
new file mode 100644
index 0000000000..ade537f08e
--- /dev/null
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/CpdOnlyLanguageModuleBase.java
@@ -0,0 +1,28 @@
+/*
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.lang;
+
+/**
+ * Base class for language modules that only support CPD and not PMD.
+ *
+ * @author Clément Fournier
+ */
+public abstract class CpdOnlyLanguageModuleBase extends LanguageModuleBase {
+
+ /**
+ * Construct a module instance using the given metadata. The metadata must
+ * be properly constructed.
+ *
+ * @throws IllegalStateException If the metadata is invalid (eg missing extensions or name)
+ */
+ protected CpdOnlyLanguageModuleBase(LanguageMetadata metadata) {
+ super(metadata);
+ }
+
+ @Override
+ public boolean supportsParsing() {
+ return false;
+ }
+}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/Language.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/Language.java
index 4d10b41e4d..045dd2c9a6 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/Language.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/Language.java
@@ -8,6 +8,8 @@ import java.util.List;
import java.util.ServiceLoader;
import java.util.Set;
+import net.sourceforge.pmd.cpd.Tokenizer;
+
/**
* Represents a language module, and provides access to language-specific
* functionality. You can get a language instance from a {@link LanguageRegistry}.
@@ -156,6 +158,14 @@ public interface Language extends Comparable {
return new LanguagePropertyBundle(this);
}
+ /**
+ * Return true if this language supports parsing files into an AST.
+ * In that case {@link #createProcessor(LanguagePropertyBundle)} should
+ * also be implemented.
+ */
+ default boolean supportsParsing() {
+ return false;
+ }
/**
* Create a new {@link LanguageProcessor} for this language, given
@@ -167,8 +177,30 @@ public interface Language extends Comparable {
* @param bundle A bundle of properties created by this instance.
*
* @return A new language processor
+ *
+ * @throws UnsupportedOperationException if this language does not support PMD
*/
- LanguageProcessor createProcessor(LanguagePropertyBundle bundle);
+ default LanguageProcessor createProcessor(LanguagePropertyBundle bundle) {
+ throw new UnsupportedOperationException(this + " does not support running a PMD analysis.");
+ }
+
+
+ /**
+ * Create a new {@link Tokenizer} for this language, given
+ * a property bundle with configuration. The bundle was created by
+ * this instance using {@link #newPropertyBundle()}. It can be assumed
+ * that the bundle will never be mutated anymore, and this method
+ * takes ownership of it.
+ *
+ * @param bundle A bundle of properties created by this instance.
+ *
+ * @return A new tokenizer
+ *
+ * @throws UnsupportedOperationException if this language does not support CPD
+ */
+ default Tokenizer createCpdTokenizer(LanguagePropertyBundle bundle) {
+ throw new UnsupportedOperationException(this + " does not support running a CPD analysis.");
+ }
/**
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/LanguageRegistry.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/LanguageRegistry.java
index 3adf16c8fb..5cfd9f9baf 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/LanguageRegistry.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/LanguageRegistry.java
@@ -41,6 +41,7 @@ public final class LanguageRegistry implements Iterable {
* of the classloader of this class. This can be used as a "default" registry.
*/
public static final LanguageRegistry PMD = loadLanguages(LanguageRegistry.class.getClassLoader());
+ public static final LanguageRegistry CPD = loadLanguages(LanguageRegistry.class.getClassLoader()); // todo
private final Set languages;
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStream.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStream.java
index d598bff263..9635cf784b 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStream.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStream.java
@@ -42,6 +42,15 @@ public final class CharStream {
return new CharStream(new JavaccTokenDocument(translated, behavior));
}
+ /**
+ * Create a new char stream for the given document with the default token
+ * document behavior. This may create a new {@link TextDocument} view
+ * over the original, which reflects its character escapes.
+ */
+ public static CharStream create(TextDocument doc) throws MalformedSourceException {
+ return create(doc, TokenDocumentBehavior.DEFAULT);
+ }
+
/**
* Returns the next character from the input. After a {@link #backup(int)},
* some of the already read chars must be spit out again.
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/impl/SimpleLanguageModuleBase.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/impl/SimpleLanguageModuleBase.java
index 6f907780b3..23cf5ee7b1 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/impl/SimpleLanguageModuleBase.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/impl/SimpleLanguageModuleBase.java
@@ -20,7 +20,7 @@ import net.sourceforge.pmd.lang.LanguageVersionHandler;
* @author Clément Fournier
* @since 7.0.0
*/
-public class SimpleLanguageModuleBase extends LanguageModuleBase {
+public abstract class SimpleLanguageModuleBase extends LanguageModuleBase {
private final Function handler;
@@ -33,6 +33,11 @@ public class SimpleLanguageModuleBase extends LanguageModuleBase {
this.handler = makeHandler;
}
+ @Override
+ public boolean supportsParsing() {
+ return true;
+ }
+
@Override
public LanguageProcessor createProcessor(LanguagePropertyBundle bundle) {
LanguageVersionHandler services = handler.apply(bundle);
diff --git a/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPLanguage.java b/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPLanguage.java
deleted file mode 100644
index a3dfce0c96..0000000000
--- a/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPLanguage.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
- */
-
-package net.sourceforge.pmd.cpd;
-
-import java.util.Properties;
-
-/**
- * Defines the Language module for C/C++
- */
-public class CPPLanguage extends AbstractLanguage {
-
- /**
- * Creates a new instance of {@link CPPLanguage} with the default extensions
- * for c/c++ files.
- */
- public CPPLanguage() {
- this(System.getProperties());
- }
-
- public CPPLanguage(Properties properties) {
- super("C++", "cpp", new CPPTokenizer(), ".h", ".hpp", ".hxx", ".c", ".cpp", ".cxx", ".cc", ".C");
- setProperties(properties);
- }
-
- @Override
- public void setProperties(Properties properties) {
- super.setProperties(properties);
- ((CPPTokenizer) getTokenizer()).setProperties(properties);
- }
-}
diff --git a/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java b/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java
index 0728e9afbf..bb9f5c2811 100644
--- a/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java
+++ b/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java
@@ -4,46 +4,37 @@
package net.sourceforge.pmd.cpd;
-import java.util.Properties;
import java.util.regex.Pattern;
-import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
+import org.apache.commons.lang3.StringUtils;
+
+import net.sourceforge.pmd.cpd.internal.TokenizerBase;
import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
-import net.sourceforge.pmd.cpd.token.TokenFilter;
+import net.sourceforge.pmd.lang.LanguagePropertyBundle;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument.TokenDocumentBehavior;
import net.sourceforge.pmd.lang.ast.impl.javacc.MalformedSourceException;
+import net.sourceforge.pmd.lang.cpp.CppLanguageModule;
import net.sourceforge.pmd.lang.cpp.ast.CppTokenKinds;
import net.sourceforge.pmd.lang.document.TextDocument;
/**
* The C++ tokenizer.
*/
-public class CPPTokenizer extends JavaCCTokenizer {
+public class CPPTokenizer extends TokenizerBase {
private boolean skipBlocks;
private Pattern skipBlocksStart;
private Pattern skipBlocksEnd;
- private boolean ignoreLiteralSequences = false;
+ private final boolean ignoreLiteralSequences;
- public CPPTokenizer() {
- setProperties(new Properties()); // set the defaults
- }
-
- /**
- * Sets the possible options for the C++ tokenizer.
- *
- * @param properties the properties
- * @see #OPTION_SKIP_BLOCKS
- * @see #OPTION_SKIP_BLOCKS_PATTERN
- * @see #OPTION_IGNORE_LITERAL_SEQUENCES
- */
- public void setProperties(Properties properties) {
- skipBlocks = Boolean.parseBoolean(properties.getProperty(OPTION_SKIP_BLOCKS, Boolean.TRUE.toString()));
- if (skipBlocks) {
- String skipBlocksPattern = properties.getProperty(OPTION_SKIP_BLOCKS_PATTERN, DEFAULT_SKIP_BLOCKS_PATTERN);
+ public CPPTokenizer(LanguagePropertyBundle cppProperties) {
+ ignoreLiteralSequences = cppProperties.getProperty(Tokenizer.CPD_IGNORE_LITERAL_SEQUENCES);
+ String skipBlocksPattern = cppProperties.getProperty(CppLanguageModule.CPD_SKIP_BLOCKS);
+ if (StringUtils.isNotBlank(skipBlocksPattern)) {
+ skipBlocks = true;
String[] split = skipBlocksPattern.split("\\|", 2);
skipBlocksStart = CppBlockSkipper.compileSkipMarker(split[0]);
if (split.length == 1) {
@@ -52,14 +43,15 @@ public class CPPTokenizer extends JavaCCTokenizer {
skipBlocksEnd = CppBlockSkipper.compileSkipMarker(split[1]);
}
}
- ignoreLiteralSequences = Boolean.parseBoolean(properties.getProperty(OPTION_IGNORE_LITERAL_SEQUENCES,
- Boolean.FALSE.toString()));
}
-
@Override
- protected TokenDocumentBehavior tokenBehavior() {
- return new TokenDocumentBehavior(CppTokenKinds.TOKEN_NAMES) {
+ protected TokenManager makeLexerImpl(TextDocument doc) {
+ return CppTokenKinds.newTokenManager(newCharStream(doc));
+ }
+
+ CharStream newCharStream(TextDocument doc) {
+ return CharStream.create(doc, new TokenDocumentBehavior(CppTokenKinds.TOKEN_NAMES) {
@Override
public TextDocument translate(TextDocument text) throws MalformedSourceException {
@@ -68,20 +60,16 @@ public class CPPTokenizer extends JavaCCTokenizer {
}
return new CppEscapeTranslator(text).translateDocument();
}
- };
+ });
}
@Override
- protected TokenManager makeLexerImpl(CharStream sourceCode) {
- return CppTokenKinds.newTokenManager(sourceCode);
- }
-
- @Override
- protected TokenFilter getTokenFilter(final TokenManager tokenManager) {
+ protected TokenManager filterTokenStream(final TokenManager tokenManager) {
return new CppTokenFilter(tokenManager, ignoreLiteralSequences);
}
private static class CppTokenFilter extends JavaCCTokenFilter {
+
private final boolean ignoreLiteralSequences;
private JavaccToken discardingLiteralsUntil = null;
private boolean discardCurrent = false;
@@ -106,8 +94,7 @@ public class CPPTokenizer extends JavaCCTokenizer {
discardCurrent = true;
}
} else if (kind == CppTokenKinds.LCURLYBRACE) {
- final JavaccToken finalToken = findEndOfSequenceOfLiterals(remainingTokens);
- discardingLiteralsUntil = finalToken;
+ discardingLiteralsUntil = findEndOfSequenceOfLiterals(remainingTokens);
}
}
}
diff --git a/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/CppLanguageModule.java b/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/CppLanguageModule.java
new file mode 100644
index 0000000000..eadecc970f
--- /dev/null
+++ b/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/CppLanguageModule.java
@@ -0,0 +1,56 @@
+/*
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.lang.cpp;
+
+import net.sourceforge.pmd.cpd.CPPTokenizer;
+import net.sourceforge.pmd.cpd.Tokenizer;
+import net.sourceforge.pmd.lang.CpdOnlyLanguageModuleBase;
+import net.sourceforge.pmd.lang.LanguagePropertyBundle;
+import net.sourceforge.pmd.lang.LanguageRegistry;
+import net.sourceforge.pmd.properties.PropertyDescriptor;
+import net.sourceforge.pmd.properties.PropertyFactory;
+
+/**
+ * Defines the Language module for C/C++
+ */
+public class CppLanguageModule extends CpdOnlyLanguageModuleBase {
+
+
+ public static final PropertyDescriptor CPD_SKIP_BLOCKS =
+ PropertyFactory.stringProperty("cpdSkipBlocksPattern")
+ .defaultValue("#if 0|#endif")
+ .desc("Specifies a start and end delimiter for CPD to completely ignore. "
+ + "The delimiters are separated by a pipe |. The default skips code "
+ + " that is conditionally compiled out. Set this property to empty to disable this.")
+ .build();
+
+ /**
+ * Creates a new instance of {@link CppLanguageModule} with the default extensions
+ * for c/c++ files.
+ */
+ public CppLanguageModule() {
+ super(LanguageMetadata.withId("cpp")
+ .name("C++")
+ .addDefaultVersion("any")
+ .extensions("h", "hpp", "hxx", "c", "cpp", "cxx", "cc", "C"));
+ }
+
+ public static CppLanguageModule getInstance() {
+ return (CppLanguageModule) LanguageRegistry.CPD.getLanguageById("cpp");
+ }
+
+ @Override
+ public LanguagePropertyBundle newPropertyBundle() {
+ LanguagePropertyBundle bundle = super.newPropertyBundle();
+ bundle.definePropertyDescriptor(Tokenizer.CPD_IGNORE_LITERAL_SEQUENCES);
+ bundle.definePropertyDescriptor(CPD_SKIP_BLOCKS);
+ return bundle;
+ }
+
+ @Override
+ public Tokenizer createCpdTokenizer(LanguagePropertyBundle bundle) {
+ return new CPPTokenizer(bundle);
+ }
+}
diff --git a/pmd-cpp/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language b/pmd-cpp/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language
deleted file mode 100644
index 2170e55e7f..0000000000
--- a/pmd-cpp/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language
+++ /dev/null
@@ -1 +0,0 @@
-net.sourceforge.pmd.cpd.CPPLanguage
diff --git a/pmd-cpp/src/main/resources/META-INF/services/net.sourceforge.pmd.lang.Language b/pmd-cpp/src/main/resources/META-INF/services/net.sourceforge.pmd.lang.Language
new file mode 100644
index 0000000000..ecb3ec91fa
--- /dev/null
+++ b/pmd-cpp/src/main/resources/META-INF/services/net.sourceforge.pmd.lang.Language
@@ -0,0 +1 @@
+net.sourceforge.pmd.lang.cpp.CppLanguageModule
diff --git a/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CppCharStreamTest.java b/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CppCharStreamTest.java
index 3546db9e6c..a44e7cf970 100644
--- a/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CppCharStreamTest.java
+++ b/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CppCharStreamTest.java
@@ -8,20 +8,20 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
-import org.checkerframework.checker.nullness.qual.NonNull;
import org.junit.jupiter.api.Test;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
-import net.sourceforge.pmd.lang.document.CpdCompat;
+import net.sourceforge.pmd.lang.cpp.CppLanguageModule;
import net.sourceforge.pmd.lang.document.TextDocument;
import net.sourceforge.pmd.lang.document.TextFile;
class CppCharStreamTest {
- @NonNull
- public CharStream charStreamFor(String source) throws IOException {
- TextDocument textDoc = TextDocument.readOnlyString(source, TextFile.UNKNOWN_FILENAME, CpdCompat.dummyVersion());
- return CharStream.create(textDoc, new CPPTokenizer().tokenBehavior());
+ public CharStream charStreamFor(String source) {
+ CppLanguageModule cpp = CppLanguageModule.getInstance(); // the registered C++ module supplies both the version and the properties used below
+ TextDocument textDoc = TextDocument.readOnlyString(source, TextFile.UNKNOWN_FILENAME, cpp.getDefaultVersion());
+ CPPTokenizer tokenizer = new CPPTokenizer(cpp.newPropertyBundle()); // freshly created bundle; no property is overridden in this helper
+ return tokenizer.newCharStream(textDoc); // the char stream is obtained from the tokenizer rather than built directly in the test
}
@Test
diff --git a/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsLanguage.java b/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsLanguage.java
deleted file mode 100644
index e54edcddbe..0000000000
--- a/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsLanguage.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/**
- * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
- */
-
-package net.sourceforge.pmd.cpd;
-
-import java.util.Properties;
-
-/**
- * Language implementation for C#
- */
-public class CsLanguage extends AbstractLanguage {
-
- public CsLanguage() {
- this(System.getProperties());
- }
-
- public CsLanguage(Properties properties) {
- super("C#", "cs", new CsTokenizer(), ".cs");
- setProperties(properties);
- }
-
- @Override
- public final void setProperties(Properties properties) {
- CsTokenizer tokenizer = (CsTokenizer) getTokenizer();
- tokenizer.setProperties(properties);
- }
-}
diff --git a/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsTokenizer.java b/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsTokenizer.java
index 64822f1a38..d58ccdb0d7 100644
--- a/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsTokenizer.java
+++ b/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsTokenizer.java
@@ -4,15 +4,15 @@
package net.sourceforge.pmd.cpd;
-import java.util.Properties;
-
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.Lexer;
import net.sourceforge.pmd.cpd.internal.AntlrTokenizer;
import net.sourceforge.pmd.cpd.token.AntlrTokenFilter;
+import net.sourceforge.pmd.cpd.token.internal.BaseTokenFilter;
+import net.sourceforge.pmd.lang.LanguagePropertyBundle;
+import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrToken;
-import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrTokenManager;
import net.sourceforge.pmd.lang.cs.ast.CSharpLexer;
/**
@@ -20,26 +20,14 @@ import net.sourceforge.pmd.lang.cs.ast.CSharpLexer;
*/
public class CsTokenizer extends AntlrTokenizer {
- private boolean ignoreUsings = false;
- private boolean ignoreLiteralSequences = false;
- private boolean ignoreAttributes = false;
+ private final boolean ignoreUsings;
+ private final boolean ignoreLiteralSequences;
+ private final boolean ignoreAttributes;
- /**
- * Sets the possible options for the C# tokenizer.
- *
- * @param properties the properties
- * @see #IGNORE_USINGS
- * @see #OPTION_IGNORE_LITERAL_SEQUENCES
- * @see #IGNORE_ANNOTATIONS
- */
- public void setProperties(Properties properties) {
- ignoreUsings = getBooleanProperty(properties, IGNORE_USINGS);
- ignoreLiteralSequences = getBooleanProperty(properties, OPTION_IGNORE_LITERAL_SEQUENCES);
- ignoreAttributes = getBooleanProperty(properties, IGNORE_ANNOTATIONS);
- }
-
- private boolean getBooleanProperty(final Properties properties, final String property) {
- return Boolean.parseBoolean(properties.getProperty(property, Boolean.FALSE.toString()));
+ public CsTokenizer(LanguagePropertyBundle properties) {
+ ignoreUsings = properties.getProperty(Tokenizer.CPD_IGNORE_IMPORTS); // C# "using" directives are controlled by the generic ignore-imports property
+ ignoreLiteralSequences = properties.getProperty(Tokenizer.CPD_IGNORE_LITERAL_SEQUENCES);
+ ignoreAttributes = properties.getProperty(Tokenizer.CPD_IGNORE_METADATA); // C# attributes are controlled by the generic ignore-metadata property
}
@Override
@@ -48,7 +36,7 @@ public class CsTokenizer extends AntlrTokenizer {
}
@Override
- protected AntlrTokenFilter getTokenFilter(final AntlrTokenManager tokenManager) {
+ protected TokenManager filterTokenStream(TokenManager tokenManager) {
return new CsTokenFilter(tokenManager, ignoreUsings, ignoreLiteralSequences, ignoreAttributes);
}
@@ -60,7 +48,7 @@ public class CsTokenizer extends AntlrTokenizer {
* If the --ignoreUsings flag is provided, using directives are filtered out.
*
*/
- private static class CsTokenFilter extends AntlrTokenFilter {
+ private static class CsTokenFilter extends BaseTokenFilter {
private enum UsingState {
KEYWORD, // just encountered the using keyword
IDENTIFIER, // just encountered an identifier or var keyword
@@ -75,7 +63,7 @@ public class CsTokenizer extends AntlrTokenizer {
private AntlrToken discardingLiteralsUntil = null;
private boolean discardCurrent = false;
- CsTokenFilter(final AntlrTokenManager tokenManager, boolean ignoreUsings, boolean ignoreLiteralSequences, boolean ignoreAttributes) {
+ CsTokenFilter(final TokenManager tokenManager, boolean ignoreUsings, boolean ignoreLiteralSequences, boolean ignoreAttributes) {
super(tokenManager);
this.ignoreUsings = ignoreUsings;
this.ignoreLiteralSequences = ignoreLiteralSequences;
diff --git a/pmd-cs/src/main/java/net/sourceforge/pmd/lang/cs/CsLanguageModule.java b/pmd-cs/src/main/java/net/sourceforge/pmd/lang/cs/CsLanguageModule.java
new file mode 100644
index 0000000000..5f122b8e35
--- /dev/null
+++ b/pmd-cs/src/main/java/net/sourceforge/pmd/lang/cs/CsLanguageModule.java
@@ -0,0 +1,42 @@
+/*
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.lang.cs;
+
+import net.sourceforge.pmd.cpd.CsTokenizer;
+import net.sourceforge.pmd.cpd.Tokenizer;
+import net.sourceforge.pmd.lang.CpdOnlyLanguageModuleBase;
+import net.sourceforge.pmd.lang.LanguagePropertyBundle;
+import net.sourceforge.pmd.lang.LanguageRegistry;
+
+/**
+ * Defines the Language module for C# (id "cs", file extension "cs"): it declares the CPD properties read by {@link CsTokenizer} and creates that tokenizer.
+ */
+public class CsLanguageModule extends CpdOnlyLanguageModuleBase {
+
+ public CsLanguageModule() {
+ super(LanguageMetadata.withId("cs")
+ .name("C#")
+ .addDefaultVersion("any") // the only version declared here
+ .extensions("cs"));
+ }
+
+ public static CsLanguageModule getInstance() {
+ return (CsLanguageModule) LanguageRegistry.CPD.getLanguageById("cs"); // same id as passed to withId(...) in the constructor; NOTE(review): the result is cast without a null check
+ }
+
+ @Override
+ public LanguagePropertyBundle newPropertyBundle() {
+ LanguagePropertyBundle bundle = super.newPropertyBundle(); // start from the inherited bundle, then add the CPD properties below
+ bundle.definePropertyDescriptor(Tokenizer.CPD_IGNORE_LITERAL_SEQUENCES);
+ bundle.definePropertyDescriptor(Tokenizer.CPD_IGNORE_IMPORTS); // read by CsTokenizer as its ignoreUsings flag
+ bundle.definePropertyDescriptor(Tokenizer.CPD_IGNORE_METADATA); // read by CsTokenizer as its ignoreAttributes flag
+ return bundle;
+ }
+
+ @Override
+ public Tokenizer createCpdTokenizer(LanguagePropertyBundle bundle) {
+ return new CsTokenizer(bundle); // a new tokenizer per call; it reads its three flags from the bundle in its constructor
+ }
+}
diff --git a/pmd-cs/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language b/pmd-cs/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language
deleted file mode 100644
index 76459b4741..0000000000
--- a/pmd-cs/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language
+++ /dev/null
@@ -1 +0,0 @@
-net.sourceforge.pmd.cpd.CsLanguage
diff --git a/pmd-cs/src/main/resources/META-INF/services/net.sourceforge.pmd.lang.Language b/pmd-cs/src/main/resources/META-INF/services/net.sourceforge.pmd.lang.Language
new file mode 100644
index 0000000000..1b979f896f
--- /dev/null
+++ b/pmd-cs/src/main/resources/META-INF/services/net.sourceforge.pmd.lang.Language
@@ -0,0 +1 @@
+net.sourceforge.pmd.lang.cs.CsLanguageModule
diff --git a/pmd-dart/src/main/java/net/sourceforge/pmd/cpd/DartTokenizer.java b/pmd-dart/src/main/java/net/sourceforge/pmd/cpd/DartTokenizer.java
index 6c847754a2..06a2527d9e 100644
--- a/pmd-dart/src/main/java/net/sourceforge/pmd/cpd/DartTokenizer.java
+++ b/pmd-dart/src/main/java/net/sourceforge/pmd/cpd/DartTokenizer.java
@@ -9,8 +9,9 @@ import org.antlr.v4.runtime.Lexer;
import net.sourceforge.pmd.cpd.internal.AntlrTokenizer;
import net.sourceforge.pmd.cpd.token.AntlrTokenFilter;
+import net.sourceforge.pmd.cpd.token.internal.BaseTokenFilter;
+import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrToken;
-import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrTokenManager;
import net.sourceforge.pmd.lang.dart.ast.DartLexer;
/**
@@ -24,7 +25,7 @@ public class DartTokenizer extends AntlrTokenizer {
}
@Override
- protected AntlrTokenFilter getTokenFilter(final AntlrTokenManager tokenManager) {
+ protected TokenManager filterTokenStream(TokenManager tokenManager) {
return new DartTokenFilter(tokenManager);
}
@@ -36,12 +37,12 @@ public class DartTokenizer extends AntlrTokenizer {
* enables comment-based CPD suppression.
*
*/
- private static class DartTokenFilter extends AntlrTokenFilter {
+ private static class DartTokenFilter extends BaseTokenFilter {
private boolean discardingLibraryAndImport = false;
private boolean discardingNL = false;
private boolean discardingSemicolon = false;
- /* default */ DartTokenFilter(final AntlrTokenManager tokenManager) {
+ /* default */ DartTokenFilter(final TokenManager tokenManager) {
super(tokenManager);
}
diff --git a/pmd-groovy/src/main/java/net/sourceforge/pmd/cpd/GroovyTokenizer.java b/pmd-groovy/src/main/java/net/sourceforge/pmd/cpd/GroovyTokenizer.java
index 654342e2b6..79ecf7b6b5 100644
--- a/pmd-groovy/src/main/java/net/sourceforge/pmd/cpd/GroovyTokenizer.java
+++ b/pmd-groovy/src/main/java/net/sourceforge/pmd/cpd/GroovyTokenizer.java
@@ -10,6 +10,7 @@ import org.codehaus.groovy.antlr.SourceInfo;
import org.codehaus.groovy.antlr.parser.GroovyLexer;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
+import net.sourceforge.pmd.lang.document.TextDocument;
import groovyjarjarantlr.Token;
import groovyjarjarantlr.TokenStream;
@@ -21,7 +22,7 @@ import groovyjarjarantlr.TokenStreamException;
public class GroovyTokenizer implements Tokenizer {
@Override
- public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
+ public void tokenize(TextDocument sourceCode, Tokens tokenEntries) {
StringBuilder buffer = sourceCode.getCodeBuffer();
GroovyLexer lexer = new GroovyLexer(new StringReader(buffer.toString()));
diff --git a/pmd-html/src/main/java/net/sourceforge/pmd/lang/html/ast/HtmlTokenizer.java b/pmd-html/src/main/java/net/sourceforge/pmd/lang/html/ast/HtmlTokenizer.java
index e7fad1cbe9..8f70dc69f0 100644
--- a/pmd-html/src/main/java/net/sourceforge/pmd/lang/html/ast/HtmlTokenizer.java
+++ b/pmd-html/src/main/java/net/sourceforge/pmd/lang/html/ast/HtmlTokenizer.java
@@ -7,7 +7,6 @@ package net.sourceforge.pmd.lang.html.ast;
import java.io.IOException;
import java.io.UncheckedIOException;
-import net.sourceforge.pmd.cpd.SourceCode;
import net.sourceforge.pmd.cpd.TokenEntry;
import net.sourceforge.pmd.cpd.Tokenizer;
import net.sourceforge.pmd.cpd.Tokens;
@@ -22,7 +21,7 @@ import net.sourceforge.pmd.lang.html.HtmlLanguageModule;
public class HtmlTokenizer implements Tokenizer {
@Override
- public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
+ public void tokenize(TextDocument sourceCode, Tokens tokenEntries) {
HtmlLanguageModule html = HtmlLanguageModule.getInstance();
try (LanguageProcessor processor = html.createProcessor(html.newPropertyBundle());
diff --git a/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java b/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java
index 525d1731b2..8a54eca671 100644
--- a/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java
@@ -11,11 +11,10 @@ import java.util.Properties;
import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
-import net.sourceforge.pmd.cpd.token.TokenFilter;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
-import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument;
+import net.sourceforge.pmd.lang.document.TextDocument;
import net.sourceforge.pmd.lang.java.ast.InternalApiBridge;
import net.sourceforge.pmd.lang.java.ast.JavaTokenKinds;
@@ -37,23 +36,18 @@ public class JavaTokenizer extends JavaCCTokenizer {
}
@Override
- public void tokenize(SourceCode sourceCode, Tokens tokenEntries) throws IOException {
+ public void tokenize(TextDocument sourceCode, Tokens tokenEntries) throws IOException {
constructorDetector = new ConstructorDetector(ignoreIdentifiers);
super.tokenize(sourceCode, tokenEntries);
}
@Override
- protected JavaccTokenDocument.TokenDocumentBehavior tokenBehavior() {
- return InternalApiBridge.javaTokenDoc();
+ protected TokenManager makeLexerImpl(TextDocument doc) {
+ return JavaTokenKinds.newTokenManager(CharStream.create(doc, InternalApiBridge.javaTokenDoc()));
}
@Override
- protected TokenManager makeLexerImpl(CharStream sourceCode) {
- return JavaTokenKinds.newTokenManager(sourceCode);
- }
-
- @Override
- protected TokenFilter getTokenFilter(TokenManager tokenManager) {
+ protected TokenManager filterTokenStream(TokenManager tokenManager) {
return new JavaTokenFilter(tokenManager, ignoreAnnotations);
}
diff --git a/pmd-javascript/src/main/java/net/sourceforge/pmd/cpd/EcmascriptTokenizer.java b/pmd-javascript/src/main/java/net/sourceforge/pmd/cpd/EcmascriptTokenizer.java
index d66d74949f..1a2de570ec 100644
--- a/pmd-javascript/src/main/java/net/sourceforge/pmd/cpd/EcmascriptTokenizer.java
+++ b/pmd-javascript/src/main/java/net/sourceforge/pmd/cpd/EcmascriptTokenizer.java
@@ -8,6 +8,7 @@ import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
+import net.sourceforge.pmd.lang.document.TextDocument;
import net.sourceforge.pmd.lang.ecmascript5.ast.Ecmascript5TokenKinds;
/**
@@ -16,8 +17,8 @@ import net.sourceforge.pmd.lang.ecmascript5.ast.Ecmascript5TokenKinds;
public class EcmascriptTokenizer extends JavaCCTokenizer {
@Override
- protected TokenManager makeLexerImpl(CharStream sourceCode) {
- return Ecmascript5TokenKinds.newTokenManager(sourceCode);
+ protected TokenManager makeLexerImpl(TextDocument doc) {
+ return Ecmascript5TokenKinds.newTokenManager(CharStream.create(doc));
}
@Override
diff --git a/pmd-jsp/src/main/java/net/sourceforge/pmd/cpd/JSPTokenizer.java b/pmd-jsp/src/main/java/net/sourceforge/pmd/cpd/JSPTokenizer.java
index 5617484d1b..d32b96973e 100644
--- a/pmd-jsp/src/main/java/net/sourceforge/pmd/cpd/JSPTokenizer.java
+++ b/pmd-jsp/src/main/java/net/sourceforge/pmd/cpd/JSPTokenizer.java
@@ -8,20 +8,15 @@ import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
-import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument;
+import net.sourceforge.pmd.lang.document.TextDocument;
import net.sourceforge.pmd.lang.jsp.ast.JspParser;
import net.sourceforge.pmd.lang.jsp.ast.JspTokenKinds;
public class JSPTokenizer extends JavaCCTokenizer {
@Override
- protected TokenManager makeLexerImpl(CharStream sourceCode) {
- return JspTokenKinds.newTokenManager(sourceCode);
- }
-
- @Override
- protected JavaccTokenDocument.TokenDocumentBehavior tokenBehavior() {
- return JspParser.getTokenBehavior();
+ protected TokenManager makeLexerImpl(TextDocument doc) {
+ return JspTokenKinds.newTokenManager(CharStream.create(doc, JspParser.getTokenBehavior()));
}
}
diff --git a/pmd-matlab/src/main/java/net/sourceforge/pmd/cpd/MatlabTokenizer.java b/pmd-matlab/src/main/java/net/sourceforge/pmd/cpd/MatlabTokenizer.java
index 9459c44696..b2233923b3 100644
--- a/pmd-matlab/src/main/java/net/sourceforge/pmd/cpd/MatlabTokenizer.java
+++ b/pmd-matlab/src/main/java/net/sourceforge/pmd/cpd/MatlabTokenizer.java
@@ -6,8 +6,8 @@ package net.sourceforge.pmd.cpd;
import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
import net.sourceforge.pmd.lang.TokenManager;
-import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
+import net.sourceforge.pmd.lang.document.TextDocument;
import net.sourceforge.pmd.lang.matlab.ast.MatlabTokenKinds;
/**
@@ -16,7 +16,7 @@ import net.sourceforge.pmd.lang.matlab.ast.MatlabTokenKinds;
public class MatlabTokenizer extends JavaCCTokenizer {
@Override
- protected TokenManager makeLexerImpl(CharStream sourceCode) {
- return MatlabTokenKinds.newTokenManager(sourceCode);
+ protected TokenManager makeLexerImpl(TextDocument doc) {
+ return MatlabTokenKinds.newTokenManager(doc); // NOTE(review): doc is passed without CharStream.create(doc), unlike EcmascriptTokenizer — confirm newTokenManager accepts a TextDocument
}
}
diff --git a/pmd-modelica/src/main/java/net/sourceforge/pmd/cpd/ModelicaTokenizer.java b/pmd-modelica/src/main/java/net/sourceforge/pmd/cpd/ModelicaTokenizer.java
index 3258a3cda7..61bbad3226 100644
--- a/pmd-modelica/src/main/java/net/sourceforge/pmd/cpd/ModelicaTokenizer.java
+++ b/pmd-modelica/src/main/java/net/sourceforge/pmd/cpd/ModelicaTokenizer.java
@@ -7,20 +7,20 @@ package net.sourceforge.pmd.cpd;
import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
import net.sourceforge.pmd.lang.TokenManager;
-import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
+import net.sourceforge.pmd.lang.document.TextDocument;
import net.sourceforge.pmd.lang.modelica.ast.ModelicaTokenKinds;
public class ModelicaTokenizer extends JavaCCTokenizer {
@Override
- protected TokenManager makeLexerImpl(CharStream sourceCode) {
- return ModelicaTokenKinds.newTokenManager(sourceCode);
+ protected TokenManager makeLexerImpl(TextDocument doc) {
+ return ModelicaTokenKinds.newTokenManager(doc); // NOTE(review): doc is passed without CharStream.create(doc), unlike EcmascriptTokenizer — confirm newTokenManager accepts a TextDocument
}
@Override
- protected JavaCCTokenFilter getTokenFilter(TokenManager tokenManager) {
+ protected TokenManager filterTokenStream(TokenManager tokenManager) {
return new ModelicaTokenFilter(tokenManager);
}
diff --git a/pmd-objectivec/src/main/java/net/sourceforge/pmd/cpd/ObjectiveCTokenizer.java b/pmd-objectivec/src/main/java/net/sourceforge/pmd/cpd/ObjectiveCTokenizer.java
index acccfcd24a..6c338b4067 100644
--- a/pmd-objectivec/src/main/java/net/sourceforge/pmd/cpd/ObjectiveCTokenizer.java
+++ b/pmd-objectivec/src/main/java/net/sourceforge/pmd/cpd/ObjectiveCTokenizer.java
@@ -6,8 +6,8 @@ package net.sourceforge.pmd.cpd;
import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
import net.sourceforge.pmd.lang.TokenManager;
-import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
+import net.sourceforge.pmd.lang.document.TextDocument;
import net.sourceforge.pmd.lang.objectivec.ast.ObjectiveCTokenKinds;
/**
@@ -16,7 +16,7 @@ import net.sourceforge.pmd.lang.objectivec.ast.ObjectiveCTokenKinds;
public class ObjectiveCTokenizer extends JavaCCTokenizer {
@Override
- protected TokenManager makeLexerImpl(CharStream sourceCode) {
- return ObjectiveCTokenKinds.newTokenManager(sourceCode);
+ protected TokenManager makeLexerImpl(TextDocument doc) {
+ return ObjectiveCTokenKinds.newTokenManager(doc); // NOTE(review): doc is passed without CharStream.create(doc), unlike EcmascriptTokenizer — confirm newTokenManager accepts a TextDocument
}
}
diff --git a/pmd-php/src/main/java/net/sourceforge/pmd/cpd/PHPTokenizer.java b/pmd-php/src/main/java/net/sourceforge/pmd/cpd/PHPTokenizer.java
index 974a32c1e5..b63cfecd0e 100644
--- a/pmd-php/src/main/java/net/sourceforge/pmd/cpd/PHPTokenizer.java
+++ b/pmd-php/src/main/java/net/sourceforge/pmd/cpd/PHPTokenizer.java
@@ -6,13 +6,15 @@ package net.sourceforge.pmd.cpd;
import java.util.List;
+import net.sourceforge.pmd.lang.document.TextDocument;
+
/**
* Simple tokenizer for PHP.
*/
public class PHPTokenizer implements Tokenizer {
@Override
- public void tokenize(SourceCode tokens, Tokens tokenEntries) {
+ public void tokenize(TextDocument tokens, Tokens tokenEntries) {
List code = tokens.getCode();
for (int i = 0; i < code.size(); i++) {
String currentLine = code.get(i);
diff --git a/pmd-plsql/src/main/java/net/sourceforge/pmd/cpd/PLSQLTokenizer.java b/pmd-plsql/src/main/java/net/sourceforge/pmd/cpd/PLSQLTokenizer.java
index 77abbf8794..4d66cf3089 100644
--- a/pmd-plsql/src/main/java/net/sourceforge/pmd/cpd/PLSQLTokenizer.java
+++ b/pmd-plsql/src/main/java/net/sourceforge/pmd/cpd/PLSQLTokenizer.java
@@ -8,8 +8,8 @@ import java.util.Properties;
import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
import net.sourceforge.pmd.lang.TokenManager;
-import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
+import net.sourceforge.pmd.lang.document.TextDocument;
import net.sourceforge.pmd.lang.plsql.ast.PLSQLTokenKinds;
public class PLSQLTokenizer extends JavaCCTokenizer {
@@ -66,7 +66,7 @@ public class PLSQLTokenizer extends JavaCCTokenizer {
}
@Override
- protected TokenManager makeLexerImpl(CharStream sourceCode) {
- return PLSQLTokenKinds.newTokenManager(sourceCode);
+ protected TokenManager makeLexerImpl(TextDocument doc) {
+ return PLSQLTokenKinds.newTokenManager(doc); // NOTE(review): doc is passed without CharStream.create(doc), unlike EcmascriptTokenizer — confirm newTokenManager accepts a TextDocument
}
}
diff --git a/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java b/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java
index c80d572f67..89f8dce9ae 100644
--- a/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java
+++ b/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java
@@ -10,8 +10,8 @@ import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
-import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument.TokenDocumentBehavior;
+import net.sourceforge.pmd.lang.document.TextDocument;
import net.sourceforge.pmd.lang.python.ast.PythonTokenKinds;
/**
@@ -24,13 +24,8 @@ public class PythonTokenizer extends JavaCCTokenizer {
private static final TokenDocumentBehavior TOKEN_BEHAVIOR = new TokenDocumentBehavior(PythonTokenKinds.TOKEN_NAMES);
@Override
- protected TokenManager makeLexerImpl(CharStream sourceCode) {
- return PythonTokenKinds.newTokenManager(sourceCode);
- }
-
- @Override
- protected JavaccTokenDocument.TokenDocumentBehavior tokenBehavior() {
- return TOKEN_BEHAVIOR;
+ protected TokenManager makeLexerImpl(TextDocument doc) {
+ return PythonTokenKinds.newTokenManager(CharStream.create(doc, TOKEN_BEHAVIOR));
}
@Override
diff --git a/pmd-scala-modules/pmd-scala-common/src/main/java/net/sourceforge/pmd/cpd/ScalaTokenizer.java b/pmd-scala-modules/pmd-scala-common/src/main/java/net/sourceforge/pmd/cpd/ScalaTokenizer.java
index 060f24cbd4..5c2b98892b 100644
--- a/pmd-scala-modules/pmd-scala-common/src/main/java/net/sourceforge/pmd/cpd/ScalaTokenizer.java
+++ b/pmd-scala-modules/pmd-scala-common/src/main/java/net/sourceforge/pmd/cpd/ScalaTokenizer.java
@@ -62,7 +62,7 @@ public class ScalaTokenizer implements Tokenizer {
}
@Override
- public void tokenize(SourceCode sourceCode, Tokens tokenEntries) throws IOException {
+ public void tokenize(TextDocument sourceCode, Tokens tokenEntries) throws IOException {
try (TextDocument textDoc = TextDocument.create(CpdCompat.cpdCompat(sourceCode))) {
diff --git a/pmd-visualforce/src/main/java/net/sourceforge/pmd/cpd/VfTokenizer.java b/pmd-visualforce/src/main/java/net/sourceforge/pmd/cpd/VfTokenizer.java
index 17926160b0..d831ee0f7d 100644
--- a/pmd-visualforce/src/main/java/net/sourceforge/pmd/cpd/VfTokenizer.java
+++ b/pmd-visualforce/src/main/java/net/sourceforge/pmd/cpd/VfTokenizer.java
@@ -6,7 +6,6 @@ package net.sourceforge.pmd.cpd;
import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
import net.sourceforge.pmd.lang.TokenManager;
-import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaEscapeTranslator;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument;
@@ -21,8 +20,8 @@ import net.sourceforge.pmd.lang.vf.ast.VfTokenKinds;
public class VfTokenizer extends JavaCCTokenizer {
@Override
- protected TokenManager makeLexerImpl(CharStream sourceCode) {
- return VfTokenKinds.newTokenManager(sourceCode);
+ protected TokenManager makeLexerImpl(TextDocument doc) {
+ return VfTokenKinds.newTokenManager(doc); // NOTE(review): doc is passed without CharStream.create(doc), unlike EcmascriptTokenizer — confirm newTokenManager accepts a TextDocument
}
@Override