Make pmd language have a hook to launch CPD

This allows removing the CPD language instances,
sharing more logic between CPD and PMD, and using
language properties to configure CPD and PMD uniformly.
This commit is contained in:
Clément Fournier
2023-02-10 16:24:11 +01:00
parent eee8b95a68
commit d4c05d1fb5
39 changed files with 357 additions and 283 deletions

View File

@ -13,6 +13,7 @@ import org.antlr.runtime.Token;
import net.sourceforge.pmd.lang.apex.ApexJorjeLogging;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
import net.sourceforge.pmd.lang.document.TextDocument;
import apex.jorje.parser.impl.ApexLexer;
@ -35,7 +36,7 @@ public class ApexTokenizer implements Tokenizer {
}
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
public void tokenize(TextDocument sourceCode, Tokens tokenEntries) {
StringBuilder code = sourceCode.getCodeBuffer();
ANTLRStringStream ass = new ANTLRStringStream(code.toString());

View File

@ -7,6 +7,8 @@ package net.sourceforge.pmd.cpd;
import java.util.List;
import java.util.Locale;
import net.sourceforge.pmd.lang.document.TextDocument;
/**
*
* @author Zev Blut zb@ubit.com
@ -48,7 +50,7 @@ public abstract class AbstractTokenizer implements Tokenizer {
private boolean downcaseString = true;
@Override
public void tokenize(SourceCode tokens, Tokens tokenEntries) {
public void tokenize(TextDocument tokens, Tokens tokenEntries) {
code = tokens.getCode();
for (lineNumber = 0; lineNumber < code.size(); lineNumber++) {

View File

@ -9,6 +9,7 @@ import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import net.sourceforge.pmd.lang.document.TextDocument;
import net.sourceforge.pmd.util.StringUtil;
/**
@ -60,7 +61,7 @@ public class AnyTokenizer implements Tokenizer {
}
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
public void tokenize(TextDocument sourceCode, Tokens tokenEntries) {
CharSequence text = sourceCode.getCodeBuffer();
Matcher matcher = pattern.matcher(text);
int lineNo = 1;

View File

@ -6,7 +6,43 @@ package net.sourceforge.pmd.cpd;
import java.io.IOException;
import net.sourceforge.pmd.lang.document.TextDocument;
import net.sourceforge.pmd.properties.PropertyDescriptor;
import net.sourceforge.pmd.properties.PropertyFactory;
public interface Tokenizer {
PropertyDescriptor<Boolean> CPD_IGNORE_LITERAL_SEQUENCES =
PropertyFactory.booleanProperty("cpdIgnoreLiteralSequences")
.defaultValue(false)
.desc("Ignore sequences of literals, eg `0, 0, 0, 0`")
.build();
PropertyDescriptor<Boolean> CPD_ANONYMIZE_LITERALS =
PropertyFactory.booleanProperty("cpdAnonymizeLiterals")
.defaultValue(false)
.desc("Anonymize literals. They are still part of the token stream but all literals appear to have the same value.")
.build();
PropertyDescriptor<Boolean> CPD_ANONYMIZE_IDENTIFIERS =
PropertyFactory.booleanProperty("cpdAnonymizeIdentifiers")
.defaultValue(false)
.desc("Anonymize identifiers. They are still part of the token stream but all identifiers appear to have the same value.")
.build();
PropertyDescriptor<Boolean> CPD_IGNORE_IMPORTS =
PropertyFactory.booleanProperty("cpdIgnoreImports")
.defaultValue(true)
.desc("Ignore import statements and equivalent (eg using statements in C#).")
.build();
PropertyDescriptor<Boolean> CPD_IGNORE_METADATA =
PropertyFactory.booleanProperty("cpdIgnoreMetadata")
.defaultValue(false)
.desc("Ignore metadata such as Java annotations or C# attributes.")
.build();
String IGNORE_LITERALS = "ignore_literals";
String IGNORE_IDENTIFIERS = "ignore_identifiers";
String IGNORE_ANNOTATIONS = "ignore_annotations";
@ -39,5 +75,5 @@ public interface Tokenizer {
String DEFAULT_SKIP_BLOCKS_PATTERN = "#if 0|#endif";
void tokenize(SourceCode sourceCode, Tokens tokenEntries) throws IOException;
void tokenize(TextDocument sourceCode, Tokens tokenEntries) throws IOException;
}

View File

@ -4,58 +4,26 @@
package net.sourceforge.pmd.cpd.internal;
import java.io.IOException;
import java.io.UncheckedIOException;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.Lexer;
import net.sourceforge.pmd.cpd.SourceCode;
import net.sourceforge.pmd.cpd.TokenEntry;
import net.sourceforge.pmd.cpd.Tokenizer;
import net.sourceforge.pmd.cpd.Tokens;
import net.sourceforge.pmd.cpd.token.AntlrTokenFilter;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrToken;
import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrTokenManager;
import net.sourceforge.pmd.lang.document.CpdCompat;
import net.sourceforge.pmd.lang.document.TextDocument;
/**
* Generic implementation of a {@link Tokenizer} useful to any Antlr grammar.
*/
public abstract class AntlrTokenizer implements Tokenizer {
public abstract class AntlrTokenizer extends TokenizerBase<AntlrToken> {
@Override
protected final TokenManager<AntlrToken> makeLexerImpl(TextDocument doc) {
CharStream charStream = CharStreams.fromString(doc.getText().toString(), doc.getDisplayName());
return new AntlrTokenManager(getLexerForSource(charStream), doc);
}
protected abstract Lexer getLexerForSource(CharStream charStream);
@Override
public void tokenize(final SourceCode sourceCode, final Tokens tokenEntries) {
try (TextDocument textDoc = TextDocument.create(CpdCompat.cpdCompat(sourceCode))) {
CharStream charStream = CharStreams.fromString(textDoc.getText().toString(), textDoc.getDisplayName());
final AntlrTokenManager tokenManager = new AntlrTokenManager(getLexerForSource(charStream), textDoc);
final AntlrTokenFilter tokenFilter = getTokenFilter(tokenManager);
AntlrToken currentToken = tokenFilter.getNextToken();
while (currentToken != null) {
processToken(tokenEntries, currentToken);
currentToken = tokenFilter.getNextToken();
}
} catch (IOException e) {
throw new UncheckedIOException(e);
} finally {
tokenEntries.add(TokenEntry.getEOF());
}
}
protected AntlrTokenFilter getTokenFilter(final AntlrTokenManager tokenManager) {
return new AntlrTokenFilter(tokenManager);
}
private void processToken(final Tokens tokenEntries, final AntlrToken token) {
final TokenEntry tokenEntry = new TokenEntry(token.getImage(), token.getReportLocation());
tokenEntries.add(tokenEntry);
}
}

View File

@ -4,61 +4,8 @@
package net.sourceforge.pmd.cpd.internal;
import java.io.IOException;
import net.sourceforge.pmd.cpd.SourceCode;
import net.sourceforge.pmd.cpd.TokenEntry;
import net.sourceforge.pmd.cpd.Tokenizer;
import net.sourceforge.pmd.cpd.Tokens;
import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
import net.sourceforge.pmd.cpd.token.TokenFilter;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.FileAnalysisException;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument.TokenDocumentBehavior;
import net.sourceforge.pmd.lang.document.CpdCompat;
import net.sourceforge.pmd.lang.document.TextDocument;
public abstract class JavaCCTokenizer implements Tokenizer {
public abstract class JavaCCTokenizer extends TokenizerBase<JavaccToken> {
@SuppressWarnings("PMD.CloseResource")
protected TokenManager<JavaccToken> getLexerForSource(TextDocument sourceCode) throws IOException {
return makeLexerImpl(CharStream.create(sourceCode, tokenBehavior()));
}
protected TokenDocumentBehavior tokenBehavior() {
return TokenDocumentBehavior.DEFAULT;
}
protected abstract TokenManager<JavaccToken> makeLexerImpl(CharStream sourceCode);
protected TokenFilter<JavaccToken> getTokenFilter(TokenManager<JavaccToken> tokenManager) {
return new JavaCCTokenFilter(tokenManager);
}
protected TokenEntry processToken(Tokens tokenEntries, JavaccToken currentToken) {
return new TokenEntry(getImage(currentToken), currentToken.getReportLocation());
}
protected String getImage(JavaccToken token) {
return token.getImage();
}
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) throws IOException {
try (TextDocument textDoc = TextDocument.create(CpdCompat.cpdCompat(sourceCode))) {
TokenManager<JavaccToken> tokenManager = getLexerForSource(textDoc);
final TokenFilter<JavaccToken> tokenFilter = getTokenFilter(tokenManager);
JavaccToken currentToken = tokenFilter.getNextToken();
while (currentToken != null) {
tokenEntries.add(processToken(tokenEntries, currentToken));
currentToken = tokenFilter.getNextToken();
}
} catch (FileAnalysisException e) {
throw e.setFileName(sourceCode.getFileName());
} finally {
tokenEntries.add(TokenEntry.getEOF());
}
}
}

View File

@ -0,0 +1,42 @@
/**
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.cpd.internal;
import java.io.IOException;
import net.sourceforge.pmd.cpd.TokenEntry;
import net.sourceforge.pmd.cpd.Tokenizer;
import net.sourceforge.pmd.cpd.Tokens;
import net.sourceforge.pmd.cpd.token.internal.BaseTokenFilter;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.GenericToken;
import net.sourceforge.pmd.lang.document.TextDocument;
/**
 * Base class for a {@link Tokenizer}. Implementations only need to provide
 * the lexer ({@link #makeLexerImpl(TextDocument)}); token filtering and the
 * conversion of language tokens into CPD {@link TokenEntry token entries}
 * are handled here.
 */
public abstract class TokenizerBase<T extends GenericToken<T>> implements Tokenizer {

    /** Create the lexer producing the raw token stream for the given document. */
    protected abstract TokenManager<T> makeLexerImpl(TextDocument doc);

    /**
     * Hook to wrap or filter the raw token stream (eg to discard imports).
     * The default filter only honors CPD suppression comments.
     */
    protected TokenManager<T> filterTokenStream(TokenManager<T> tokenManager) {
        return new BaseTokenFilter<>(tokenManager);
    }

    /** Map one language token to the CPD token entry used for duplicate matching. */
    protected TokenEntry processToken(Tokens tokenEntries, T currentToken) {
        return new TokenEntry(getImage(currentToken), currentToken.getReportLocation());
    }

    /** Image used for matching; override to eg anonymize literals or identifiers. */
    protected String getImage(T token) {
        return token.getImage();
    }

    @Override
    public void tokenize(TextDocument document, Tokens tokenEntries) throws IOException {
        TokenManager<T> tokenManager = filterTokenStream(makeLexerImpl(document));
        try {
            T currentToken = tokenManager.getNextToken();
            while (currentToken != null) {
                tokenEntries.add(processToken(tokenEntries, currentToken));
                currentToken = tokenManager.getNextToken();
            }
        } finally {
            // The implementations this class replaces (AntlrTokenizer, JavaCCTokenizer)
            // always terminated the stream with an EOF entry, even on failure: CPD's
            // match algorithm relies on this sentinel to separate files.
            tokenEntries.add(TokenEntry.getEOF());
        }
    }
}

View File

@ -4,16 +4,18 @@
package net.sourceforge.pmd.cpd.token;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.GenericToken;
/**
* Defines filter to be applied to the token stream during CPD analysis
*/
public interface TokenFilter<T extends GenericToken<T>> {
public interface TokenFilter<T extends GenericToken<T>> extends TokenManager<T> {
/**
* Retrieves the next token to pass the filter
* @return The next token to pass the filter, or null if the end of the stream was reached
*/
@Override
T getNextToken();
}

View File

@ -18,7 +18,7 @@ import net.sourceforge.pmd.lang.ast.GenericToken;
* A generic filter for PMD token managers that allows to use comments
* to enable / disable analysis of parts of the stream
*/
public abstract class BaseTokenFilter<T extends GenericToken<T>> implements TokenFilter<T> {
public class BaseTokenFilter<T extends GenericToken<T>> implements TokenFilter<T> {
private final TokenManager<T> tokenManager;
private final LinkedList<T> unprocessedTokens; // NOPMD - used both as Queue and List

View File

@ -0,0 +1,28 @@
/*
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.lang;
/**
 * Base class for language modules that only support CPD and not PMD.
 * Such languages have no parser: {@link #supportsParsing()} returns false,
 * so the inherited {@code createProcessor} default (which throws
 * {@code UnsupportedOperationException}) applies.
 *
 * @author Clément Fournier
 */
public abstract class CpdOnlyLanguageModuleBase extends LanguageModuleBase {

    /**
     * Construct a module instance using the given metadata. The metadata must
     * be properly constructed.
     *
     * @param metadata Language metadata (id, name, versions, file extensions)
     *
     * @throws IllegalStateException If the metadata is invalid (eg missing extensions or name)
     */
    protected CpdOnlyLanguageModuleBase(LanguageMetadata metadata) {
        super(metadata);
    }

    /** CPD-only languages cannot parse files into an AST. */
    // NOTE(review): this matches the interface default; kept explicit for clarity.
    @Override
    public boolean supportsParsing() {
        return false;
    }
}

View File

@ -8,6 +8,8 @@ import java.util.List;
import java.util.ServiceLoader;
import java.util.Set;
import net.sourceforge.pmd.cpd.Tokenizer;
/**
* Represents a language module, and provides access to language-specific
* functionality. You can get a language instance from a {@link LanguageRegistry}.
@ -156,6 +158,14 @@ public interface Language extends Comparable<Language> {
return new LanguagePropertyBundle(this);
}
/**
* Return true if this language supports parsing files into an AST.
* In that case {@link #createProcessor(LanguagePropertyBundle)} should
* also be implemented.
*/
default boolean supportsParsing() {
return false;
}
/**
* Create a new {@link LanguageProcessor} for this language, given
@ -167,8 +177,30 @@ public interface Language extends Comparable<Language> {
* @param bundle A bundle of properties created by this instance.
*
* @return A new language processor
*
* @throws UnsupportedOperationException if this language does not support PMD
*/
LanguageProcessor createProcessor(LanguagePropertyBundle bundle);
default LanguageProcessor createProcessor(LanguagePropertyBundle bundle) {
throw new UnsupportedOperationException(this + " does not support running a PMD analysis.");
}
/**
* Create a new {@link Tokenizer} for this language, given
* a property bundle with configuration. The bundle was created by
* this instance using {@link #newPropertyBundle()}. It can be assumed
* that the bundle will never be mutated anymore, and this method
* takes ownership of it.
*
* @param bundle A bundle of properties created by this instance.
*
* @return A new CPD tokenizer
*
* @throws UnsupportedOperationException if this language does not support CPD
*/
default Tokenizer createCpdTokenizer(LanguagePropertyBundle bundle) {
throw new UnsupportedOperationException(this + " does not support running a CPD analysis.");
}
/**

View File

@ -41,6 +41,7 @@ public final class LanguageRegistry implements Iterable<Language> {
* of the classloader of this class. This can be used as a "default" registry.
*/
public static final LanguageRegistry PMD = loadLanguages(LanguageRegistry.class.getClassLoader());
public static final LanguageRegistry CPD = loadLanguages(LanguageRegistry.class.getClassLoader()); // todo
private final Set<Language> languages;

View File

@ -42,6 +42,15 @@ public final class CharStream {
return new CharStream(new JavaccTokenDocument(translated, behavior));
}
/**
* Create a new char stream for the given document with the default token
* document behavior. This may create a new {@link TextDocument} view
* over the original, which reflects its character escapes.
*/
public static CharStream create(TextDocument doc) throws MalformedSourceException {
return create(doc, TokenDocumentBehavior.DEFAULT);
}
/**
* Returns the next character from the input. After a {@link #backup(int)},
* some of the already read chars must be spit out again.

View File

@ -20,7 +20,7 @@ import net.sourceforge.pmd.lang.LanguageVersionHandler;
* @author Clément Fournier
* @since 7.0.0
*/
public class SimpleLanguageModuleBase extends LanguageModuleBase {
public abstract class SimpleLanguageModuleBase extends LanguageModuleBase {
private final Function<LanguagePropertyBundle, LanguageVersionHandler> handler;
@ -33,6 +33,11 @@ public class SimpleLanguageModuleBase extends LanguageModuleBase {
this.handler = makeHandler;
}
@Override
public boolean supportsParsing() {
return true;
}
@Override
public LanguageProcessor createProcessor(LanguagePropertyBundle bundle) {
LanguageVersionHandler services = handler.apply(bundle);

View File

@ -1,32 +0,0 @@
/*
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.cpd;
import java.util.Properties;
/**
 * Defines the Language module for C/C++
 */
public class CPPLanguage extends AbstractLanguage {

    /**
     * Creates a new instance of {@link CPPLanguage} with the default extensions
     * for c/c++ files.
     */
    public CPPLanguage() {
        // JVM system properties act as the default CPD configuration source.
        this(System.getProperties());
    }

    /** Registers the C++ tokenizer and file extensions, then applies {@code properties}. */
    public CPPLanguage(Properties properties) {
        super("C++", "cpp", new CPPTokenizer(), ".h", ".hpp", ".hxx", ".c", ".cpp", ".cxx", ".cc", ".C");
        setProperties(properties);
    }

    /** Forwards the properties both to the language and to its tokenizer. */
    @Override
    public void setProperties(Properties properties) {
        super.setProperties(properties);
        ((CPPTokenizer) getTokenizer()).setProperties(properties);
    }
}

View File

@ -4,46 +4,37 @@
package net.sourceforge.pmd.cpd;
import java.util.Properties;
import java.util.regex.Pattern;
import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
import org.apache.commons.lang3.StringUtils;
import net.sourceforge.pmd.cpd.internal.TokenizerBase;
import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
import net.sourceforge.pmd.cpd.token.TokenFilter;
import net.sourceforge.pmd.lang.LanguagePropertyBundle;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument.TokenDocumentBehavior;
import net.sourceforge.pmd.lang.ast.impl.javacc.MalformedSourceException;
import net.sourceforge.pmd.lang.cpp.CppLanguageModule;
import net.sourceforge.pmd.lang.cpp.ast.CppTokenKinds;
import net.sourceforge.pmd.lang.document.TextDocument;
/**
* The C++ tokenizer.
*/
public class CPPTokenizer extends JavaCCTokenizer {
public class CPPTokenizer extends TokenizerBase<JavaccToken> {
private boolean skipBlocks;
private Pattern skipBlocksStart;
private Pattern skipBlocksEnd;
private boolean ignoreLiteralSequences = false;
private final boolean ignoreLiteralSequences;
public CPPTokenizer() {
setProperties(new Properties()); // set the defaults
}
/**
* Sets the possible options for the C++ tokenizer.
*
* @param properties the properties
* @see #OPTION_SKIP_BLOCKS
* @see #OPTION_SKIP_BLOCKS_PATTERN
* @see #OPTION_IGNORE_LITERAL_SEQUENCES
*/
public void setProperties(Properties properties) {
skipBlocks = Boolean.parseBoolean(properties.getProperty(OPTION_SKIP_BLOCKS, Boolean.TRUE.toString()));
if (skipBlocks) {
String skipBlocksPattern = properties.getProperty(OPTION_SKIP_BLOCKS_PATTERN, DEFAULT_SKIP_BLOCKS_PATTERN);
public CPPTokenizer(LanguagePropertyBundle cppProperties) {
ignoreLiteralSequences = cppProperties.getProperty(Tokenizer.CPD_IGNORE_LITERAL_SEQUENCES);
String skipBlocksPattern = cppProperties.getProperty(CppLanguageModule.CPD_SKIP_BLOCKS);
if (StringUtils.isNotBlank(skipBlocksPattern)) {
skipBlocks = true;
String[] split = skipBlocksPattern.split("\\|", 2);
skipBlocksStart = CppBlockSkipper.compileSkipMarker(split[0]);
if (split.length == 1) {
@ -52,14 +43,15 @@ public class CPPTokenizer extends JavaCCTokenizer {
skipBlocksEnd = CppBlockSkipper.compileSkipMarker(split[1]);
}
}
ignoreLiteralSequences = Boolean.parseBoolean(properties.getProperty(OPTION_IGNORE_LITERAL_SEQUENCES,
Boolean.FALSE.toString()));
}
@Override
protected TokenDocumentBehavior tokenBehavior() {
return new TokenDocumentBehavior(CppTokenKinds.TOKEN_NAMES) {
protected TokenManager<JavaccToken> makeLexerImpl(TextDocument doc) {
return CppTokenKinds.newTokenManager(newCharStream(doc));
}
CharStream newCharStream(TextDocument doc) {
return CharStream.create(doc, new TokenDocumentBehavior(CppTokenKinds.TOKEN_NAMES) {
@Override
public TextDocument translate(TextDocument text) throws MalformedSourceException {
@ -68,20 +60,16 @@ public class CPPTokenizer extends JavaCCTokenizer {
}
return new CppEscapeTranslator(text).translateDocument();
}
};
});
}
@Override
protected TokenManager<JavaccToken> makeLexerImpl(CharStream sourceCode) {
return CppTokenKinds.newTokenManager(sourceCode);
}
@Override
protected TokenFilter<JavaccToken> getTokenFilter(final TokenManager<JavaccToken> tokenManager) {
protected TokenManager<JavaccToken> filterTokenStream(final TokenManager<JavaccToken> tokenManager) {
return new CppTokenFilter(tokenManager, ignoreLiteralSequences);
}
private static class CppTokenFilter extends JavaCCTokenFilter {
private final boolean ignoreLiteralSequences;
private JavaccToken discardingLiteralsUntil = null;
private boolean discardCurrent = false;
@ -106,8 +94,7 @@ public class CPPTokenizer extends JavaCCTokenizer {
discardCurrent = true;
}
} else if (kind == CppTokenKinds.LCURLYBRACE) {
final JavaccToken finalToken = findEndOfSequenceOfLiterals(remainingTokens);
discardingLiteralsUntil = finalToken;
discardingLiteralsUntil = findEndOfSequenceOfLiterals(remainingTokens);
}
}
}

View File

@ -0,0 +1,56 @@
/*
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.lang.cpp;
import net.sourceforge.pmd.cpd.CPPTokenizer;
import net.sourceforge.pmd.cpd.Tokenizer;
import net.sourceforge.pmd.lang.CpdOnlyLanguageModuleBase;
import net.sourceforge.pmd.lang.LanguagePropertyBundle;
import net.sourceforge.pmd.lang.LanguageRegistry;
import net.sourceforge.pmd.properties.PropertyDescriptor;
import net.sourceforge.pmd.properties.PropertyFactory;
/**
 * Defines the Language module for C/C++.
 * This is a CPD-only language module: it provides a {@code CPPTokenizer}
 * but no PMD parser.
 */
public class CppLanguageModule extends CpdOnlyLanguageModuleBase {

    /**
     * Start and end delimiter (separated by a pipe) of regions that CPD
     * ignores entirely. An empty value disables skipping.
     */
    public static final PropertyDescriptor<String> CPD_SKIP_BLOCKS =
        PropertyFactory.stringProperty("cpdSkipBlocksPattern")
                       .defaultValue("#if 0|#endif")
                       .desc("Specifies a start and end delimiter for CPD to completely ignore. "
                                 + "The delimiters are separated by a pipe |. The default skips code "
                                 + " that is conditionally compiled out. Set this property to empty to disable this.")
                       .build();

    /**
     * Creates a new instance of {@link CppLanguageModule} with the default extensions
     * for c/c++ files.
     */
    public CppLanguageModule() {
        super(LanguageMetadata.withId("cpp")
                              .name("C++")
                              .addDefaultVersion("any")
                              .extensions("h", "hpp", "hxx", "c", "cpp", "cxx", "cc", "C"));
    }

    /** Returns the singleton instance, looked up from the CPD language registry. */
    public static CppLanguageModule getInstance() {
        return (CppLanguageModule) LanguageRegistry.CPD.getLanguageById("cpp");
    }

    /** Declares the CPD configuration properties this language understands. */
    @Override
    public LanguagePropertyBundle newPropertyBundle() {
        LanguagePropertyBundle bundle = super.newPropertyBundle();
        bundle.definePropertyDescriptor(Tokenizer.CPD_IGNORE_LITERAL_SEQUENCES);
        bundle.definePropertyDescriptor(CPD_SKIP_BLOCKS);
        return bundle;
    }

    @Override
    public Tokenizer createCpdTokenizer(LanguagePropertyBundle bundle) {
        // The tokenizer reads its configuration from the bundle at construction time.
        return new CPPTokenizer(bundle);
    }
}

View File

@ -1 +0,0 @@
net.sourceforge.pmd.cpd.CPPLanguage

View File

@ -0,0 +1 @@
net.sourceforge.pmd.lang.cpp.CppLanguageModule

View File

@ -8,20 +8,20 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
import org.checkerframework.checker.nullness.qual.NonNull;
import org.junit.jupiter.api.Test;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.document.CpdCompat;
import net.sourceforge.pmd.lang.cpp.CppLanguageModule;
import net.sourceforge.pmd.lang.document.TextDocument;
import net.sourceforge.pmd.lang.document.TextFile;
class CppCharStreamTest {
@NonNull
public CharStream charStreamFor(String source) throws IOException {
TextDocument textDoc = TextDocument.readOnlyString(source, TextFile.UNKNOWN_FILENAME, CpdCompat.dummyVersion());
return CharStream.create(textDoc, new CPPTokenizer().tokenBehavior());
public CharStream charStreamFor(String source) {
CppLanguageModule cpp = CppLanguageModule.getInstance();
TextDocument textDoc = TextDocument.readOnlyString(source, TextFile.UNKNOWN_FILENAME, cpp.getDefaultVersion());
CPPTokenizer tokenizer = new CPPTokenizer(cpp.newPropertyBundle());
return tokenizer.newCharStream(textDoc);
}
@Test

View File

@ -1,28 +0,0 @@
/**
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.cpd;
import java.util.Properties;
/**
 * Language implementation for C#
 */
public class CsLanguage extends AbstractLanguage {

    /** Defaults the CPD configuration to the JVM system properties. */
    public CsLanguage() {
        this(System.getProperties());
    }

    /** Registers the C# tokenizer under the {@code .cs} extension and applies {@code properties}. */
    public CsLanguage(Properties properties) {
        super("C#", "cs", new CsTokenizer(), ".cs");
        setProperties(properties);
    }

    /** Forwards the properties to the tokenizer. */
    // NOTE(review): unlike CPPLanguage#setProperties, this does not call
    // super.setProperties(properties) — looks like an omission; verify.
    @Override
    public final void setProperties(Properties properties) {
        CsTokenizer tokenizer = (CsTokenizer) getTokenizer();
        tokenizer.setProperties(properties);
    }
}

View File

@ -4,15 +4,15 @@
package net.sourceforge.pmd.cpd;
import java.util.Properties;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.Lexer;
import net.sourceforge.pmd.cpd.internal.AntlrTokenizer;
import net.sourceforge.pmd.cpd.token.AntlrTokenFilter;
import net.sourceforge.pmd.cpd.token.internal.BaseTokenFilter;
import net.sourceforge.pmd.lang.LanguagePropertyBundle;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrToken;
import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrTokenManager;
import net.sourceforge.pmd.lang.cs.ast.CSharpLexer;
/**
@ -20,26 +20,14 @@ import net.sourceforge.pmd.lang.cs.ast.CSharpLexer;
*/
public class CsTokenizer extends AntlrTokenizer {
private boolean ignoreUsings = false;
private boolean ignoreLiteralSequences = false;
private boolean ignoreAttributes = false;
private final boolean ignoreUsings;
private final boolean ignoreLiteralSequences;
private final boolean ignoreAttributes;
/**
* Sets the possible options for the C# tokenizer.
*
* @param properties the properties
* @see #IGNORE_USINGS
* @see #OPTION_IGNORE_LITERAL_SEQUENCES
* @see #IGNORE_ANNOTATIONS
*/
public void setProperties(Properties properties) {
ignoreUsings = getBooleanProperty(properties, IGNORE_USINGS);
ignoreLiteralSequences = getBooleanProperty(properties, OPTION_IGNORE_LITERAL_SEQUENCES);
ignoreAttributes = getBooleanProperty(properties, IGNORE_ANNOTATIONS);
}
private boolean getBooleanProperty(final Properties properties, final String property) {
return Boolean.parseBoolean(properties.getProperty(property, Boolean.FALSE.toString()));
public CsTokenizer(LanguagePropertyBundle properties) {
ignoreUsings = properties.getProperty(Tokenizer.CPD_IGNORE_IMPORTS);
ignoreLiteralSequences = properties.getProperty(Tokenizer.CPD_IGNORE_LITERAL_SEQUENCES);
ignoreAttributes = properties.getProperty(Tokenizer.CPD_IGNORE_METADATA);
}
@Override
@ -48,7 +36,7 @@ public class CsTokenizer extends AntlrTokenizer {
}
@Override
protected AntlrTokenFilter getTokenFilter(final AntlrTokenManager tokenManager) {
protected TokenManager<AntlrToken> filterTokenStream(TokenManager<AntlrToken> tokenManager) {
return new CsTokenFilter(tokenManager, ignoreUsings, ignoreLiteralSequences, ignoreAttributes);
}
@ -60,7 +48,7 @@ public class CsTokenizer extends AntlrTokenizer {
* If the --ignoreUsings flag is provided, using directives are filtered out.
* </p>
*/
private static class CsTokenFilter extends AntlrTokenFilter {
private static class CsTokenFilter extends BaseTokenFilter<AntlrToken> {
private enum UsingState {
KEYWORD, // just encountered the using keyword
IDENTIFIER, // just encountered an identifier or var keyword
@ -75,7 +63,7 @@ public class CsTokenizer extends AntlrTokenizer {
private AntlrToken discardingLiteralsUntil = null;
private boolean discardCurrent = false;
CsTokenFilter(final AntlrTokenManager tokenManager, boolean ignoreUsings, boolean ignoreLiteralSequences, boolean ignoreAttributes) {
CsTokenFilter(final TokenManager<AntlrToken> tokenManager, boolean ignoreUsings, boolean ignoreLiteralSequences, boolean ignoreAttributes) {
super(tokenManager);
this.ignoreUsings = ignoreUsings;
this.ignoreLiteralSequences = ignoreLiteralSequences;

View File

@ -0,0 +1,42 @@
/*
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.lang.cs;
import net.sourceforge.pmd.cpd.CsTokenizer;
import net.sourceforge.pmd.cpd.Tokenizer;
import net.sourceforge.pmd.lang.CpdOnlyLanguageModuleBase;
import net.sourceforge.pmd.lang.LanguagePropertyBundle;
import net.sourceforge.pmd.lang.LanguageRegistry;
/**
 * Defines the Language module for C#.
 * This is a CPD-only language module: it provides a {@code CsTokenizer}
 * but no PMD parser.
 */
public class CsLanguageModule extends CpdOnlyLanguageModuleBase {

    /** Registers the language id, display name and the {@code cs} file extension. */
    public CsLanguageModule() {
        super(LanguageMetadata.withId("cs")
                              .name("C#")
                              .addDefaultVersion("any")
                              .extensions("cs"));
    }

    /** Returns the singleton instance, looked up from the CPD language registry. */
    public static CsLanguageModule getInstance() {
        return (CsLanguageModule) LanguageRegistry.CPD.getLanguageById("cs");
    }

    /**
     * Declares the CPD configuration properties recognized by this language
     * (literal sequences, using directives, attributes).
     */
    @Override
    public LanguagePropertyBundle newPropertyBundle() {
        LanguagePropertyBundle bundle = super.newPropertyBundle();
        bundle.definePropertyDescriptor(Tokenizer.CPD_IGNORE_LITERAL_SEQUENCES);
        bundle.definePropertyDescriptor(Tokenizer.CPD_IGNORE_IMPORTS);
        bundle.definePropertyDescriptor(Tokenizer.CPD_IGNORE_METADATA);
        return bundle;
    }

    @Override
    public Tokenizer createCpdTokenizer(LanguagePropertyBundle bundle) {
        // The tokenizer reads its configuration from the bundle at construction time.
        return new CsTokenizer(bundle);
    }
}

View File

@ -1 +0,0 @@
net.sourceforge.pmd.cpd.CsLanguage

View File

@ -0,0 +1 @@
net.sourceforge.pmd.lang.cs.CsLanguageModule

View File

@ -9,8 +9,9 @@ import org.antlr.v4.runtime.Lexer;
import net.sourceforge.pmd.cpd.internal.AntlrTokenizer;
import net.sourceforge.pmd.cpd.token.AntlrTokenFilter;
import net.sourceforge.pmd.cpd.token.internal.BaseTokenFilter;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrToken;
import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrTokenManager;
import net.sourceforge.pmd.lang.dart.ast.DartLexer;
/**
@ -24,7 +25,7 @@ public class DartTokenizer extends AntlrTokenizer {
}
@Override
protected AntlrTokenFilter getTokenFilter(final AntlrTokenManager tokenManager) {
protected TokenManager<AntlrToken> filterTokenStream(TokenManager<AntlrToken> tokenManager) {
return new DartTokenFilter(tokenManager);
}
@ -36,12 +37,12 @@ public class DartTokenizer extends AntlrTokenizer {
* enables comment-based CPD suppression.
* </p>
*/
private static class DartTokenFilter extends AntlrTokenFilter {
private static class DartTokenFilter extends BaseTokenFilter<AntlrToken> {
private boolean discardingLibraryAndImport = false;
private boolean discardingNL = false;
private boolean discardingSemicolon = false;
/* default */ DartTokenFilter(final AntlrTokenManager tokenManager) {
/* default */ DartTokenFilter(final TokenManager<AntlrToken> tokenManager) {
super(tokenManager);
}

View File

@ -10,6 +10,7 @@ import org.codehaus.groovy.antlr.SourceInfo;
import org.codehaus.groovy.antlr.parser.GroovyLexer;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
import net.sourceforge.pmd.lang.document.TextDocument;
import groovyjarjarantlr.Token;
import groovyjarjarantlr.TokenStream;
@ -21,7 +22,7 @@ import groovyjarjarantlr.TokenStreamException;
public class GroovyTokenizer implements Tokenizer {
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
public void tokenize(TextDocument sourceCode, Tokens tokenEntries) {
StringBuilder buffer = sourceCode.getCodeBuffer();
GroovyLexer lexer = new GroovyLexer(new StringReader(buffer.toString()));

View File

@ -7,7 +7,6 @@ package net.sourceforge.pmd.lang.html.ast;
import java.io.IOException;
import java.io.UncheckedIOException;
import net.sourceforge.pmd.cpd.SourceCode;
import net.sourceforge.pmd.cpd.TokenEntry;
import net.sourceforge.pmd.cpd.Tokenizer;
import net.sourceforge.pmd.cpd.Tokens;
@ -22,7 +21,7 @@ import net.sourceforge.pmd.lang.html.HtmlLanguageModule;
public class HtmlTokenizer implements Tokenizer {
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
public void tokenize(TextDocument sourceCode, Tokens tokenEntries) {
HtmlLanguageModule html = HtmlLanguageModule.getInstance();
try (LanguageProcessor processor = html.createProcessor(html.newPropertyBundle());

View File

@ -11,11 +11,10 @@ import java.util.Properties;
import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
import net.sourceforge.pmd.cpd.token.TokenFilter;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument;
import net.sourceforge.pmd.lang.document.TextDocument;
import net.sourceforge.pmd.lang.java.ast.InternalApiBridge;
import net.sourceforge.pmd.lang.java.ast.JavaTokenKinds;
@ -37,23 +36,18 @@ public class JavaTokenizer extends JavaCCTokenizer {
}
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) throws IOException {
public void tokenize(TextDocument sourceCode, Tokens tokenEntries) throws IOException {
constructorDetector = new ConstructorDetector(ignoreIdentifiers);
super.tokenize(sourceCode, tokenEntries);
}
@Override
protected JavaccTokenDocument.TokenDocumentBehavior tokenBehavior() {
return InternalApiBridge.javaTokenDoc();
protected TokenManager<JavaccToken> makeLexerImpl(TextDocument doc) {
return JavaTokenKinds.newTokenManager(CharStream.create(doc, InternalApiBridge.javaTokenDoc()));
}
@Override
protected TokenManager<JavaccToken> makeLexerImpl(CharStream sourceCode) {
return JavaTokenKinds.newTokenManager(sourceCode);
}
@Override
protected TokenFilter<JavaccToken> getTokenFilter(TokenManager<JavaccToken> tokenManager) {
protected TokenManager<JavaccToken> filterTokenStream(TokenManager<JavaccToken> tokenManager) {
return new JavaTokenFilter(tokenManager, ignoreAnnotations);
}

View File

@ -8,6 +8,7 @@ import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
import net.sourceforge.pmd.lang.document.TextDocument;
import net.sourceforge.pmd.lang.ecmascript5.ast.Ecmascript5TokenKinds;
/**
@ -16,8 +17,8 @@ import net.sourceforge.pmd.lang.ecmascript5.ast.Ecmascript5TokenKinds;
public class EcmascriptTokenizer extends JavaCCTokenizer {
@Override
protected TokenManager<JavaccToken> makeLexerImpl(CharStream sourceCode) {
return Ecmascript5TokenKinds.newTokenManager(sourceCode);
protected TokenManager<JavaccToken> makeLexerImpl(TextDocument doc) {
return Ecmascript5TokenKinds.newTokenManager(CharStream.create(doc));
}
@Override

Some files were not shown because too many files have changed in this diff Show More