diff --git a/pmd-apex/src/main/java/net/sourceforge/pmd/cpd/ApexLanguage.java b/pmd-apex/src/main/java/net/sourceforge/pmd/cpd/ApexLanguage.java deleted file mode 100644 index 0bb7bd7014..0000000000 --- a/pmd-apex/src/main/java/net/sourceforge/pmd/cpd/ApexLanguage.java +++ /dev/null @@ -1,25 +0,0 @@ -/** - * BSD-style license; for more info see http://pmd.sourceforge.net/license.html - */ - -package net.sourceforge.pmd.cpd; - -import java.util.Properties; - -public class ApexLanguage extends AbstractLanguage { - - public ApexLanguage() { - this(new Properties()); - } - - public ApexLanguage(Properties properties) { - super("Apex", "apex", new ApexTokenizer(), ".cls"); - setProperties(properties); - } - - @Override - public final void setProperties(Properties properties) { - ApexTokenizer tokenizer = (ApexTokenizer) getTokenizer(); - tokenizer.setProperties(properties); - } -} diff --git a/pmd-apex/src/main/java/net/sourceforge/pmd/cpd/ApexTokenizer.java b/pmd-apex/src/main/java/net/sourceforge/pmd/cpd/ApexTokenizer.java index d2a01abb01..37872c2cc2 100644 --- a/pmd-apex/src/main/java/net/sourceforge/pmd/cpd/ApexTokenizer.java +++ b/pmd-apex/src/main/java/net/sourceforge/pmd/cpd/ApexTokenizer.java @@ -5,67 +5,30 @@ package net.sourceforge.pmd.cpd; import java.util.Locale; -import java.util.Properties; -import org.antlr.runtime.ANTLRStringStream; -import org.antlr.runtime.Lexer; -import org.antlr.runtime.Token; +import org.antlr.v4.runtime.CharStream; -import net.sourceforge.pmd.lang.apex.ApexJorjeLogging; -import net.sourceforge.pmd.lang.ast.TokenMgrError; -import net.sourceforge.pmd.lang.document.TextDocument; +import net.sourceforge.pmd.cpd.internal.AntlrTokenizer; +import net.sourceforge.pmd.lang.apex.ApexLanguageProperties; +import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrToken; -import apex.jorje.parser.impl.ApexLexer; +public class ApexTokenizer extends AntlrTokenizer { + private final boolean caseSensitive; -public class ApexTokenizer implements 
Tokenizer { - - public ApexTokenizer() { - ApexJorjeLogging.disableLogging(); - } - - /** - * If the properties is false (default), then the case of any token - * is ignored. - */ - public static final String CASE_SENSITIVE = "net.sourceforge.pmd.cpd.ApexTokenizer.caseSensitive"; - - private boolean caseSensitive; - - public void setProperties(Properties properties) { - caseSensitive = Boolean.parseBoolean(properties.getProperty(CASE_SENSITIVE, "false")); + public ApexTokenizer(ApexLanguageProperties properties) { + this.caseSensitive = properties.getProperty(Tokenizer.CPD_CASE_SENSITIVE); } @Override - public void tokenize(TextDocument sourceCode, Tokens tokenEntries) { - StringBuilder code = sourceCode.getCodeBuffer(); - - ANTLRStringStream ass = new ANTLRStringStream(code.toString()); - ApexLexer lexer = new ApexLexer(ass) { - @Override - public void emitErrorMessage(String msg) { - throw new TokenMgrError(getLine(), getCharPositionInLine(), getSourceName(), msg, null); - } - }; - - try { - Token token = lexer.nextToken(); - - while (token.getType() != Token.EOF) { - if (token.getChannel() != Lexer.HIDDEN) { - String tokenText = token.getText(); - if (!caseSensitive) { - tokenText = tokenText.toLowerCase(Locale.ROOT); - } - TokenEntry tokenEntry = new TokenEntry(tokenText, sourceCode.getFileName(), - token.getLine(), - token.getCharPositionInLine() + 1, - token.getCharPositionInLine() + tokenText.length() + 1); - tokenEntries.add(tokenEntry); - } - token = lexer.nextToken(); - } - } finally { - tokenEntries.add(TokenEntry.getEOF()); + protected String getImage(AntlrToken token) { + if (caseSensitive) { + return token.getImage(); } + return token.getImage().toLowerCase(Locale.ROOT); + } + + @Override + protected org.antlr.v4.runtime.Lexer getLexerForSource(CharStream charStream) { + return new com.nawforce.runtime.parsers.ApexLexer(charStream); } } diff --git a/pmd-apex/src/main/java/net/sourceforge/pmd/lang/apex/ApexLanguageModule.java 
b/pmd-apex/src/main/java/net/sourceforge/pmd/lang/apex/ApexLanguageModule.java index 7b57934bd7..690c228c0b 100644 --- a/pmd-apex/src/main/java/net/sourceforge/pmd/lang/apex/ApexLanguageModule.java +++ b/pmd-apex/src/main/java/net/sourceforge/pmd/lang/apex/ApexLanguageModule.java @@ -4,6 +4,8 @@ package net.sourceforge.pmd.lang.apex; +import net.sourceforge.pmd.cpd.ApexTokenizer; +import net.sourceforge.pmd.cpd.Tokenizer; import net.sourceforge.pmd.lang.Language; import net.sourceforge.pmd.lang.LanguageModuleBase; import net.sourceforge.pmd.lang.LanguageProcessor; @@ -32,6 +34,11 @@ public class ApexLanguageModule extends LanguageModuleBase { return new ApexLanguageProcessor((ApexLanguageProperties) bundle); } + @Override + public Tokenizer createCpdTokenizer(LanguagePropertyBundle bundle) { + return new ApexTokenizer((ApexLanguageProperties) bundle); + } + public static Language getInstance() { return LanguageRegistry.PMD.getLanguageByFullName(NAME); } diff --git a/pmd-apex/src/main/java/net/sourceforge/pmd/lang/apex/ApexLanguageProperties.java b/pmd-apex/src/main/java/net/sourceforge/pmd/lang/apex/ApexLanguageProperties.java index 1b33565565..3431c89dd5 100644 --- a/pmd-apex/src/main/java/net/sourceforge/pmd/lang/apex/ApexLanguageProperties.java +++ b/pmd-apex/src/main/java/net/sourceforge/pmd/lang/apex/ApexLanguageProperties.java @@ -4,6 +4,7 @@ package net.sourceforge.pmd.lang.apex; +import net.sourceforge.pmd.cpd.Tokenizer; import net.sourceforge.pmd.lang.LanguagePropertyBundle; import net.sourceforge.pmd.properties.PropertyDescriptor; import net.sourceforge.pmd.properties.PropertyFactory; @@ -23,6 +24,7 @@ public class ApexLanguageProperties extends LanguagePropertyBundle { public ApexLanguageProperties() { super(ApexLanguageModule.getInstance()); definePropertyDescriptor(MULTIFILE_DIRECTORY); + definePropertyDescriptor(Tokenizer.CPD_CASE_SENSITIVE); } diff --git a/pmd-apex/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language 
b/pmd-apex/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language deleted file mode 100644 index fd84d4a22b..0000000000 --- a/pmd-apex/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language +++ /dev/null @@ -1 +0,0 @@ -net.sourceforge.pmd.cpd.ApexLanguage diff --git a/pmd-apex/src/test/java/net/sourceforge/pmd/cpd/ApexTokenizerTest.java b/pmd-apex/src/test/java/net/sourceforge/pmd/cpd/ApexTokenizerTest.java index aeb4a51252..47fd06778b 100644 --- a/pmd-apex/src/test/java/net/sourceforge/pmd/cpd/ApexTokenizerTest.java +++ b/pmd-apex/src/test/java/net/sourceforge/pmd/cpd/ApexTokenizerTest.java @@ -4,16 +4,16 @@ package net.sourceforge.pmd.cpd; -import java.util.Properties; - import org.junit.jupiter.api.Test; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; +import net.sourceforge.pmd.cpd.test.LanguagePropertyConfig; +import net.sourceforge.pmd.lang.apex.ApexLanguageModule; class ApexTokenizerTest extends CpdTextComparisonTest { ApexTokenizerTest() { - super(".cls"); + super(ApexLanguageModule.getInstance(), ".cls"); } @Override @@ -21,13 +21,6 @@ class ApexTokenizerTest extends CpdTextComparisonTest { return "../lang/apex/cpd/testdata"; } - @Override - public Tokenizer newTokenizer(Properties properties) { - ApexTokenizer tokenizer = new ApexTokenizer(); - tokenizer.setProperties(properties); - return tokenizer; - } - @Test void testTokenize() { @@ -52,14 +45,12 @@ class ApexTokenizerTest extends CpdTextComparisonTest { doTest("tabWidth"); } - private Properties caseSensitive() { + private LanguagePropertyConfig caseSensitive() { return properties(true); } - private Properties properties(boolean caseSensitive) { - Properties properties = new Properties(); - properties.setProperty(ApexTokenizer.CASE_SENSITIVE, Boolean.toString(caseSensitive)); - return properties; + private LanguagePropertyConfig properties(boolean caseSensitive) { + return properties -> properties.setProperty(Tokenizer.CPD_CASE_SENSITIVE, caseSensitive); } } 
diff --git a/pmd-cli/src/main/java/net/sourceforge/pmd/cli/commands/internal/CpdCommand.java b/pmd-cli/src/main/java/net/sourceforge/pmd/cli/commands/internal/CpdCommand.java index da3fdacdc0..b228f2ee92 100644 --- a/pmd-cli/src/main/java/net/sourceforge/pmd/cli/commands/internal/CpdCommand.java +++ b/pmd-cli/src/main/java/net/sourceforge/pmd/cli/commands/internal/CpdCommand.java @@ -116,7 +116,7 @@ public class CpdCommand extends AbstractAnalysisPmdSubcommand { configuration.setSkipDuplicates(skipDuplicates); configuration.setSkipLexicalErrors(skipLexicalErrors); configuration.setSourceEncoding(encoding.getEncoding().name()); - configuration.setURI(uri == null ? null : uri.toString()); + configuration.setURI(uri); configuration.postContruct(); // Pass extra parameters as System properties to allow language diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AbstractTokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AbstractTokenizer.java deleted file mode 100644 index 5db9827346..0000000000 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AbstractTokenizer.java +++ /dev/null @@ -1,184 +0,0 @@ -/** - * BSD-style license; for more info see http://pmd.sourceforge.net/license.html - */ - -package net.sourceforge.pmd.cpd; - -import java.util.List; -import java.util.Locale; - -import net.sourceforge.pmd.lang.document.TextDocument; - -/** - * - * @author Zev Blut zb@ubit.com - * @author Romain PELISSE belaran@gmail.com - * - * @deprecated Use an {@link AnyTokenizer} instead, it's basically as powerful. 
- */ -@Deprecated -public abstract class AbstractTokenizer implements Tokenizer { - - // FIXME depending on subclasses to assign local vars is rather fragile - - // better to make private and setup via explicit hook methods - - protected List stringToken; // List, should be set by sub - // classes - protected List ignorableCharacter; // List, should be set by - // sub classes - // FIXME:Maybe an array of 'char' - // would be better for - // performance ? - protected List ignorableStmt; // List, should be set by sub - // classes - protected char oneLineCommentChar = '#'; // Most script languages ( shell, - // ruby, python,...) use this - // symbol for comment line - - private List code; - private int lineNumber = 0; - private String currentLine; - - // both zero-based - private int tokBeginLine; - private int tokBeginCol; - - protected boolean spanMultipleLinesString = true; // Most languages do, so - // default is true - protected Character spanMultipleLinesLineContinuationCharacter = null; - - private boolean downcaseString = true; - - @Override - public void tokenize(TextDocument tokens, Tokens tokenEntries) { - code = tokens.getCode(); - - for (lineNumber = 0; lineNumber < code.size(); lineNumber++) { - currentLine = code.get(lineNumber); - int loc = 0; - while (loc < currentLine.length()) { - StringBuilder token = new StringBuilder(); - loc = getTokenFromLine(token, loc); // may jump several lines - - if (token.length() > 0 && !isIgnorableString(token.toString())) { - final String image; - if (downcaseString) { - image = token.toString().toLowerCase(Locale.ROOT); - } else { - image = token.toString(); - } - - tokenEntries.add(new TokenEntry(image, - tokens.getFileName(), - tokBeginLine + 1, - tokBeginCol + 1, - loc + 1)); - } - } - } - tokenEntries.add(TokenEntry.getEOF()); - } - - /** - * Returns (0-based) EXclusive offset of the end of the token, - * may jump several lines (sets {@link #lineNumber} in this case). 
- */ - private int getTokenFromLine(StringBuilder token, int loc) { - tokBeginLine = lineNumber; - tokBeginCol = loc; - - for (int j = loc; j < currentLine.length(); j++) { - char tok = currentLine.charAt(j); - if (!Character.isWhitespace(tok) && !ignoreCharacter(tok)) { - if (isComment(tok)) { - if (token.length() > 0) { - return j; - } else { - return getCommentToken(token, loc); - } - } else if (isString(tok)) { - if (token.length() > 0) { - return j; // we need to now parse the string as a - // separate token. - } else { - // we are at the start of a string - return parseString(token, j, tok); - } - } else { - token.append(tok); - } - } else { - if (token.length() > 0) { - return j; - } else { - // ignored char - tokBeginCol++; - } - } - loc = j; - } - return loc + 1; - } - - private int parseString(StringBuilder token, int loc, char stringDelimiter) { - boolean escaped = false; - boolean done = false; - char tok; - while (loc < currentLine.length() && !done) { - tok = currentLine.charAt(loc); - if (escaped && tok == stringDelimiter) { // Found an escaped string - escaped = false; - } else if (tok == stringDelimiter && token.length() > 0) { - // We are done, we found the end of the string... - done = true; - } else { - // Found an escaped char? - escaped = tok == '\\'; - } - // Adding char to String:" + token.toString()); - token.append(tok); - loc++; - } - // Handling multiple lines string - if (!done // ... we didn't find the end of the string (but the end of the line) - && spanMultipleLinesString // ... the language allow multiple line span Strings - && lineNumber < code.size() - 1 // ... there is still more lines to parse - ) { - // removes last character, if it is the line continuation (e.g. 
- // backslash) character - if (spanMultipleLinesLineContinuationCharacter != null - && token.length() > 0 - && token.charAt(token.length() - 1) == spanMultipleLinesLineContinuationCharacter) { - token.setLength(token.length() - 1); - } - // parsing new line - currentLine = code.get(++lineNumber); - // Warning : recursive call ! - loc = parseString(token, 0, stringDelimiter); - } - return loc; - } - - private boolean ignoreCharacter(char tok) { - return ignorableCharacter.contains(String.valueOf(tok)); - } - - private boolean isString(char tok) { - return stringToken.contains(String.valueOf(tok)); - } - - private boolean isComment(char tok) { - return tok == oneLineCommentChar; - } - - private int getCommentToken(StringBuilder token, int loc) { - while (loc < currentLine.length()) { - token.append(currentLine.charAt(loc++)); - } - return loc; - } - - private boolean isIgnorableString(String token) { - return ignorableStmt.contains(token); - } -} diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AnyTokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AnyTokenizer.java index 6e02dda6b8..671644eae6 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AnyTokenizer.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AnyTokenizer.java @@ -9,6 +9,7 @@ import java.util.regex.Pattern; import org.apache.commons.lang3.StringUtils; +import net.sourceforge.pmd.lang.document.Chars; import net.sourceforge.pmd.lang.document.TextDocument; import net.sourceforge.pmd.util.StringUtil; @@ -61,36 +62,32 @@ public class AnyTokenizer implements Tokenizer { } @Override - public void tokenize(TextDocument sourceCode, Tokens tokenEntries) { - CharSequence text = sourceCode.getCodeBuffer(); + public void tokenize(TextDocument sourceCode, TokenFactory tokenEntries) { + Chars text = sourceCode.getText(); Matcher matcher = pattern.matcher(text); int lineNo = 1; int lastLineStart = 0; - try { - while (matcher.find()) { - String image = matcher.group(); - if 
(isComment(image)) { - continue; - } else if (StringUtils.isWhitespace(image)) { - lineNo++; - lastLineStart = matcher.end(); - continue; - } - - int bline = lineNo; - int bcol = 1 + matcher.start() - lastLineStart; // + 1 because columns are 1 based - int ecol = StringUtil.columnNumberAt(image, image.length()); // this already outputs a 1-based column - if (ecol == image.length() + 1) { - ecol = bcol + image.length(); // single-line token - } else { - // multiline, need to update the line count - lineNo += StringUtil.lineNumberAt(image, image.length()) - 1; - lastLineStart = matcher.start() + image.length() - ecol + 1; - } - tokenEntries.add(new TokenEntry(image, sourceCode.getFileName(), bline, bcol, ecol)); + while (matcher.find()) { + String image = matcher.group(); + if (isComment(image)) { + continue; + } else if (StringUtils.isWhitespace(image)) { + lineNo++; + lastLineStart = matcher.end(); + continue; } - } finally { - tokenEntries.add(TokenEntry.getEOF()); + + int bline = lineNo; + int bcol = 1 + matcher.start() - lastLineStart; // + 1 because columns are 1 based + int ecol = StringUtil.columnNumberAt(image, image.length()); // this already outputs a 1-based column + if (ecol == image.length() + 1) { + ecol = bcol + image.length(); // single-line token + } else { + // multiline, need to update the line count + lineNo += StringUtil.lineNumberAt(image, image.length()) - 1; + lastLineStart = matcher.start() + image.length() - ecol + 1; + } + tokenEntries.recordToken(image, bline, bcol, lineNo, ecol); } } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPD.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPD.java index a4bf7e24c2..3c8a6e0265 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPD.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPD.java @@ -27,6 +27,9 @@ import net.sourceforge.pmd.internal.util.FileFinder; import net.sourceforge.pmd.internal.util.FileUtil; import net.sourceforge.pmd.internal.util.IOUtil; 
import net.sourceforge.pmd.lang.ast.TokenMgrError; +import net.sourceforge.pmd.lang.document.SourceCode; +import net.sourceforge.pmd.lang.document.TextDocument; +import net.sourceforge.pmd.lang.document.TextFile; import net.sourceforge.pmd.util.database.DBMSMetadata; import net.sourceforge.pmd.util.database.DBURI; import net.sourceforge.pmd.util.database.SourceObject; @@ -41,7 +44,7 @@ public class CPD { private CPDConfiguration configuration; - private Map source = new TreeMap<>(); + private final SourceManager sourceManager = new SourceManager(); private CPDListener listener = new CPDNullListener(); private Tokens tokens = new Tokens(); private MatchAlgorithm matchAlgorithm; @@ -128,8 +131,8 @@ public class CPD { } public void go() { - log.debug("Running match algorithm on {} files...", source.size()); - matchAlgorithm = new MatchAlgorithm(source, tokens, configuration.getMinimumTileSize(), listener); + log.debug("Running match algorithm on {} files...", sourceManager.size()); + matchAlgorithm = new MatchAlgorithm(sourceManager, tokens, configuration.getMinimumTileSize(), listener); matchAlgorithm.findMatches(); log.debug("Finished: {} duplicates found", matchAlgorithm.getMatches().size()); } @@ -216,8 +219,7 @@ public class CPD { } } - @Experimental - public void add(SourceCode sourceCode) throws IOException { + private void add(SourceCode sourceCode) throws IOException { if (configuration.isSkipLexicalErrors()) { addAndSkipLexicalErrors(sourceCode); } else { @@ -226,11 +228,13 @@ public class CPD { } private void addAndThrowLexicalError(SourceCode sourceCode) throws IOException { - log.debug("Tokenizing {}", sourceCode.getFileName()); - configuration.tokenizer().tokenize(sourceCode, tokens); - listener.addedFile(1, new File(sourceCode.getFileName())); - source.put(sourceCode.getFileName(), sourceCode); - numberOfTokensPerFile.put(sourceCode.getFileName(), tokens.size() - lastTokenSize - 1 /*EOF*/); + log.debug("Tokenizing {}", sourceCode.getPathId()); + try 
(TextDocument doc = sourceCode.load()) { + configuration.tokenizer().tokenize(doc, tokens); + } + listener.addedFile(1); + source.put(sourceCode.getPathId(), sourceCode); + numberOfTokensPerFile.put(sourceCode.getPathId(), tokens.size() - lastTokenSize - 1 /*EOF*/); lastTokenSize = tokens.size(); } @@ -239,7 +243,7 @@ public class CPD { try { addAndThrowLexicalError(sourceCode); } catch (TokenMgrError e) { - System.err.println("Skipping " + sourceCode.getFileName() + ". Reason: " + e.getMessage()); + System.err.println("Skipping " + sourceCode.getDisplayName() + ". Reason: " + e.getMessage()); savedState.restore(tokens); } } @@ -253,15 +257,6 @@ public class CPD { return new ArrayList<>(source.keySet()); } - /** - * Get each Source to be processed. - * - * @return all Sources to be processed - */ - public List getSources() { - return new ArrayList<>(source.values()); - } - /** * Entry to invoke CPD as command line tool. Note that this will * invoke {@link System#exit(int)}. diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java index efe6550261..2a4950d2f8 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java @@ -11,6 +11,7 @@ import java.io.FilenameFilter; import java.io.Reader; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; +import java.net.URI; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; @@ -24,6 +25,8 @@ import net.sourceforge.pmd.AbstractConfiguration; import net.sourceforge.pmd.cpd.renderer.CPDReportRenderer; import net.sourceforge.pmd.internal.util.FileFinder; import net.sourceforge.pmd.internal.util.FileUtil; +import net.sourceforge.pmd.lang.document.TextDocument; +import net.sourceforge.pmd.lang.document.TextFile; /** * @@ -79,7 +82,7 @@ public class CPDConfiguration extends 
AbstractConfiguration { private boolean nonRecursive; - private String uri; + private URI uri; private boolean help; @@ -87,7 +90,7 @@ public class CPDConfiguration extends AbstractConfiguration { private boolean debug = false; - public SourceCode sourceCodeFor(File file) { + public TextFile sourceCodeFor(File file) { return new SourceCode(new SourceCode.FileCodeLoader(file, getSourceEncoding().name())); } @@ -340,11 +343,11 @@ public class CPDConfiguration extends AbstractConfiguration { this.fileListPath = fileListPath; } - public String getURI() { + public URI getURI() { return uri; } - public void setURI(String uri) { + public void setURI(URI uri) { this.uri = uri; } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDListener.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDListener.java index ae180b4c97..6f361d1afb 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDListener.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDListener.java @@ -4,8 +4,6 @@ package net.sourceforge.pmd.cpd; -import java.io.File; - public interface CPDListener { int INIT = 0; @@ -14,7 +12,7 @@ public interface CPDListener { int GROUPING = 3; int DONE = 4; - void addedFile(int fileCount, File file); + void addedFile(int fileCount); void phaseUpdate(int phase); } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDNullListener.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDNullListener.java index 64b6060166..3566a9a6cf 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDNullListener.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDNullListener.java @@ -4,11 +4,9 @@ package net.sourceforge.pmd.cpd; -import java.io.File; - public class CPDNullListener implements CPDListener { @Override - public void addedFile(int fileCount, File file) { + public void addedFile(int fileCount) { // does nothing - override it if necessary } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CpdAnalysis.java 
b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CpdAnalysis.java new file mode 100644 index 0000000000..e0cd1aa5b2 --- /dev/null +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CpdAnalysis.java @@ -0,0 +1,147 @@ +/** + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.cpd; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import net.sourceforge.pmd.internal.util.FileCollectionUtil; +import net.sourceforge.pmd.internal.util.FileFinder; +import net.sourceforge.pmd.internal.util.FileUtil; +import net.sourceforge.pmd.internal.util.IOUtil; +import net.sourceforge.pmd.lang.ast.TokenMgrError; +import net.sourceforge.pmd.lang.document.FileCollector; +import net.sourceforge.pmd.lang.document.TextDocument; +import net.sourceforge.pmd.util.database.DBMSMetadata; +import net.sourceforge.pmd.util.database.DBURI; +import net.sourceforge.pmd.util.database.SourceObject; +import net.sourceforge.pmd.util.log.MessageReporter; + +/** + * @deprecated Use the module pmd-cli for CLI support. 
+ */ +@Deprecated +public class CpdAnalysis { + + private CPDConfiguration configuration; + private FileCollector files; + private MessageReporter reporter; + private CPDListener listener; + + + public CpdAnalysis(CPDConfiguration theConfiguration) { + configuration = theConfiguration; + + // Add all sources + try { + extractAllSources(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public FileCollector files() { + return files; + } + + private void extractAllSources() throws IOException { + // Add files + if (null != configuration.getFiles() && !configuration.getFiles().isEmpty()) { + addSourcesFilesToCPD(configuration.getFiles()); + } + + // Add Database URIS + if (null != configuration.getURI()) { + FileCollectionUtil.collectDB(files(), configuration.getURI()); + } + + if (null != configuration.getFileListPath()) { + FileCollectionUtil.collectFileList(files(), FileUtil.toExistingPath(configuration.getFileListPath())); + } + } + + private void addSourcesFilesToCPD(List files) throws IOException { + for (File file : files) { + files().addFileOrDirectory(file.toPath()); + } + } + + public void setCpdListener(CPDListener cpdListener) { + this.listener = cpdListener; + } + + private void addAndThrowLexicalError(SourceCode sourceCode) throws IOException { + log.debug("Tokenizing {}", sourceCode.getPathId()); + try (TextDocument doc = sourceCode.load()) { + configuration.tokenizer().tokenize(doc, tokens); + } + listener.addedFile(1); + source.put(sourceCode.getPathId(), sourceCode); + numberOfTokensPerFile.put(sourceCode.getPathId(), tokens.size() - lastTokenSize - 1 /*EOF*/); + lastTokenSize = tokens.size(); + } + + public CPDReport performAnalysis() { + + try (SourceManager sourceManager = new SourceManager(files.getCollectedFiles())) { + Tokens tokens = new Tokens(); + + + log.debug("Running match algorithm on {} files...", sourceManager.size()); + MatchAlgorithm matchAlgorithm = new MatchAlgorithm(sourceManager, tokens, 
configuration.getMinimumTileSize(), listener); + matchAlgorithm.findMatches(); + log.debug("Finished: {} duplicates found", matchAlgorithm.getMatches().size()); + + + + } catch (Exception e) { + reporter.errorEx("Exception while running CPD", e); + } + } + + public void add(File file) throws IOException { + + if (configuration.isSkipDuplicates()) { + // TODO refactor this thing into a separate class + String signature = file.getName() + '_' + file.length(); + if (current.contains(signature)) { + System.err.println("Skipping " + file.getAbsolutePath() + + " since it appears to be a duplicate file and --skip-duplicate-files is set"); + return; + } + current.add(signature); + } + + if (!IOUtil.equalsNormalizedPaths(file.getAbsoluteFile().getCanonicalPath(), file.getAbsolutePath())) { + System.err.println("Skipping " + file + " since it appears to be a symlink"); + return; + } + + if (!file.exists()) { + System.err.println("Skipping " + file + " since it doesn't exist (broken symlink?)"); + return; + } + + SourceCode sourceCode = configuration.sourceCodeFor(file); + add(sourceCode); + } + + +} diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/GUI.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/GUI.java index d3aa2ae9eb..13fd308305 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/GUI.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/GUI.java @@ -911,7 +911,7 @@ public class GUI implements CPDListener { } @Override - public void addedFile(int fileCount, File file) { + public void addedFile(int fileCount) { tokenizingFilesBar.setMaximum(fileCount); tokenizingFilesBar.setValue(tokenizingFilesBar.getValue() + 1); } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Mark.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Mark.java index 30e13c4b04..4cfe7dfd73 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Mark.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Mark.java @@ -4,11 +4,14 @@ package 
net.sourceforge.pmd.cpd; +import net.sourceforge.pmd.lang.document.Chars; +import net.sourceforge.pmd.lang.document.TextDocument; + public class Mark implements Comparable { private TokenEntry token; private TokenEntry endToken; private int lineCount; - private SourceCode code; + private TextDocument code; public Mark(TokenEntry token) { this.token = token; @@ -69,11 +72,13 @@ public class Mark implements Comparable { } /** Newlines are normalized to \n. */ - public String getSourceCodeSlice() { - return this.code.getSlice(getBeginLine(), getEndLine()); + public Chars getSourceCodeSlice() { + return this.code.sliceOriginalText( + this.code.createLineRange(getBeginLine(), getEndLine()) + ); } - public void setSourceCode(SourceCode code) { + public void setSourceCode(TextDocument code) { this.code = code; } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Match.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Match.java index 992f551e9c..3dbb8e2a9e 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Match.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Match.java @@ -10,6 +10,7 @@ import java.util.Set; import java.util.TreeSet; import net.sourceforge.pmd.PMD; +import net.sourceforge.pmd.lang.document.Chars; public class Match implements Comparable, Iterable { @@ -74,7 +75,7 @@ public class Match implements Comparable, Iterable { } /** Newlines are normalized to \n. 
*/ - public String getSourceCodeSlice() { + public Chars getSourceCodeSlice() { return this.getMark(0).getSourceCodeSlice(); } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/MatchAlgorithm.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/MatchAlgorithm.java index d6f482dd05..ca198f032f 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/MatchAlgorithm.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/MatchAlgorithm.java @@ -11,23 +11,25 @@ import java.util.Iterator; import java.util.List; import java.util.Map; -public class MatchAlgorithm { +import net.sourceforge.pmd.lang.document.TextDocument; + +class MatchAlgorithm { private static final int MOD = 37; private int lastMod = 1; private List matches; - private Map source; + private Map source; private Tokens tokens; private List code; private CPDListener cpdListener; private int min; - public MatchAlgorithm(Map sourceCode, Tokens tokens, int min) { + public MatchAlgorithm(Map sourceCode, Tokens tokens, int min) { this(sourceCode, tokens, min, new CPDNullListener()); } - public MatchAlgorithm(Map sourceCode, Tokens tokens, int min, CPDListener listener) { + public MatchAlgorithm(SourceManager sourceCode, Tokens tokens, int min, CPDListener listener) { this.source = sourceCode; this.tokens = tokens; this.code = tokens.getTokens(); @@ -85,7 +87,7 @@ public class MatchAlgorithm { mark.setLineCount(lineCount); mark.setEndToken(endToken); - SourceCode sourceCode = source.get(token.getTokenSrcID()); + TextDocument sourceCode = source.get(token.getTokenSrcID()); assert sourceCode != null : token.getTokenSrcID() + " is not registered in " + source.keySet(); mark.setSourceCode(sourceCode); } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SimpleRenderer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SimpleRenderer.java index 8595b38c3c..9ec4b99bc7 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SimpleRenderer.java +++ 
b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SimpleRenderer.java @@ -60,7 +60,7 @@ public class SimpleRenderer implements CPDReportRenderer { writer.append(PMD.EOL); // add a line to separate the source from the desc above - String source = match.getSourceCodeSlice(); + Chars source = match.getSourceCodeSlice(); if (trimLeadingWhitespace) { for (Chars line : StringUtil.linesWithTrimIndent(source)) { @@ -70,7 +70,8 @@ public class SimpleRenderer implements CPDReportRenderer { return; } - writer.append(source).append(PMD.EOL); + source.writeFully(writer); + writer.append(PMD.EOL); } } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SourceCode.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SourceCode.java index bd93023de7..34288552cd 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SourceCode.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SourceCode.java @@ -4,190 +4,32 @@ package net.sourceforge.pmd.cpd; -import java.io.BufferedReader; -import java.io.File; -import java.io.InputStreamReader; +import java.io.IOException; import java.io.Reader; -import java.io.StringReader; import java.lang.ref.SoftReference; -import java.nio.file.Files; -import java.util.ArrayList; import java.util.List; -import net.sourceforge.pmd.internal.util.IOUtil; +import net.sourceforge.pmd.lang.document.TextDocument; +import net.sourceforge.pmd.lang.document.TextFile; public class SourceCode { - public abstract static class CodeLoader { - private SoftReference> code; + private SoftReference softRef; + private final TextFile textFile; - public List getCode() { - List c = null; - if (code != null) { - c = code.get(); - } - if (c != null) { - return c; - } - this.code = new SoftReference<>(load()); - return code.get(); - } - - /** - * Loads a range of lines. 
- * - * @param startLine Start line (inclusive, 1-based) - * @param endLine End line (inclusive, 1-based) - */ - public List getCodeSlice(int startLine, int endLine) { - List c = null; - if (code != null) { - c = code.get(); - } - if (c != null) { - return c.subList(startLine - 1, endLine); - } - return load(startLine, endLine); - } - - public abstract String getFileName(); - - protected abstract Reader getReader() throws Exception; - - protected List load() { - try (BufferedReader reader = new BufferedReader(getReader())) { - List lines = new ArrayList<>(); - String currentLine; - while ((currentLine = reader.readLine()) != null) { - lines.add(currentLine); - } - return lines; - } catch (Exception e) { - e.printStackTrace(); - throw new RuntimeException("Problem while reading " + getFileName() + ":" + e.getMessage()); - } - } - - /** - * Loads a range of lines. - * - * @param startLine Start line (inclusive, 1-based) - * @param endLine End line (inclusive, 1-based) - */ - protected List load(int startLine, int endLine) { - try (BufferedReader reader = new BufferedReader(getReader())) { - int linesToRead = 1 + endLine - startLine; // +1 because endLine is inclusive - List lines = new ArrayList<>(linesToRead); - - // Skip lines until we reach the start point - for (int i = 0; i < startLine - 1; i++) { - reader.readLine(); - } - - String currentLine; - while ((currentLine = reader.readLine()) != null) { - lines.add(currentLine); - - if (lines.size() == linesToRead) { - break; - } - } - return lines; - } catch (Exception e) { - e.printStackTrace(); - throw new RuntimeException("Problem while reading " + getFileName() + ":" + e.getMessage()); - } - } + public SourceCode(TextFile textFile) { + this.textFile = textFile; } - public static class FileCodeLoader extends CodeLoader { - private File file; - private String encoding; - - public FileCodeLoader(File file, String encoding) { - this.file = file; - this.encoding = encoding; - } - - @Override - public Reader 
getReader() throws Exception { - IOUtil.BomAwareInputStream inputStream = new IOUtil.BomAwareInputStream(Files.newInputStream(file.toPath())); - - if (inputStream.hasBom()) { - encoding = inputStream.getBomCharsetName(); - } - return new InputStreamReader(inputStream, encoding); - } - - public String getEncoding() { - return encoding; - } - - @Override - public String getFileName() { - return file.getAbsolutePath(); + public TextDocument load() throws IOException { + if (softRef != null && softRef.get() != null) { + return softRef.get(); } + TextDocument doc = TextDocument.create(textFile); + softRef = new SoftReference<>(doc); + return doc; } - public static class StringCodeLoader extends CodeLoader { - public static final String DEFAULT_NAME = "CODE_LOADED_FROM_STRING"; - - private String code; - - private String name; - - public StringCodeLoader(String code) { - this(code, DEFAULT_NAME); - } - - public StringCodeLoader(String code, String name) { - this.code = code; - this.name = name; - } - - @Override - public Reader getReader() { - return new StringReader(code); - } - - @Override - public String getFileName() { - return name; - } - } - - public static class ReaderCodeLoader extends CodeLoader { - public static final String DEFAULT_NAME = "CODE_LOADED_FROM_READER"; - - private Reader code; - - private String name; - - public ReaderCodeLoader(Reader code) { - this(code, DEFAULT_NAME); - } - - public ReaderCodeLoader(Reader code, String name) { - this.code = code; - this.name = name; - } - - @Override - public Reader getReader() { - return code; - } - - @Override - public String getFileName() { - return name; - } - } - - private CodeLoader cl; - - public SourceCode(CodeLoader cl) { - this.cl = cl; - } public List getCode() { return cl.getCode(); diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SourceManager.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SourceManager.java new file mode 100644 index 0000000000..bbcc75e051 --- /dev/null +++ 
b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SourceManager.java @@ -0,0 +1,43 @@ +/** + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.cpd; + +import java.lang.ref.SoftReference; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +import net.sourceforge.pmd.internal.util.IOUtil; +import net.sourceforge.pmd.lang.document.TextDocument; +import net.sourceforge.pmd.lang.document.TextFile; + +public class SourceManager implements AutoCloseable { + + private final Map> files = new ConcurrentHashMap<>(); + private final List textFiles; + + public SourceManager(List files) { + textFiles = new ArrayList<>(files); + } + + + TextDocument get(String pathId) { + + } + + public int size() { + return files.size(); + } + + + @Override + public void close() throws Exception { + Exception exception = IOUtil.closeAll(textFiles); + if (exception != null) { + throw exception; + } + } +} diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java index 2d5ad99b4a..96c89cebda 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java @@ -25,19 +25,6 @@ public class TokenEntry implements Comparable { private int identifier; private int hashCode; - private static final ThreadLocal> TOKENS = new ThreadLocal>() { - @Override - protected Map initialValue() { - return new HashMap<>(); - } - }; - private static final ThreadLocal TOKEN_COUNT = new ThreadLocal() { - @Override - protected AtomicInteger initialValue() { - return new AtomicInteger(0); - } - }; - private TokenEntry() { this.identifier = 0; this.tokenSrcID = "EOFMarker"; @@ -59,14 +46,6 @@ public class TokenEntry implements Comparable { this(image, tokenSrcID, beginLine, -1, -1); } - /** - * Creates a new token entry with the 
given informations. - * @param image - * @param tokenSrcID - * @param beginLine the linenumber, 1-based. - * @param beginColumn the column number, 1-based - * @param endColumn the column number, 1-based - */ public TokenEntry(String image, String tokenSrcID, int beginLine, int beginColumn, int endColumn) { assert isOk(beginLine) && isOk(beginColumn) && isOk(endColumn) : "Coordinates are 1-based"; setImage(image); @@ -74,7 +53,6 @@ public class TokenEntry implements Comparable { this.beginLine = beginLine; this.beginColumn = beginColumn; this.endColumn = endColumn; - this.index = TOKEN_COUNT.get().getAndIncrement(); } public TokenEntry(String image, FileLocation location) { diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/TokenFactory.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/TokenFactory.java new file mode 100644 index 0000000000..d72821f4ab --- /dev/null +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/TokenFactory.java @@ -0,0 +1,42 @@ +/** + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.cpd; + +import net.sourceforge.pmd.lang.document.FileLocation; +import net.sourceforge.pmd.lang.document.TextFile; + +public interface TokenFactory { + + void recordToken(String image, int startLine, int startCol, int endLine, int endCol); + + default void recordToken(String image, FileLocation location) { + recordToken(image, location.getStartLine(), location.getStartColumn(), location.getEndLine(), location.getEndColumn()); + } + + void setImage(TokenEntry entry, String newImage); + + TokenEntry peekLastToken(); + + static TokenFactory forFile(TextFile file, Tokens sink) { + return new TokenFactory() { + final String name = file.getPathId(); + + @Override + public void recordToken(String image, int startLine, int startCol, int endLine, int endCol) { + sink.addToken(image, name, startLine, startCol, endLine, endCol); + } + + @Override + public void setImage(TokenEntry entry, String 
newImage) { + sink.setImage(entry, newImage); + } + + @Override + public TokenEntry peekLastToken() { + return sink.peekLastToken(); + } + }; + } +} diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java index 2e0d77f770..8abe88cd86 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java @@ -43,6 +43,13 @@ public interface Tokenizer { .build(); + PropertyDescriptor CPD_CASE_SENSITIVE = + PropertyFactory.booleanProperty("cpdCaseSensitive") + .defaultValue(true) + .desc("Whether CPD should ignore the case of tokens. Affects all tokens.") + .build(); + + String IGNORE_LITERALS = "ignore_literals"; String IGNORE_IDENTIFIERS = "ignore_identifiers"; String IGNORE_ANNOTATIONS = "ignore_annotations"; @@ -75,5 +82,5 @@ public interface Tokenizer { String DEFAULT_SKIP_BLOCKS_PATTERN = "#if 0|#endif"; - void tokenize(TextDocument sourceCode, Tokens tokenEntries) throws IOException; + void tokenize(TextDocument sourceCode, TokenFactory tokenEntries) throws IOException; } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokens.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokens.java index 0840613dcc..1be2d32a5c 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokens.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokens.java @@ -5,17 +5,47 @@ package net.sourceforge.pmd.cpd; import java.util.ArrayList; +import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; + +import net.sourceforge.pmd.lang.document.FileLocation; public class Tokens { - private List tokens = new ArrayList<>(); + private final List tokens = new ArrayList<>(); + + private static final ThreadLocal> TOKENS = new ThreadLocal>() { + @Override + protected Map initialValue() { + return new HashMap<>(); + } + }; + 
private static final ThreadLocal TOKEN_COUNT = new ThreadLocal() { + @Override + protected AtomicInteger initialValue() { + return new AtomicInteger(0); + } + }; public void add(TokenEntry tokenEntry) { this.tokens.add(tokenEntry); } + public void addToken(String image, FileLocation location) { + this.tokens.add(new TokenEntry(image, location)); + } + + public void setImage(TokenEntry entry, String newImage) { + entry.setImage(newImage); + } + + public TokenEntry peekLastToken() { + return get(size() - 1); + } + public Iterator iterator() { return tokens.iterator(); } @@ -43,4 +73,8 @@ public class Tokens { public List getTokens() { return tokens; } + + void addToken(String image, String fileName, int startLine, int startCol, int endLine, int endCol) { + + } } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/TokenizerBase.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/TokenizerBase.java index 07d6e9894f..6168c2f58b 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/TokenizerBase.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/TokenizerBase.java @@ -6,9 +6,8 @@ package net.sourceforge.pmd.cpd.internal; import java.io.IOException; -import net.sourceforge.pmd.cpd.TokenEntry; +import net.sourceforge.pmd.cpd.TokenFactory; import net.sourceforge.pmd.cpd.Tokenizer; -import net.sourceforge.pmd.cpd.Tokens; import net.sourceforge.pmd.cpd.token.internal.BaseTokenFilter; import net.sourceforge.pmd.lang.TokenManager; import net.sourceforge.pmd.lang.ast.GenericToken; @@ -22,8 +21,8 @@ public abstract class TokenizerBase> implements Tokeni return new BaseTokenFilter<>(tokenManager); } - protected TokenEntry processToken(Tokens tokenEntries, T currentToken) { - return new TokenEntry(getImage(currentToken), currentToken.getReportLocation()); + protected void processToken(TokenFactory tokenEntries, T currentToken) { + tokenEntries.recordToken(getImage(currentToken), currentToken.getReportLocation()); } 
protected String getImage(T token) { @@ -31,11 +30,11 @@ public abstract class TokenizerBase> implements Tokeni } @Override - public void tokenize(TextDocument document, Tokens tokenEntries) throws IOException { + public void tokenize(TextDocument document, TokenFactory tokenEntries) throws IOException { TokenManager tokenManager = filterTokenStream(makeLexerImpl(document)); T currentToken = tokenManager.getNextToken(); while (currentToken != null) { - tokenEntries.add(processToken(tokenEntries, currentToken)); + processToken(tokenEntries, currentToken); currentToken = tokenManager.getNextToken(); } } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/FileCollector.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/FileCollector.java index 516851b167..55c9a1c952 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/FileCollector.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/FileCollector.java @@ -343,11 +343,16 @@ public final class FileCollector implements AutoCloseable { * @return True if the directory has been added */ public boolean addDirectory(Path dir) throws IOException { + return addDirectory(dir, true); + } + + public boolean addDirectory(Path dir, boolean recurse) throws IOException { if (!Files.isDirectory(dir)) { reporter.error("Not a directory {0}", dir); return false; } - Files.walkFileTree(dir, EnumSet.of(FileVisitOption.FOLLOW_LINKS), Integer.MAX_VALUE, new SimpleFileVisitor() { + int maxDepth = recurse ? 
Integer.MAX_VALUE : 1; + Files.walkFileTree(dir, EnumSet.of(FileVisitOption.FOLLOW_LINKS), maxDepth, new SimpleFileVisitor() { @Override public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { if (attrs.isRegularFile()) { @@ -367,8 +372,18 @@ public final class FileCollector implements AutoCloseable { * @return True if the file or directory has been added */ public boolean addFileOrDirectory(Path file) throws IOException { + return addFileOrDirectory(file, true); + } + + /** + * Add a file or directory recursively. Language is determined automatically + * from the extension/file patterns. + * + * @return True if the file or directory has been added + */ + public boolean addFileOrDirectory(Path file, boolean recurseIfDirectory) throws IOException { if (Files.isDirectory(file)) { - return addDirectory(file); + return addDirectory(file, recurseIfDirectory); } else if (Files.isRegularFile(file)) { return addFile(file); } else { diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/util/StringUtil.java b/pmd-core/src/main/java/net/sourceforge/pmd/util/StringUtil.java index 1913a713ff..c6413b59ee 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/util/StringUtil.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/util/StringUtil.java @@ -5,7 +5,6 @@ package net.sourceforge.pmd.util; import java.text.MessageFormat; -import java.util.Arrays; import java.util.List; import java.util.Locale; import java.util.regex.Matcher; @@ -291,9 +290,8 @@ public final class StringUtil { /** * Returns a list of */ - public static List linesWithTrimIndent(String source) { - List lines = Arrays.asList(source.split("\n")); - List result = lines.stream().map(Chars::wrap).collect(CollectionUtil.toMutableList()); + public static List linesWithTrimIndent(Chars source) { + List result = source.lineStream().collect(CollectionUtil.toMutableList()); trimIndentInPlace(result); return result; } diff --git 
a/pmd-core/src/test/java/net/sourceforge/pmd/cpd/CPDTest.java b/pmd-core/src/test/java/net/sourceforge/pmd/cpd/CPDTest.java index a4f966ca1f..8ac89bcf74 100644 --- a/pmd-core/src/test/java/net/sourceforge/pmd/cpd/CPDTest.java +++ b/pmd-core/src/test/java/net/sourceforge/pmd/cpd/CPDTest.java @@ -146,10 +146,10 @@ class CPDTest { } @Override - public void addedFile(int fileCount, File file) { + public void addedFile(int fileCount) { files++; if (files > expectedFilesCount) { - fail("File was added! - " + file); + fail("File was added!"); } } diff --git a/pmd-core/src/test/java/net/sourceforge/pmd/cpd/CSVRendererTest.java b/pmd-core/src/test/java/net/sourceforge/pmd/cpd/CSVRendererTest.java index 942e838c34..d06d1e7c47 100644 --- a/pmd-core/src/test/java/net/sourceforge/pmd/cpd/CSVRendererTest.java +++ b/pmd-core/src/test/java/net/sourceforge/pmd/cpd/CSVRendererTest.java @@ -56,6 +56,8 @@ class CSVRendererTest { } private Mark createMark(String image, String tokenSrcID, int beginLine, int lineCount, String code) { + Tokens tokens = new Tokens(); + tokens.addToken(image, ); Mark result = new Mark(new TokenEntry(image, tokenSrcID, beginLine)); result.setLineCount(lineCount); diff --git a/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerTest.java b/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerTest.java index b4746b7f64..64cc27eb69 100644 --- a/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerTest.java +++ b/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerTest.java @@ -6,16 +6,17 @@ package net.sourceforge.pmd.cpd; import static org.junit.jupiter.api.Assertions.assertEquals; -import java.util.Properties; - +import org.checkerframework.checker.nullness.qual.NonNull; import org.junit.jupiter.api.Test; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; +import net.sourceforge.pmd.cpd.test.LanguagePropertyConfig; +import net.sourceforge.pmd.lang.cpp.CppLanguageModule; class CPPTokenizerTest extends CpdTextComparisonTest { 
CPPTokenizerTest() { - super(".cpp"); + super(CppLanguageModule.getInstance(), ".cpp"); } @Override @@ -24,14 +25,7 @@ class CPPTokenizerTest extends CpdTextComparisonTest { } @Override - public Tokenizer newTokenizer(Properties props) { - CPPTokenizer tok = new CPPTokenizer(); - tok.setProperties(props); - return tok; - } - - @Override - public Properties defaultProperties() { + public @NonNull LanguagePropertyConfig defaultProperties() { return dontSkipBlocks(); } @@ -139,29 +133,30 @@ class CPPTokenizerTest extends CpdTextComparisonTest { doTest("listOfNumbers", "_ignored", skipLiteralSequences()); } - private static Properties skipBlocks(String skipPattern) { + private static LanguagePropertyConfig skipBlocks(String skipPattern) { return properties(true, skipPattern, false); } - private static Properties skipBlocks() { + private static LanguagePropertyConfig skipBlocks() { return skipBlocks(null); } - private static Properties dontSkipBlocks() { + private static LanguagePropertyConfig dontSkipBlocks() { return properties(false, null, false); } - private static Properties skipLiteralSequences() { + private static LanguagePropertyConfig skipLiteralSequences() { return properties(false, null, true); } - private static Properties properties(boolean skipBlocks, String skipPattern, boolean skipLiteralSequences) { - Properties properties = new Properties(); - properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS, Boolean.toString(skipBlocks)); - if (skipPattern != null) { - properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS_PATTERN, skipPattern); - } - properties.setProperty(Tokenizer.OPTION_IGNORE_LITERAL_SEQUENCES, Boolean.toString(skipLiteralSequences)); - return properties; + private static LanguagePropertyConfig properties(boolean skipBlocks, String skipPattern, boolean skipLiteralSequences) { + return properties -> { + if (!skipBlocks) { + properties.setProperty(CppLanguageModule.CPD_SKIP_BLOCKS, ""); + } else if (skipPattern != null) { + 
properties.setProperty(CppLanguageModule.CPD_SKIP_BLOCKS, skipPattern); + } + properties.setProperty(Tokenizer.CPD_IGNORE_LITERAL_SEQUENCES, skipLiteralSequences); + }; } } diff --git a/pmd-cs/src/test/java/net/sourceforge/pmd/cpd/CsTokenizerTest.java b/pmd-cs/src/test/java/net/sourceforge/pmd/cpd/CsTokenizerTest.java index 0901187e79..d2d3e78f80 100644 --- a/pmd-cs/src/test/java/net/sourceforge/pmd/cpd/CsTokenizerTest.java +++ b/pmd-cs/src/test/java/net/sourceforge/pmd/cpd/CsTokenizerTest.java @@ -8,9 +8,11 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import java.util.Properties; +import org.jetbrains.annotations.NotNull; import org.junit.jupiter.api.Test; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; +import net.sourceforge.pmd.lang.LanguagePropertyBundle; import net.sourceforge.pmd.lang.ast.TokenMgrError; class CsTokenizerTest extends CpdTextComparisonTest { @@ -24,13 +26,6 @@ class CsTokenizerTest extends CpdTextComparisonTest { return "../lang/cs/cpd/testdata"; } - @Override - public Tokenizer newTokenizer(Properties properties) { - CsTokenizer tok = new CsTokenizer(); - tok.setProperties(properties); - return tok; - } - @Test void testSimpleClass() { doTest("simpleClass"); diff --git a/pmd-dart/src/test/java/net/sourceforge/pmd/cpd/DartTokenizerTest.java b/pmd-dart/src/test/java/net/sourceforge/pmd/cpd/DartTokenizerTest.java index 5d08b1b849..bf89bff6a5 100644 --- a/pmd-dart/src/test/java/net/sourceforge/pmd/cpd/DartTokenizerTest.java +++ b/pmd-dart/src/test/java/net/sourceforge/pmd/cpd/DartTokenizerTest.java @@ -4,8 +4,6 @@ package net.sourceforge.pmd.cpd; -import java.util.Properties; - import org.junit.jupiter.api.Test; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; @@ -16,11 +14,6 @@ class DartTokenizerTest extends CpdTextComparisonTest { super(".dart"); } - @Override - public Tokenizer newTokenizer(Properties properties) { - return new DartTokenizer(); - } - @Test void testComment() { diff --git 
a/pmd-fortran/src/test/java/net/sourceforge/pmd/cpd/FortranTokenizerTest.java b/pmd-fortran/src/test/java/net/sourceforge/pmd/cpd/FortranTokenizerTest.java index c07b363a0a..54adb90fa5 100644 --- a/pmd-fortran/src/test/java/net/sourceforge/pmd/cpd/FortranTokenizerTest.java +++ b/pmd-fortran/src/test/java/net/sourceforge/pmd/cpd/FortranTokenizerTest.java @@ -4,8 +4,6 @@ package net.sourceforge.pmd.cpd; -import java.util.Properties; - import org.junit.jupiter.api.Test; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; @@ -25,11 +23,6 @@ class FortranTokenizerTest extends CpdTextComparisonTest { return "../lang/fortran/cpd/testdata"; } - @Override - public Tokenizer newTokenizer(Properties properties) { - return new FortranLanguage().getTokenizer(); - } - @Test void testSample() { doTest("sample"); diff --git a/pmd-gherkin/src/test/java/net/sourceforge/pmd/cpd/GherkinTokenizerTest.java b/pmd-gherkin/src/test/java/net/sourceforge/pmd/cpd/GherkinTokenizerTest.java index 9b0f20c667..86ba9bf7ab 100644 --- a/pmd-gherkin/src/test/java/net/sourceforge/pmd/cpd/GherkinTokenizerTest.java +++ b/pmd-gherkin/src/test/java/net/sourceforge/pmd/cpd/GherkinTokenizerTest.java @@ -4,12 +4,9 @@ package net.sourceforge.pmd.cpd; -import java.util.Properties; - import org.junit.jupiter.api.Test; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; -import net.sourceforge.pmd.lang.gherkin.cpd.GherkinTokenizer; class GherkinTokenizerTest extends CpdTextComparisonTest { GherkinTokenizerTest() { @@ -21,12 +18,6 @@ class GherkinTokenizerTest extends CpdTextComparisonTest { return "../lang/gherkin/cpd/testdata"; } - @Override - public Tokenizer newTokenizer(Properties properties) { - GherkinTokenizer tok = new GherkinTokenizer(); - return tok; - } - @Test void testAnnotatedSource() { doTest("annotatedSource"); diff --git a/pmd-go/src/test/java/net/sourceforge/pmd/cpd/GoTokenizerTest.java b/pmd-go/src/test/java/net/sourceforge/pmd/cpd/GoTokenizerTest.java index 
eada52e8f1..b40d4ff3bb 100644 --- a/pmd-go/src/test/java/net/sourceforge/pmd/cpd/GoTokenizerTest.java +++ b/pmd-go/src/test/java/net/sourceforge/pmd/cpd/GoTokenizerTest.java @@ -4,8 +4,6 @@ package net.sourceforge.pmd.cpd; -import java.util.Properties; - import org.junit.jupiter.api.Test; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; @@ -16,11 +14,6 @@ class GoTokenizerTest extends CpdTextComparisonTest { super(".go"); } - @Override - public Tokenizer newTokenizer(Properties properties) { - return new GoTokenizer(); - } - @Override protected String getResourcePrefix() { return "../lang/go/cpd/testdata"; diff --git a/pmd-groovy/src/main/java/net/sourceforge/pmd/cpd/GroovyTokenizer.java b/pmd-groovy/src/main/java/net/sourceforge/pmd/cpd/GroovyTokenizer.java index 79ecf7b6b5..1b9283c279 100644 --- a/pmd-groovy/src/main/java/net/sourceforge/pmd/cpd/GroovyTokenizer.java +++ b/pmd-groovy/src/main/java/net/sourceforge/pmd/cpd/GroovyTokenizer.java @@ -4,8 +4,6 @@ package net.sourceforge.pmd.cpd; -import java.io.StringReader; - import org.codehaus.groovy.antlr.SourceInfo; import org.codehaus.groovy.antlr.parser.GroovyLexer; @@ -22,10 +20,8 @@ import groovyjarjarantlr.TokenStreamException; public class GroovyTokenizer implements Tokenizer { @Override - public void tokenize(TextDocument sourceCode, Tokens tokenEntries) { - StringBuilder buffer = sourceCode.getCodeBuffer(); - - GroovyLexer lexer = new GroovyLexer(new StringReader(buffer.toString())); + public void tokenize(TextDocument sourceCode, TokenFactory tokenEntries) { + GroovyLexer lexer = new GroovyLexer(sourceCode.newReader()); TokenStream tokenStream = lexer.plumb(); try { @@ -36,15 +32,17 @@ public class GroovyTokenizer implements Tokenizer { int lastCol; + int lastLine; if (token instanceof SourceInfo) { lastCol = ((SourceInfo) token).getColumnLast(); + lastLine = ((SourceInfo) token).getLineLast(); } else { // fallback lastCol = token.getColumn() + tokenText.length(); + lastLine = token.getLine(); // 
todo inaccurate } - TokenEntry tokenEntry = new TokenEntry(tokenText, sourceCode.getFileName(), token.getLine(), token.getColumn(), lastCol); - tokenEntries.add(tokenEntry); + tokenEntries.recordToken(tokenText, token.getLine(), token.getColumn(), lastLine, lastCol); token = tokenStream.nextToken(); } } catch (TokenStreamException err) { @@ -53,8 +51,6 @@ public class GroovyTokenizer implements Tokenizer { // when CPD is executed with the '--skipLexicalErrors' command line // option throw new TokenMgrError(lexer.getLine(), lexer.getColumn(), lexer.getFilename(), err.getMessage(), err); - } finally { - tokenEntries.add(TokenEntry.getEOF()); } } } diff --git a/pmd-groovy/src/test/java/net/sourceforge/pmd/cpd/GroovyTokenizerTest.java b/pmd-groovy/src/test/java/net/sourceforge/pmd/cpd/GroovyTokenizerTest.java index a11b546a87..0cbb81b7d5 100644 --- a/pmd-groovy/src/test/java/net/sourceforge/pmd/cpd/GroovyTokenizerTest.java +++ b/pmd-groovy/src/test/java/net/sourceforge/pmd/cpd/GroovyTokenizerTest.java @@ -4,8 +4,6 @@ package net.sourceforge.pmd.cpd; -import java.util.Properties; - import org.junit.jupiter.api.Test; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; @@ -21,11 +19,6 @@ class GroovyTokenizerTest extends CpdTextComparisonTest { return "../lang/groovy/cpd/testdata"; } - @Override - public Tokenizer newTokenizer(Properties properties) { - return new GroovyTokenizer(); - } - @Test void testSample() { doTest("sample"); diff --git a/pmd-html/src/main/java/net/sourceforge/pmd/lang/html/ast/HtmlTokenizer.java b/pmd-html/src/main/java/net/sourceforge/pmd/lang/html/ast/HtmlTokenizer.java index 8f70dc69f0..63fa4e45b6 100644 --- a/pmd-html/src/main/java/net/sourceforge/pmd/lang/html/ast/HtmlTokenizer.java +++ b/pmd-html/src/main/java/net/sourceforge/pmd/lang/html/ast/HtmlTokenizer.java @@ -7,33 +7,25 @@ package net.sourceforge.pmd.lang.html.ast; import java.io.IOException; import java.io.UncheckedIOException; -import net.sourceforge.pmd.cpd.TokenEntry; 
+import net.sourceforge.pmd.cpd.TokenFactory; import net.sourceforge.pmd.cpd.Tokenizer; -import net.sourceforge.pmd.cpd.Tokens; import net.sourceforge.pmd.lang.LanguageProcessor; import net.sourceforge.pmd.lang.LanguageProcessorRegistry; import net.sourceforge.pmd.lang.ast.Parser.ParserTask; import net.sourceforge.pmd.lang.ast.SemanticErrorReporter; import net.sourceforge.pmd.lang.document.TextDocument; -import net.sourceforge.pmd.lang.document.TextFile; import net.sourceforge.pmd.lang.html.HtmlLanguageModule; public class HtmlTokenizer implements Tokenizer { @Override - public void tokenize(TextDocument sourceCode, Tokens tokenEntries) { + public void tokenize(TextDocument sourceCode, TokenFactory tokenEntries) { HtmlLanguageModule html = HtmlLanguageModule.getInstance(); - try (LanguageProcessor processor = html.createProcessor(html.newPropertyBundle()); - TextFile tf = TextFile.forCharSeq( - sourceCode.getCodeBuffer(), - sourceCode.getFileName(), - html.getDefaultVersion() - ); - TextDocument textDoc = TextDocument.create(tf)) { + try (LanguageProcessor processor = html.createProcessor(html.newPropertyBundle())) { ParserTask task = new ParserTask( - textDoc, + sourceCode, SemanticErrorReporter.noop(), LanguageProcessorRegistry.singleton(processor) ); @@ -46,20 +38,17 @@ public class HtmlTokenizer implements Tokenizer { throw new UncheckedIOException(e); } catch (Exception e) { throw new RuntimeException(e); - } finally { - tokenEntries.add(TokenEntry.EOF); } } - private void traverse(HtmlNode node, Tokens tokenEntries) { + private void traverse(HtmlNode node, TokenFactory tokenEntries) { String image = node.getXPathNodeName(); if (node instanceof ASTHtmlTextNode) { image = ((ASTHtmlTextNode) node).getText(); } - TokenEntry token = new TokenEntry(image, node.getReportLocation()); - tokenEntries.add(token); + tokenEntries.recordToken(image, node.getReportLocation()); for (HtmlNode child : node.children()) { traverse(child, tokenEntries); diff --git 
a/pmd-html/src/test/java/net/sourceforge/pmd/lang/html/HtmlTokenizerTest.java b/pmd-html/src/test/java/net/sourceforge/pmd/lang/html/HtmlTokenizerTest.java index f98bf2ed96..5e17879ddf 100644 --- a/pmd-html/src/test/java/net/sourceforge/pmd/lang/html/HtmlTokenizerTest.java +++ b/pmd-html/src/test/java/net/sourceforge/pmd/lang/html/HtmlTokenizerTest.java @@ -5,13 +5,9 @@ package net.sourceforge.pmd.lang.html; -import java.util.Properties; - import org.junit.jupiter.api.Test; -import net.sourceforge.pmd.cpd.Tokenizer; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; -import net.sourceforge.pmd.lang.html.ast.HtmlTokenizer; class HtmlTokenizerTest extends CpdTextComparisonTest { @@ -19,11 +15,6 @@ class HtmlTokenizerTest extends CpdTextComparisonTest { super(".html"); } - @Override - public Tokenizer newTokenizer(Properties properties) { - return new HtmlTokenizer(); - } - @Override protected String getResourcePrefix() { return "cpd"; diff --git a/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java b/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java index 8a54eca671..fdec888007 100644 --- a/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java +++ b/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java @@ -4,10 +4,8 @@ package net.sourceforge.pmd.cpd; -import java.io.IOException; import java.util.Deque; import java.util.LinkedList; -import java.util.Properties; import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer; import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter; @@ -17,6 +15,7 @@ import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken; import net.sourceforge.pmd.lang.document.TextDocument; import net.sourceforge.pmd.lang.java.ast.InternalApiBridge; import net.sourceforge.pmd.lang.java.ast.JavaTokenKinds; +import net.sourceforge.pmd.lang.java.internal.JavaLanguageProperties; public class JavaTokenizer extends JavaCCTokenizer { @@ -29,16 +28,11 @@ public class JavaTokenizer extends 
JavaCCTokenizer { private ConstructorDetector constructorDetector; - public void setProperties(Properties properties) { - ignoreAnnotations = Boolean.parseBoolean(properties.getProperty(IGNORE_ANNOTATIONS, "false")); - ignoreLiterals = Boolean.parseBoolean(properties.getProperty(IGNORE_LITERALS, "false")); - ignoreIdentifiers = Boolean.parseBoolean(properties.getProperty(IGNORE_IDENTIFIERS, "false")); - } - - @Override - public void tokenize(TextDocument sourceCode, Tokens tokenEntries) throws IOException { + public JavaTokenizer(JavaLanguageProperties properties) { + ignoreAnnotations = properties.getProperty(Tokenizer.CPD_IGNORE_METADATA); + ignoreLiterals = properties.getProperty(Tokenizer.CPD_ANONYMiZE_LITERALS); + ignoreIdentifiers = properties.getProperty(Tokenizer.CPD_ANONYMIZE_IDENTIFIERS); constructorDetector = new ConstructorDetector(ignoreIdentifiers); - super.tokenize(sourceCode, tokenEntries); } @Override @@ -52,7 +46,7 @@ public class JavaTokenizer extends JavaCCTokenizer { } @Override - protected TokenEntry processToken(Tokens tokenEntries, JavaccToken javaToken) { + protected void processToken(TokenFactory tokenEntries, JavaccToken javaToken) { String image = javaToken.getImage(); constructorDetector.restoreConstructorToken(tokenEntries, javaToken); @@ -69,7 +63,7 @@ public class JavaTokenizer extends JavaCCTokenizer { constructorDetector.processToken(javaToken); - return new TokenEntry(image, javaToken.getReportLocation()); + tokenEntries.recordToken(image, javaToken.getReportLocation()); } public void setIgnoreLiterals(boolean ignore) { @@ -268,7 +262,7 @@ public class JavaTokenizer extends JavaCCTokenizer { storeNextIdentifier = true; } - public void restoreConstructorToken(Tokens tokenEntries, JavaccToken currentToken) { + public void restoreConstructorToken(TokenFactory tokenEntries, JavaccToken currentToken) { if (!ignoreIdentifiers) { return; } @@ -278,9 +272,8 @@ public class JavaTokenizer extends JavaCCTokenizer { // identifier if 
(!classMembersIndentations.isEmpty() && classMembersIndentations.peek().name.equals(prevIdentifier)) { - int lastTokenIndex = tokenEntries.size() - 1; - TokenEntry lastToken = tokenEntries.getTokens().get(lastTokenIndex); - lastToken.setImage(prevIdentifier); + TokenEntry lastToken = tokenEntries.peekLastToken(); + tokenEntries.setImage(lastToken, prevIdentifier); } } } diff --git a/pmd-java/src/test/java/net/sourceforge/pmd/cpd/JavaTokenizerTest.java b/pmd-java/src/test/java/net/sourceforge/pmd/cpd/JavaTokenizerTest.java index f2840b90f3..5f95ef8785 100644 --- a/pmd-java/src/test/java/net/sourceforge/pmd/cpd/JavaTokenizerTest.java +++ b/pmd-java/src/test/java/net/sourceforge/pmd/cpd/JavaTokenizerTest.java @@ -19,13 +19,6 @@ class JavaTokenizerTest extends CpdTextComparisonTest { super(".java"); } - @Override - public Tokenizer newTokenizer(Properties properties) { - JavaTokenizer javaTokenizer = new JavaTokenizer(); - javaTokenizer.setProperties(properties); - return javaTokenizer; - } - @Override protected String getResourcePrefix() { return "../lang/java/cpd/testdata"; diff --git a/pmd-javascript/src/test/java/net/sourceforge/pmd/cpd/AnyTokenizerForTypescriptTest.java b/pmd-javascript/src/test/java/net/sourceforge/pmd/cpd/AnyTokenizerForTypescriptTest.java index 79a48c9ea6..c70c736f07 100644 --- a/pmd-javascript/src/test/java/net/sourceforge/pmd/cpd/AnyTokenizerForTypescriptTest.java +++ b/pmd-javascript/src/test/java/net/sourceforge/pmd/cpd/AnyTokenizerForTypescriptTest.java @@ -4,8 +4,6 @@ package net.sourceforge.pmd.cpd; -import java.util.Properties; - import org.junit.jupiter.api.Test; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; @@ -24,11 +22,6 @@ class AnyTokenizerForTypescriptTest extends CpdTextComparisonTest { return "testdata/ts"; } - @Override - public Tokenizer newTokenizer(Properties properties) { - return new AnyTokenizer(); - } - @Test void testFile1() { doTest("SampleTypeScript"); diff --git 
a/pmd-javascript/src/test/java/net/sourceforge/pmd/cpd/EcmascriptTokenizerTest.java b/pmd-javascript/src/test/java/net/sourceforge/pmd/cpd/EcmascriptTokenizerTest.java index 9554a436d9..d097841bbd 100644 --- a/pmd-javascript/src/test/java/net/sourceforge/pmd/cpd/EcmascriptTokenizerTest.java +++ b/pmd-javascript/src/test/java/net/sourceforge/pmd/cpd/EcmascriptTokenizerTest.java @@ -4,8 +4,6 @@ package net.sourceforge.pmd.cpd; -import java.util.Properties; - import org.junit.jupiter.api.Test; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; @@ -16,11 +14,6 @@ class EcmascriptTokenizerTest extends CpdTextComparisonTest { super(".js"); } - @Override - public Tokenizer newTokenizer(Properties properties) { - return new EcmascriptTokenizer(); - } - @Override protected String getResourcePrefix() { return "../lang/ecmascript/cpd/testdata"; diff --git a/pmd-jsp/src/test/java/net/sourceforge/pmd/cpd/JSPTokenizerTest.java b/pmd-jsp/src/test/java/net/sourceforge/pmd/cpd/JSPTokenizerTest.java index fc4a29a13c..b1a3c3879e 100644 --- a/pmd-jsp/src/test/java/net/sourceforge/pmd/cpd/JSPTokenizerTest.java +++ b/pmd-jsp/src/test/java/net/sourceforge/pmd/cpd/JSPTokenizerTest.java @@ -4,8 +4,6 @@ package net.sourceforge.pmd.cpd; -import java.util.Properties; - import org.junit.jupiter.api.Test; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; @@ -22,11 +20,6 @@ class JSPTokenizerTest extends CpdTextComparisonTest { return "../lang/jsp/cpd/testdata"; } - @Override - public Tokenizer newTokenizer(Properties properties) { - return new JSPTokenizer(); - } - @Test void scriptletWithString() { doTest("scriptletWithString"); diff --git a/pmd-kotlin/src/test/java/net/sourceforge/pmd/cpd/KotlinTokenizerTest.java b/pmd-kotlin/src/test/java/net/sourceforge/pmd/cpd/KotlinTokenizerTest.java index ab960e055d..2f9241296f 100644 --- a/pmd-kotlin/src/test/java/net/sourceforge/pmd/cpd/KotlinTokenizerTest.java +++ 
b/pmd-kotlin/src/test/java/net/sourceforge/pmd/cpd/KotlinTokenizerTest.java @@ -4,8 +4,6 @@ package net.sourceforge.pmd.cpd; -import java.util.Properties; - import org.junit.jupiter.api.Test; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; @@ -21,11 +19,6 @@ class KotlinTokenizerTest extends CpdTextComparisonTest { return "../lang/kotlin/cpd/testdata"; } - @Override - public Tokenizer newTokenizer(Properties properties) { - return new KotlinTokenizer(); - } - @Test void testComments() { doTest("comment"); diff --git a/pmd-lang-test/src/main/kotlin/net/sourceforge/pmd/cpd/test/CpdTextComparisonTest.kt b/pmd-lang-test/src/main/kotlin/net/sourceforge/pmd/cpd/test/CpdTextComparisonTest.kt index 2a09f7874f..611caa3781 100644 --- a/pmd-lang-test/src/main/kotlin/net/sourceforge/pmd/cpd/test/CpdTextComparisonTest.kt +++ b/pmd-lang-test/src/main/kotlin/net/sourceforge/pmd/cpd/test/CpdTextComparisonTest.kt @@ -9,7 +9,11 @@ import net.sourceforge.pmd.cpd.SourceCode import net.sourceforge.pmd.cpd.TokenEntry import net.sourceforge.pmd.cpd.Tokenizer import net.sourceforge.pmd.cpd.Tokens +import net.sourceforge.pmd.lang.Language +import net.sourceforge.pmd.lang.LanguagePropertyBundle import net.sourceforge.pmd.lang.ast.TokenMgrError +import net.sourceforge.pmd.lang.document.TextDocument +import net.sourceforge.pmd.lang.document.TextFile import net.sourceforge.pmd.test.BaseTextComparisonTest import org.apache.commons.lang3.StringUtils import java.util.* @@ -22,10 +26,15 @@ import java.util.* * Baseline files are saved in txt files. 
*/ abstract class CpdTextComparisonTest( - override val extensionIncludingDot: String + val language: Language, + override val extensionIncludingDot: String ) : BaseTextComparisonTest() { - abstract fun newTokenizer(properties: Properties): Tokenizer + + fun newTokenizer(config: LanguagePropertyConfig): Tokenizer { + val properties = language.newPropertyBundle().also { config.setProperties(it) } + return language.createCpdTokenizer(properties) + } override val resourceLoader: Class<*> get() = javaClass @@ -34,7 +43,11 @@ abstract class CpdTextComparisonTest( get() = "testdata" - open fun defaultProperties() = Properties() + open fun defaultProperties(): LanguagePropertyConfig = object : LanguagePropertyConfig { + override fun setProperties(properties: LanguagePropertyBundle) { + // use defaults + } + } /** * A test comparing the output of the tokenizer. @@ -42,14 +55,18 @@ abstract class CpdTextComparisonTest( * @param fileBaseName Name of the source file (without extension or resource prefix) * @param expectedSuffix Suffix to append to the expected file. 
This allows reusing the same source file * with different configurations, provided the suffix is different - * @param properties Properties to configure [newTokenizer] + * @param config Properties to configure the tokenizer */ @JvmOverloads - fun doTest(fileBaseName: String, expectedSuffix: String = "", properties: Properties = defaultProperties()) { + fun doTest( + fileBaseName: String, + expectedSuffix: String = "", + config: LanguagePropertyConfig = defaultProperties() + ) { super.doTest(fileBaseName, expectedSuffix) { fileData -> - val sourceCode = SourceCode(SourceCode.StringCodeLoader(fileData.fileText, fileData.fileName)) + val sourceCode = TextDocument.readOnlyString(fileData.fileText, fileData.fileName, language.defaultVersion) val tokens = Tokens().also { - val tokenizer = newTokenizer(properties) + val tokenizer = newTokenizer(config) tokenizer.tokenize(sourceCode, it) } @@ -61,14 +78,18 @@ abstract class CpdTextComparisonTest( fun expectTokenMgrError( source: String, fileName: String = SourceCode.StringCodeLoader.DEFAULT_NAME, - properties: Properties = defaultProperties() + properties: LanguagePropertyConfig = defaultProperties() ): TokenMgrError = expectTokenMgrError(FileData(fileName, source), properties) @JvmOverloads - fun expectTokenMgrError(fileData: FileData, properties: Properties = defaultProperties()): TokenMgrError = + fun expectTokenMgrError( + fileData: FileData, + config: LanguagePropertyConfig = defaultProperties() + ): TokenMgrError = shouldThrow { - newTokenizer(properties).tokenize(sourceCodeOf(fileData), Tokens()) + val tokenizer = newTokenizer(config) + tokenizer.tokenize(sourceCodeOf(fileData), Tokens()) } @@ -147,9 +168,11 @@ abstract class CpdTextComparisonTest( } - fun sourceCodeOf(str: String): SourceCode = SourceCode(SourceCode.StringCodeLoader(str)) - fun sourceCodeOf(fileData: FileData): SourceCode = - SourceCode(SourceCode.StringCodeLoader(fileData.fileText, fileData.fileName)) + fun sourceCodeOf(str: String): TextDocument = +
sourceCodeOf(FileData(fileName = TextFile.UNKNOWN_FILENAME, fileText = str)) + + fun sourceCodeOf(fileData: FileData): TextDocument = + TextDocument.readOnlyString(fileData.fileText, fileData.fileName, language.defaultVersion) fun tokenize(tokenizer: Tokenizer, str: String): Tokens = Tokens().also { @@ -163,3 +186,7 @@ abstract class CpdTextComparisonTest( val ImageSize = Col0Width - Indent.length - 2 // -2 is for the "[]" } } + +interface LanguagePropertyConfig { + fun setProperties(properties: LanguagePropertyBundle) +} diff --git a/pmd-lua/src/test/java/net/sourceforge/pmd/cpd/LuaTokenizerTest.java b/pmd-lua/src/test/java/net/sourceforge/pmd/cpd/LuaTokenizerTest.java index 5062ebe242..5045837e2c 100644 --- a/pmd-lua/src/test/java/net/sourceforge/pmd/cpd/LuaTokenizerTest.java +++ b/pmd-lua/src/test/java/net/sourceforge/pmd/cpd/LuaTokenizerTest.java @@ -4,8 +4,6 @@ package net.sourceforge.pmd.cpd; -import java.util.Properties; - import org.junit.jupiter.api.Test; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; @@ -20,11 +18,6 @@ class LuaTokenizerTest extends CpdTextComparisonTest { return "../lang/lua/cpd/testdata"; } - @Override - public Tokenizer newTokenizer(Properties properties) { - return new LuaTokenizer(); - } - @Test void testSimple() { doTest("helloworld"); diff --git a/pmd-matlab/src/test/java/net/sourceforge/pmd/cpd/MatlabTokenizerTest.java b/pmd-matlab/src/test/java/net/sourceforge/pmd/cpd/MatlabTokenizerTest.java index 4492c55ba9..73400c993e 100644 --- a/pmd-matlab/src/test/java/net/sourceforge/pmd/cpd/MatlabTokenizerTest.java +++ b/pmd-matlab/src/test/java/net/sourceforge/pmd/cpd/MatlabTokenizerTest.java @@ -4,8 +4,6 @@ package net.sourceforge.pmd.cpd; -import java.util.Properties; - import org.junit.jupiter.api.Test; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; @@ -21,11 +19,6 @@ class MatlabTokenizerTest extends CpdTextComparisonTest { return "../lang/matlab/cpd/testdata"; } - @Override - public Tokenizer 
newTokenizer(Properties properties) { - return new MatlabTokenizer(); - } - @Test void testLongSample() { doTest("sample-matlab"); diff --git a/pmd-objectivec/src/test/java/net/sourceforge/pmd/cpd/ObjectiveCTokenizerTest.java b/pmd-objectivec/src/test/java/net/sourceforge/pmd/cpd/ObjectiveCTokenizerTest.java index bb199ea7ac..edb8979a1c 100644 --- a/pmd-objectivec/src/test/java/net/sourceforge/pmd/cpd/ObjectiveCTokenizerTest.java +++ b/pmd-objectivec/src/test/java/net/sourceforge/pmd/cpd/ObjectiveCTokenizerTest.java @@ -4,8 +4,6 @@ package net.sourceforge.pmd.cpd; -import java.util.Properties; - import org.junit.jupiter.api.Test; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; @@ -22,11 +20,6 @@ class ObjectiveCTokenizerTest extends CpdTextComparisonTest { return "../lang/objc/cpd/testdata"; } - @Override - public Tokenizer newTokenizer(Properties properties) { - return new ObjectiveCTokenizer(); - } - @Test void testLongSample() { doTest("big_sample"); diff --git a/pmd-perl/src/test/java/net/sourceforge/pmd/lang/perl/cpd/PerlTokenizerTest.java b/pmd-perl/src/test/java/net/sourceforge/pmd/lang/perl/cpd/PerlTokenizerTest.java index 43830a85d1..ec2a8393fb 100644 --- a/pmd-perl/src/test/java/net/sourceforge/pmd/lang/perl/cpd/PerlTokenizerTest.java +++ b/pmd-perl/src/test/java/net/sourceforge/pmd/lang/perl/cpd/PerlTokenizerTest.java @@ -4,12 +4,8 @@ package net.sourceforge.pmd.lang.perl.cpd; -import java.util.Properties; - import org.junit.jupiter.api.Test; -import net.sourceforge.pmd.cpd.PerlLanguage; -import net.sourceforge.pmd.cpd.Tokenizer; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; /** @@ -21,11 +17,6 @@ class PerlTokenizerTest extends CpdTextComparisonTest { super(".pl"); } - @Override - public Tokenizer newTokenizer(Properties properties) { - return new PerlLanguage().getTokenizer(); - } - @Test void testSample() { doTest("sample"); diff --git a/pmd-php/src/main/java/net/sourceforge/pmd/cpd/PHPTokenizer.java 
b/pmd-php/src/main/java/net/sourceforge/pmd/cpd/PHPTokenizer.java index b63cfecd0e..56f3615697 100644 --- a/pmd-php/src/main/java/net/sourceforge/pmd/cpd/PHPTokenizer.java +++ b/pmd-php/src/main/java/net/sourceforge/pmd/cpd/PHPTokenizer.java @@ -4,27 +4,9 @@ package net.sourceforge.pmd.cpd; -import java.util.List; - -import net.sourceforge.pmd.lang.document.TextDocument; - /** * Simple tokenizer for PHP. */ -public class PHPTokenizer implements Tokenizer { +public class PHPTokenizer extends AnyTokenizer { - @Override - public void tokenize(TextDocument tokens, Tokens tokenEntries) { - List code = tokens.getCode(); - for (int i = 0; i < code.size(); i++) { - String currentLine = code.get(i); - for (int j = 0; j < currentLine.length(); j++) { - char tok = currentLine.charAt(j); - if (!Character.isWhitespace(tok) && tok != '{' && tok != '}' && tok != ';') { - tokenEntries.add(new TokenEntry(String.valueOf(tok), tokens.getFileName(), i + 1)); - } - } - } - tokenEntries.add(TokenEntry.getEOF()); - } } diff --git a/pmd-plsql/src/main/java/net/sourceforge/pmd/cpd/PLSQLLanguage.java b/pmd-plsql/src/main/java/net/sourceforge/pmd/cpd/PLSQLLanguage.java deleted file mode 100755 index 5331ec9f72..0000000000 --- a/pmd-plsql/src/main/java/net/sourceforge/pmd/cpd/PLSQLLanguage.java +++ /dev/null @@ -1,31 +0,0 @@ -/** - * BSD-style license; for more info see http://pmd.sourceforge.net/license.html - */ - -package net.sourceforge.pmd.cpd; - -import java.util.Properties; - -/** - * - * @author Stuart Turton sturton@users.sourceforge.net - */ -public class PLSQLLanguage extends AbstractLanguage { - public PLSQLLanguage() { - super("PL/SQL", "plsql", new PLSQLTokenizer(), - ".sql", - ".trg", // Triggers - ".prc", ".fnc", // Standalone Procedures and Functions - ".pld", // Oracle*Forms - ".pls", ".plh", ".plb", // Packages - ".pck", ".pks", ".pkh", ".pkb", // Packages - ".typ", ".tyb", // Object Types - ".tps", ".tpb" // Object Types - ); - } - - @Override - public final void 
setProperties(Properties properties) { - ((PLSQLTokenizer) getTokenizer()).setProperties(properties); - } -} diff --git a/pmd-plsql/src/main/java/net/sourceforge/pmd/cpd/PLSQLTokenizer.java b/pmd-plsql/src/main/java/net/sourceforge/pmd/cpd/PLSQLTokenizer.java index 4d66cf3089..91abf4f844 100644 --- a/pmd-plsql/src/main/java/net/sourceforge/pmd/cpd/PLSQLTokenizer.java +++ b/pmd-plsql/src/main/java/net/sourceforge/pmd/cpd/PLSQLTokenizer.java @@ -4,46 +4,28 @@ package net.sourceforge.pmd.cpd; -import java.util.Properties; - import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer; +import net.sourceforge.pmd.lang.LanguagePropertyBundle; import net.sourceforge.pmd.lang.TokenManager; +import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken; import net.sourceforge.pmd.lang.document.TextDocument; import net.sourceforge.pmd.lang.plsql.ast.PLSQLTokenKinds; public class PLSQLTokenizer extends JavaCCTokenizer { - // This is actually useless, the comments are special tokens, never taken into account by CPD - @Deprecated - public static final String IGNORE_COMMENTS = "ignore_comments"; - public static final String IGNORE_IDENTIFIERS = "ignore_identifiers"; - public static final String IGNORE_LITERALS = "ignore_literals"; - private boolean ignoreIdentifiers; - private boolean ignoreLiterals; + private final boolean ignoreIdentifiers; + private final boolean ignoreLiterals; - public void setProperties(Properties properties) { + public PLSQLTokenizer(LanguagePropertyBundle properties) { /* * The Tokenizer is derived from PLDoc, in which comments are very * important When looking for duplication, we are probably not * interested in comment variation, so we shall default ignoreComments * to true */ - ignoreIdentifiers = Boolean.parseBoolean(properties.getProperty(IGNORE_IDENTIFIERS, "false")); - ignoreLiterals = Boolean.parseBoolean(properties.getProperty(IGNORE_LITERALS, "false")); - } - - @Deprecated - public void 
setIgnoreComments(boolean ignore) { - // This is actually useless, the comments are special tokens, never taken into account by CPD - } - - public void setIgnoreLiterals(boolean ignore) { - this.ignoreLiterals = ignore; - } - - public void setIgnoreIdentifiers(boolean ignore) { - this.ignoreIdentifiers = ignore; + ignoreIdentifiers = properties.getProperty(Tokenizer.CPD_ANONYMIZE_IDENTIFIERS); + ignoreLiterals = properties.getProperty(Tokenizer.CPD_ANONYMiZE_LITERALS); } @Override @@ -67,6 +49,6 @@ public class PLSQLTokenizer extends JavaCCTokenizer { @Override protected TokenManager makeLexerImpl(TextDocument doc) { - return PLSQLTokenKinds.newTokenManager(doc); + return PLSQLTokenKinds.newTokenManager(CharStream.create(doc)); } } diff --git a/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/PLSQLLanguageModule.java b/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/PLSQLLanguageModule.java index 0b74d631c2..6428d5938b 100644 --- a/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/PLSQLLanguageModule.java +++ b/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/PLSQLLanguageModule.java @@ -4,6 +4,11 @@ package net.sourceforge.pmd.lang.plsql; +import net.sourceforge.pmd.cpd.PLSQLTokenizer; +import net.sourceforge.pmd.cpd.Tokenizer; +import net.sourceforge.pmd.lang.Language; +import net.sourceforge.pmd.lang.LanguagePropertyBundle; +import net.sourceforge.pmd.lang.LanguageRegistry; import net.sourceforge.pmd.lang.impl.SimpleLanguageModuleBase; /** @@ -31,4 +36,21 @@ public class PLSQLLanguageModule extends SimpleLanguageModuleBase { new PLSQLHandler() ); } + + @Override + public LanguagePropertyBundle newPropertyBundle() { + LanguagePropertyBundle bundle = super.newPropertyBundle(); + bundle.definePropertyDescriptor(Tokenizer.CPD_ANONYMiZE_LITERALS); + bundle.definePropertyDescriptor(Tokenizer.CPD_ANONYMIZE_IDENTIFIERS); + return bundle; + } + + @Override + public Tokenizer createCpdTokenizer(LanguagePropertyBundle bundle) { + return new 
PLSQLTokenizer(bundle); + } + + public static Language getInstance() { + return LanguageRegistry.PMD.getLanguageById("plsql"); + } } diff --git a/pmd-plsql/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language b/pmd-plsql/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language deleted file mode 100644 index cb05edd026..0000000000 --- a/pmd-plsql/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language +++ /dev/null @@ -1 +0,0 @@ -net.sourceforge.pmd.cpd.PLSQLLanguage diff --git a/pmd-plsql/src/test/java/net/sourceforge/pmd/cpd/PLSQLTokenizerTest.java b/pmd-plsql/src/test/java/net/sourceforge/pmd/cpd/PLSQLTokenizerTest.java index cfd5a3260f..280c1a0eeb 100644 --- a/pmd-plsql/src/test/java/net/sourceforge/pmd/cpd/PLSQLTokenizerTest.java +++ b/pmd-plsql/src/test/java/net/sourceforge/pmd/cpd/PLSQLTokenizerTest.java @@ -4,16 +4,15 @@ package net.sourceforge.pmd.cpd; -import java.util.Properties; - import org.junit.jupiter.api.Test; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; +import net.sourceforge.pmd.lang.plsql.PLSQLLanguageModule; class PLSQLTokenizerTest extends CpdTextComparisonTest { PLSQLTokenizerTest() { - super(".sql"); + super(PLSQLLanguageModule.getInstance(), ".sql"); } @Override @@ -21,12 +20,7 @@ class PLSQLTokenizerTest extends CpdTextComparisonTest { return "../lang/plsql/cpd/testdata"; } - @Override - public Tokenizer newTokenizer(Properties properties) { - return new PLSQLTokenizer(); - } - @Test void testSimple() { doTest("sample-plsql"); diff --git a/pmd-python/src/test/java/net/sourceforge/pmd/cpd/PythonTokenizerTest.java b/pmd-python/src/test/java/net/sourceforge/pmd/cpd/PythonTokenizerTest.java index 5b22f66402..b5c7988cc1 100644 --- a/pmd-python/src/test/java/net/sourceforge/pmd/cpd/PythonTokenizerTest.java +++ b/pmd-python/src/test/java/net/sourceforge/pmd/cpd/PythonTokenizerTest.java @@ -4,8 +4,6 @@ package net.sourceforge.pmd.cpd; -import java.util.Properties; - import 
org.junit.jupiter.api.Test; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; @@ -21,12 +19,7 @@ class PythonTokenizerTest extends CpdTextComparisonTest { return "../lang/python/cpd/testdata"; } - @Override - public Tokenizer newTokenizer(Properties properties) { - return new PythonTokenizer(); - } - - + @Test void sampleTest() { doTest("sample_python"); diff --git a/pmd-ruby/src/test/java/net/sourceforge/pmd/cpd/RubyTokenizerTest.java b/pmd-ruby/src/test/java/net/sourceforge/pmd/cpd/RubyTokenizerTest.java index 48704a2527..dfe8574b64 100644 --- a/pmd-ruby/src/test/java/net/sourceforge/pmd/cpd/RubyTokenizerTest.java +++ b/pmd-ruby/src/test/java/net/sourceforge/pmd/cpd/RubyTokenizerTest.java @@ -4,8 +4,6 @@ package net.sourceforge.pmd.cpd; -import java.util.Properties; - import org.junit.jupiter.api.Test; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; @@ -21,11 +19,6 @@ class RubyTokenizerTest extends CpdTextComparisonTest { return "../lang/ruby/cpd/testdata"; } - @Override - public Tokenizer newTokenizer(Properties properties) { - return new RubyLanguage().getTokenizer(); - } - @Test void testSimple() { diff --git a/pmd-scala-modules/pmd-scala-common/src/main/java/net/sourceforge/pmd/cpd/ScalaTokenizer.java b/pmd-scala-modules/pmd-scala-common/src/main/java/net/sourceforge/pmd/cpd/ScalaTokenizer.java index 5c2b98892b..0fca0e0d6a 100644 --- a/pmd-scala-modules/pmd-scala-common/src/main/java/net/sourceforge/pmd/cpd/ScalaTokenizer.java +++ b/pmd-scala-modules/pmd-scala-common/src/main/java/net/sourceforge/pmd/cpd/ScalaTokenizer.java @@ -4,7 +4,6 @@ package net.sourceforge.pmd.cpd; -import java.io.IOException; import java.util.Properties; import org.apache.commons.lang3.StringUtils; @@ -13,7 +12,6 @@ import net.sourceforge.pmd.cpd.token.internal.BaseTokenFilter; import net.sourceforge.pmd.lang.LanguageVersion; import net.sourceforge.pmd.lang.TokenManager; import net.sourceforge.pmd.lang.ast.TokenMgrError; -import 
net.sourceforge.pmd.lang.document.CpdCompat; import net.sourceforge.pmd.lang.document.TextDocument; import net.sourceforge.pmd.lang.scala.ScalaLanguageModule; @@ -62,20 +60,20 @@ public class ScalaTokenizer implements Tokenizer { } @Override - public void tokenize(TextDocument sourceCode, Tokens tokenEntries) throws IOException { + public void tokenize(TextDocument sourceCode, TokenFactory tokenEntries) { - try (TextDocument textDoc = TextDocument.create(CpdCompat.cpdCompat(sourceCode))) { - String fullCode = textDoc.getText().toString(); + try { + String fullCode = sourceCode.getText().toString(); // create the input file for scala - Input.VirtualFile vf = new Input.VirtualFile(sourceCode.getFileName(), fullCode); + Input.VirtualFile vf = new Input.VirtualFile(sourceCode.getDisplayName(), fullCode); ScalametaTokenizer tokenizer = new ScalametaTokenizer(vf, dialect); // tokenize with a filter scala.meta.tokens.Tokens tokens = tokenizer.tokenize(); // use extensions to the standard PMD TokenManager and Filter - ScalaTokenManager scalaTokenManager = new ScalaTokenManager(tokens.iterator(), textDoc); + ScalaTokenManager scalaTokenManager = new ScalaTokenManager(tokens.iterator(), sourceCode); ScalaTokenFilter filter = new ScalaTokenFilter(scalaTokenManager); ScalaTokenAdapter token; @@ -83,21 +81,19 @@ public class ScalaTokenizer implements Tokenizer { if (StringUtils.isEmpty(token.getImage())) { continue; } - TokenEntry cpdToken = new TokenEntry(token.getImage(), - token.getReportLocation()); - tokenEntries.add(cpdToken); + tokenEntries.recordToken(token.getImage(), + token.getReportLocation()); } } catch (Exception e) { if (e instanceof TokenizeException) { // NOPMD // cannot catch it as it's a checked exception and Scala sneaky throws TokenizeException tokE = (TokenizeException) e; Position pos = tokE.pos(); - throw new TokenMgrError(pos.startLine() + 1, pos.startColumn() + 1, sourceCode.getFileName(), "Scalameta threw", tokE); + throw new TokenMgrError( + 
pos.startLine() + 1, pos.startColumn() + 1, sourceCode.getDisplayName(), "Scalameta threw", tokE); } else { throw e; } - } finally { - tokenEntries.add(TokenEntry.getEOF()); } } diff --git a/pmd-scala-modules/pmd-scala-common/src/test/java/net/sourceforge/pmd/cpd/ScalaTokenizerTest.java b/pmd-scala-modules/pmd-scala-common/src/test/java/net/sourceforge/pmd/cpd/ScalaTokenizerTest.java index 49cc0f76c4..8bda7ffea0 100644 --- a/pmd-scala-modules/pmd-scala-common/src/test/java/net/sourceforge/pmd/cpd/ScalaTokenizerTest.java +++ b/pmd-scala-modules/pmd-scala-common/src/test/java/net/sourceforge/pmd/cpd/ScalaTokenizerTest.java @@ -6,8 +6,6 @@ package net.sourceforge.pmd.cpd; import static org.junit.jupiter.api.Assertions.assertThrows; -import java.util.Properties; - import org.junit.jupiter.api.Test; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; @@ -24,11 +22,6 @@ class ScalaTokenizerTest extends CpdTextComparisonTest { return "../lang/scala/cpd/testdata"; } - @Override - public Tokenizer newTokenizer(Properties properties) { - return new ScalaTokenizer(); - } - @Test void testSample() { doTest("sample-LiftActor"); diff --git a/pmd-swift/src/test/java/net/sourceforge/pmd/cpd/SwiftTokenizerTest.java b/pmd-swift/src/test/java/net/sourceforge/pmd/cpd/SwiftTokenizerTest.java index 4969321f77..b63688ed7b 100644 --- a/pmd-swift/src/test/java/net/sourceforge/pmd/cpd/SwiftTokenizerTest.java +++ b/pmd-swift/src/test/java/net/sourceforge/pmd/cpd/SwiftTokenizerTest.java @@ -4,8 +4,6 @@ package net.sourceforge.pmd.cpd; -import java.util.Properties; - import org.junit.jupiter.api.Test; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; @@ -21,11 +19,6 @@ class SwiftTokenizerTest extends CpdTextComparisonTest { return "../lang/swift/cpd/testdata"; } - @Override - public Tokenizer newTokenizer(Properties properties) { - return new SwiftTokenizer(); - } - @Test void testSwift42() { diff --git 
a/pmd-visualforce/src/test/java/net/sourceforge/pmd/lang/vf/cpd/VfTokenizerTest.java b/pmd-visualforce/src/test/java/net/sourceforge/pmd/lang/vf/cpd/VfTokenizerTest.java index 487f4ed9ec..07842f43c6 100644 --- a/pmd-visualforce/src/test/java/net/sourceforge/pmd/lang/vf/cpd/VfTokenizerTest.java +++ b/pmd-visualforce/src/test/java/net/sourceforge/pmd/lang/vf/cpd/VfTokenizerTest.java @@ -5,12 +5,8 @@ package net.sourceforge.pmd.lang.vf.cpd; -import java.util.Properties; - import org.junit.jupiter.api.Test; -import net.sourceforge.pmd.cpd.Tokenizer; -import net.sourceforge.pmd.cpd.VfTokenizer; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; class VfTokenizerTest extends CpdTextComparisonTest { @@ -19,12 +15,6 @@ class VfTokenizerTest extends CpdTextComparisonTest { super(".page"); } - @Override - public Tokenizer newTokenizer(Properties properties) { - VfTokenizer tokenizer = new VfTokenizer(); - return tokenizer; - } - @Test void testTokenize() { doTest("SampleUnescapeElWithTab"); diff --git a/pmd-xml/src/test/java/net/sourceforge/pmd/xml/cpd/XmlCPDTokenizerTest.java b/pmd-xml/src/test/java/net/sourceforge/pmd/xml/cpd/XmlCPDTokenizerTest.java index 194a219b2f..9fc4f39400 100644 --- a/pmd-xml/src/test/java/net/sourceforge/pmd/xml/cpd/XmlCPDTokenizerTest.java +++ b/pmd-xml/src/test/java/net/sourceforge/pmd/xml/cpd/XmlCPDTokenizerTest.java @@ -4,11 +4,8 @@ package net.sourceforge.pmd.xml.cpd; -import java.util.Properties; - import org.junit.jupiter.api.Test; -import net.sourceforge.pmd.cpd.Tokenizer; import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; class XmlCPDTokenizerTest extends CpdTextComparisonTest { @@ -17,11 +14,6 @@ class XmlCPDTokenizerTest extends CpdTextComparisonTest { super(".xml"); } - @Override - public Tokenizer newTokenizer(Properties properties) { - return new XmlTokenizer(); - } - @Test void tokenizeTest() { doTest("simple");