diff --git a/pmd-apex/src/main/java/net/sourceforge/pmd/cpd/ApexLanguage.java b/pmd-apex/src/main/java/net/sourceforge/pmd/cpd/ApexLanguage.java
deleted file mode 100644
index 0bb7bd7014..0000000000
--- a/pmd-apex/src/main/java/net/sourceforge/pmd/cpd/ApexLanguage.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/**
- * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
- */
-
-package net.sourceforge.pmd.cpd;
-
-import java.util.Properties;
-
-public class ApexLanguage extends AbstractLanguage {
-
- public ApexLanguage() {
- this(new Properties());
- }
-
- public ApexLanguage(Properties properties) {
- super("Apex", "apex", new ApexTokenizer(), ".cls");
- setProperties(properties);
- }
-
- @Override
- public final void setProperties(Properties properties) {
- ApexTokenizer tokenizer = (ApexTokenizer) getTokenizer();
- tokenizer.setProperties(properties);
- }
-}
diff --git a/pmd-apex/src/main/java/net/sourceforge/pmd/cpd/ApexTokenizer.java b/pmd-apex/src/main/java/net/sourceforge/pmd/cpd/ApexTokenizer.java
index d2a01abb01..37872c2cc2 100644
--- a/pmd-apex/src/main/java/net/sourceforge/pmd/cpd/ApexTokenizer.java
+++ b/pmd-apex/src/main/java/net/sourceforge/pmd/cpd/ApexTokenizer.java
@@ -5,67 +5,30 @@
package net.sourceforge.pmd.cpd;
import java.util.Locale;
-import java.util.Properties;
-import org.antlr.runtime.ANTLRStringStream;
-import org.antlr.runtime.Lexer;
-import org.antlr.runtime.Token;
+import org.antlr.v4.runtime.CharStream;
-import net.sourceforge.pmd.lang.apex.ApexJorjeLogging;
-import net.sourceforge.pmd.lang.ast.TokenMgrError;
-import net.sourceforge.pmd.lang.document.TextDocument;
+import net.sourceforge.pmd.cpd.internal.AntlrTokenizer;
+import net.sourceforge.pmd.lang.apex.ApexLanguageProperties;
+import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrToken;
-import apex.jorje.parser.impl.ApexLexer;
+public class ApexTokenizer extends AntlrTokenizer {
+ private final boolean caseSensitive;
-public class ApexTokenizer implements Tokenizer {
-
- public ApexTokenizer() {
- ApexJorjeLogging.disableLogging();
- }
-
- /**
- * If the properties is <code>false</code> (default), then the case of any token
- * is ignored.
- */
- public static final String CASE_SENSITIVE = "net.sourceforge.pmd.cpd.ApexTokenizer.caseSensitive";
-
- private boolean caseSensitive;
-
- public void setProperties(Properties properties) {
- caseSensitive = Boolean.parseBoolean(properties.getProperty(CASE_SENSITIVE, "false"));
+ public ApexTokenizer(ApexLanguageProperties properties) {
+ this.caseSensitive = properties.getProperty(Tokenizer.CPD_CASE_SENSITIVE);
}
@Override
- public void tokenize(TextDocument sourceCode, Tokens tokenEntries) {
- StringBuilder code = sourceCode.getCodeBuffer();
-
- ANTLRStringStream ass = new ANTLRStringStream(code.toString());
- ApexLexer lexer = new ApexLexer(ass) {
- @Override
- public void emitErrorMessage(String msg) {
- throw new TokenMgrError(getLine(), getCharPositionInLine(), getSourceName(), msg, null);
- }
- };
-
- try {
- Token token = lexer.nextToken();
-
- while (token.getType() != Token.EOF) {
- if (token.getChannel() != Lexer.HIDDEN) {
- String tokenText = token.getText();
- if (!caseSensitive) {
- tokenText = tokenText.toLowerCase(Locale.ROOT);
- }
- TokenEntry tokenEntry = new TokenEntry(tokenText, sourceCode.getFileName(),
- token.getLine(),
- token.getCharPositionInLine() + 1,
- token.getCharPositionInLine() + tokenText.length() + 1);
- tokenEntries.add(tokenEntry);
- }
- token = lexer.nextToken();
- }
- } finally {
- tokenEntries.add(TokenEntry.getEOF());
+ protected String getImage(AntlrToken token) {
+ if (caseSensitive) {
+ return token.getImage();
}
+ return token.getImage().toLowerCase(Locale.ROOT);
+ }
+
+ @Override
+ protected org.antlr.v4.runtime.Lexer getLexerForSource(CharStream charStream) {
+ return new com.nawforce.runtime.parsers.ApexLexer(charStream);
}
}
diff --git a/pmd-apex/src/main/java/net/sourceforge/pmd/lang/apex/ApexLanguageModule.java b/pmd-apex/src/main/java/net/sourceforge/pmd/lang/apex/ApexLanguageModule.java
index 7b57934bd7..690c228c0b 100644
--- a/pmd-apex/src/main/java/net/sourceforge/pmd/lang/apex/ApexLanguageModule.java
+++ b/pmd-apex/src/main/java/net/sourceforge/pmd/lang/apex/ApexLanguageModule.java
@@ -4,6 +4,8 @@
package net.sourceforge.pmd.lang.apex;
+import net.sourceforge.pmd.cpd.ApexTokenizer;
+import net.sourceforge.pmd.cpd.Tokenizer;
import net.sourceforge.pmd.lang.Language;
import net.sourceforge.pmd.lang.LanguageModuleBase;
import net.sourceforge.pmd.lang.LanguageProcessor;
@@ -32,6 +34,11 @@ public class ApexLanguageModule extends LanguageModuleBase {
return new ApexLanguageProcessor((ApexLanguageProperties) bundle);
}
+ @Override
+ public Tokenizer createCpdTokenizer(LanguagePropertyBundle bundle) {
+ return new ApexTokenizer((ApexLanguageProperties) bundle);
+ }
+
public static Language getInstance() {
return LanguageRegistry.PMD.getLanguageByFullName(NAME);
}
diff --git a/pmd-apex/src/main/java/net/sourceforge/pmd/lang/apex/ApexLanguageProperties.java b/pmd-apex/src/main/java/net/sourceforge/pmd/lang/apex/ApexLanguageProperties.java
index 1b33565565..3431c89dd5 100644
--- a/pmd-apex/src/main/java/net/sourceforge/pmd/lang/apex/ApexLanguageProperties.java
+++ b/pmd-apex/src/main/java/net/sourceforge/pmd/lang/apex/ApexLanguageProperties.java
@@ -4,6 +4,7 @@
package net.sourceforge.pmd.lang.apex;
+import net.sourceforge.pmd.cpd.Tokenizer;
import net.sourceforge.pmd.lang.LanguagePropertyBundle;
import net.sourceforge.pmd.properties.PropertyDescriptor;
import net.sourceforge.pmd.properties.PropertyFactory;
@@ -23,6 +24,7 @@ public class ApexLanguageProperties extends LanguagePropertyBundle {
public ApexLanguageProperties() {
super(ApexLanguageModule.getInstance());
definePropertyDescriptor(MULTIFILE_DIRECTORY);
+ definePropertyDescriptor(Tokenizer.CPD_CASE_SENSITIVE);
}
diff --git a/pmd-apex/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language b/pmd-apex/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language
deleted file mode 100644
index fd84d4a22b..0000000000
--- a/pmd-apex/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language
+++ /dev/null
@@ -1 +0,0 @@
-net.sourceforge.pmd.cpd.ApexLanguage
diff --git a/pmd-apex/src/test/java/net/sourceforge/pmd/cpd/ApexTokenizerTest.java b/pmd-apex/src/test/java/net/sourceforge/pmd/cpd/ApexTokenizerTest.java
index aeb4a51252..47fd06778b 100644
--- a/pmd-apex/src/test/java/net/sourceforge/pmd/cpd/ApexTokenizerTest.java
+++ b/pmd-apex/src/test/java/net/sourceforge/pmd/cpd/ApexTokenizerTest.java
@@ -4,16 +4,16 @@
package net.sourceforge.pmd.cpd;
-import java.util.Properties;
-
import org.junit.jupiter.api.Test;
import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest;
+import net.sourceforge.pmd.cpd.test.LanguagePropertyConfig;
+import net.sourceforge.pmd.lang.apex.ApexLanguageModule;
class ApexTokenizerTest extends CpdTextComparisonTest {
ApexTokenizerTest() {
- super(".cls");
+ super(ApexLanguageModule.getInstance(), ".cls");
}
@Override
@@ -21,13 +21,6 @@ class ApexTokenizerTest extends CpdTextComparisonTest {
return "../lang/apex/cpd/testdata";
}
- @Override
- public Tokenizer newTokenizer(Properties properties) {
- ApexTokenizer tokenizer = new ApexTokenizer();
- tokenizer.setProperties(properties);
- return tokenizer;
- }
-
@Test
void testTokenize() {
@@ -52,14 +45,12 @@ class ApexTokenizerTest extends CpdTextComparisonTest {
doTest("tabWidth");
}
- private Properties caseSensitive() {
+ private LanguagePropertyConfig caseSensitive() {
return properties(true);
}
- private Properties properties(boolean caseSensitive) {
- Properties properties = new Properties();
- properties.setProperty(ApexTokenizer.CASE_SENSITIVE, Boolean.toString(caseSensitive));
- return properties;
+ private LanguagePropertyConfig properties(boolean caseSensitive) {
+ return properties -> properties.setProperty(Tokenizer.CPD_CASE_SENSITIVE, caseSensitive);
}
}
diff --git a/pmd-cli/src/main/java/net/sourceforge/pmd/cli/commands/internal/CpdCommand.java b/pmd-cli/src/main/java/net/sourceforge/pmd/cli/commands/internal/CpdCommand.java
index da3fdacdc0..b228f2ee92 100644
--- a/pmd-cli/src/main/java/net/sourceforge/pmd/cli/commands/internal/CpdCommand.java
+++ b/pmd-cli/src/main/java/net/sourceforge/pmd/cli/commands/internal/CpdCommand.java
@@ -116,7 +116,7 @@ public class CpdCommand extends AbstractAnalysisPmdSubcommand {
configuration.setSkipDuplicates(skipDuplicates);
configuration.setSkipLexicalErrors(skipLexicalErrors);
configuration.setSourceEncoding(encoding.getEncoding().name());
- configuration.setURI(uri == null ? null : uri.toString());
+ configuration.setURI(uri);
configuration.postContruct();
// Pass extra parameters as System properties to allow language
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AbstractTokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AbstractTokenizer.java
deleted file mode 100644
index 5db9827346..0000000000
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AbstractTokenizer.java
+++ /dev/null
@@ -1,184 +0,0 @@
-/**
- * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
- */
-
-package net.sourceforge.pmd.cpd;
-
-import java.util.List;
-import java.util.Locale;
-
-import net.sourceforge.pmd.lang.document.TextDocument;
-
-/**
- *
- * @author Zev Blut zb@ubit.com
- * @author Romain PELISSE belaran@gmail.com
- *
- * @deprecated Use an {@link AnyTokenizer} instead, it's basically as powerful.
- */
-@Deprecated
-public abstract class AbstractTokenizer implements Tokenizer {
-
- // FIXME depending on subclasses to assign local vars is rather fragile -
- // better to make private and setup via explicit hook methods
-
- protected List<String> stringToken; // List<String>, should be set by sub
- // classes
- protected List<String> ignorableCharacter; // List<String>, should be set by
- // sub classes
- // FIXME:Maybe an array of 'char'
- // would be better for
- // performance ?
- protected List<String> ignorableStmt; // List<String>, should be set by sub
- // classes
- protected char oneLineCommentChar = '#'; // Most script languages ( shell,
- // ruby, python,...) use this
- // symbol for comment line
-
- private List<String> code;
- private int lineNumber = 0;
- private String currentLine;
-
- // both zero-based
- private int tokBeginLine;
- private int tokBeginCol;
-
- protected boolean spanMultipleLinesString = true; // Most languages do, so
- // default is true
- protected Character spanMultipleLinesLineContinuationCharacter = null;
-
- private boolean downcaseString = true;
-
- @Override
- public void tokenize(TextDocument tokens, Tokens tokenEntries) {
- code = tokens.getCode();
-
- for (lineNumber = 0; lineNumber < code.size(); lineNumber++) {
- currentLine = code.get(lineNumber);
- int loc = 0;
- while (loc < currentLine.length()) {
- StringBuilder token = new StringBuilder();
- loc = getTokenFromLine(token, loc); // may jump several lines
-
- if (token.length() > 0 && !isIgnorableString(token.toString())) {
- final String image;
- if (downcaseString) {
- image = token.toString().toLowerCase(Locale.ROOT);
- } else {
- image = token.toString();
- }
-
- tokenEntries.add(new TokenEntry(image,
- tokens.getFileName(),
- tokBeginLine + 1,
- tokBeginCol + 1,
- loc + 1));
- }
- }
- }
- tokenEntries.add(TokenEntry.getEOF());
- }
-
- /**
- * Returns (0-based) EXclusive offset of the end of the token,
- * may jump several lines (sets {@link #lineNumber} in this case).
- */
- private int getTokenFromLine(StringBuilder token, int loc) {
- tokBeginLine = lineNumber;
- tokBeginCol = loc;
-
- for (int j = loc; j < currentLine.length(); j++) {
- char tok = currentLine.charAt(j);
- if (!Character.isWhitespace(tok) && !ignoreCharacter(tok)) {
- if (isComment(tok)) {
- if (token.length() > 0) {
- return j;
- } else {
- return getCommentToken(token, loc);
- }
- } else if (isString(tok)) {
- if (token.length() > 0) {
- return j; // we need to now parse the string as a
- // separate token.
- } else {
- // we are at the start of a string
- return parseString(token, j, tok);
- }
- } else {
- token.append(tok);
- }
- } else {
- if (token.length() > 0) {
- return j;
- } else {
- // ignored char
- tokBeginCol++;
- }
- }
- loc = j;
- }
- return loc + 1;
- }
-
- private int parseString(StringBuilder token, int loc, char stringDelimiter) {
- boolean escaped = false;
- boolean done = false;
- char tok;
- while (loc < currentLine.length() && !done) {
- tok = currentLine.charAt(loc);
- if (escaped && tok == stringDelimiter) { // Found an escaped string
- escaped = false;
- } else if (tok == stringDelimiter && token.length() > 0) {
- // We are done, we found the end of the string...
- done = true;
- } else {
- // Found an escaped char?
- escaped = tok == '\\';
- }
- // Adding char to String:" + token.toString());
- token.append(tok);
- loc++;
- }
- // Handling multiple lines string
- if (!done // ... we didn't find the end of the string (but the end of the line)
- && spanMultipleLinesString // ... the language allow multiple line span Strings
- && lineNumber < code.size() - 1 // ... there is still more lines to parse
- ) {
- // removes last character, if it is the line continuation (e.g.
- // backslash) character
- if (spanMultipleLinesLineContinuationCharacter != null
- && token.length() > 0
- && token.charAt(token.length() - 1) == spanMultipleLinesLineContinuationCharacter) {
- token.setLength(token.length() - 1);
- }
- // parsing new line
- currentLine = code.get(++lineNumber);
- // Warning : recursive call !
- loc = parseString(token, 0, stringDelimiter);
- }
- return loc;
- }
-
- private boolean ignoreCharacter(char tok) {
- return ignorableCharacter.contains(String.valueOf(tok));
- }
-
- private boolean isString(char tok) {
- return stringToken.contains(String.valueOf(tok));
- }
-
- private boolean isComment(char tok) {
- return tok == oneLineCommentChar;
- }
-
- private int getCommentToken(StringBuilder token, int loc) {
- while (loc < currentLine.length()) {
- token.append(currentLine.charAt(loc++));
- }
- return loc;
- }
-
- private boolean isIgnorableString(String token) {
- return ignorableStmt.contains(token);
- }
-}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AnyTokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AnyTokenizer.java
index 6e02dda6b8..671644eae6 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AnyTokenizer.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AnyTokenizer.java
@@ -9,6 +9,7 @@ import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
+import net.sourceforge.pmd.lang.document.Chars;
import net.sourceforge.pmd.lang.document.TextDocument;
import net.sourceforge.pmd.util.StringUtil;
@@ -61,36 +62,32 @@ public class AnyTokenizer implements Tokenizer {
}
@Override
- public void tokenize(TextDocument sourceCode, Tokens tokenEntries) {
- CharSequence text = sourceCode.getCodeBuffer();
+ public void tokenize(TextDocument sourceCode, TokenFactory tokenEntries) {
+ Chars text = sourceCode.getText();
Matcher matcher = pattern.matcher(text);
int lineNo = 1;
int lastLineStart = 0;
- try {
- while (matcher.find()) {
- String image = matcher.group();
- if (isComment(image)) {
- continue;
- } else if (StringUtils.isWhitespace(image)) {
- lineNo++;
- lastLineStart = matcher.end();
- continue;
- }
-
- int bline = lineNo;
- int bcol = 1 + matcher.start() - lastLineStart; // + 1 because columns are 1 based
- int ecol = StringUtil.columnNumberAt(image, image.length()); // this already outputs a 1-based column
- if (ecol == image.length() + 1) {
- ecol = bcol + image.length(); // single-line token
- } else {
- // multiline, need to update the line count
- lineNo += StringUtil.lineNumberAt(image, image.length()) - 1;
- lastLineStart = matcher.start() + image.length() - ecol + 1;
- }
- tokenEntries.add(new TokenEntry(image, sourceCode.getFileName(), bline, bcol, ecol));
+ while (matcher.find()) {
+ String image = matcher.group();
+ if (isComment(image)) {
+ continue;
+ } else if (StringUtils.isWhitespace(image)) {
+ lineNo++;
+ lastLineStart = matcher.end();
+ continue;
}
- } finally {
- tokenEntries.add(TokenEntry.getEOF());
+
+ int bline = lineNo;
+ int bcol = 1 + matcher.start() - lastLineStart; // + 1 because columns are 1 based
+ int ecol = StringUtil.columnNumberAt(image, image.length()); // this already outputs a 1-based column
+ if (ecol == image.length() + 1) {
+ ecol = bcol + image.length(); // single-line token
+ } else {
+ // multiline, need to update the line count
+ lineNo += StringUtil.lineNumberAt(image, image.length()) - 1;
+ lastLineStart = matcher.start() + image.length() - ecol + 1;
+ }
+ tokenEntries.recordToken(image, bline, bcol, lineNo, ecol);
}
}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPD.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPD.java
index a4bf7e24c2..3c8a6e0265 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPD.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPD.java
@@ -27,6 +27,9 @@ import net.sourceforge.pmd.internal.util.FileFinder;
import net.sourceforge.pmd.internal.util.FileUtil;
import net.sourceforge.pmd.internal.util.IOUtil;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
+import net.sourceforge.pmd.lang.document.SourceCode;
+import net.sourceforge.pmd.lang.document.TextDocument;
+import net.sourceforge.pmd.lang.document.TextFile;
import net.sourceforge.pmd.util.database.DBMSMetadata;
import net.sourceforge.pmd.util.database.DBURI;
import net.sourceforge.pmd.util.database.SourceObject;
@@ -41,7 +44,7 @@ public class CPD {
private CPDConfiguration configuration;
- private Map source = new TreeMap<>();
+ private final SourceManager sourceManager = new SourceManager();
private CPDListener listener = new CPDNullListener();
private Tokens tokens = new Tokens();
private MatchAlgorithm matchAlgorithm;
@@ -128,8 +131,8 @@ public class CPD {
}
public void go() {
- log.debug("Running match algorithm on {} files...", source.size());
- matchAlgorithm = new MatchAlgorithm(source, tokens, configuration.getMinimumTileSize(), listener);
+ log.debug("Running match algorithm on {} files...", sourceManager.size());
+ matchAlgorithm = new MatchAlgorithm(sourceManager, tokens, configuration.getMinimumTileSize(), listener);
matchAlgorithm.findMatches();
log.debug("Finished: {} duplicates found", matchAlgorithm.getMatches().size());
}
@@ -216,8 +219,7 @@ public class CPD {
}
}
- @Experimental
- public void add(SourceCode sourceCode) throws IOException {
+ private void add(SourceCode sourceCode) throws IOException {
if (configuration.isSkipLexicalErrors()) {
addAndSkipLexicalErrors(sourceCode);
} else {
@@ -226,11 +228,13 @@ public class CPD {
}
private void addAndThrowLexicalError(SourceCode sourceCode) throws IOException {
- log.debug("Tokenizing {}", sourceCode.getFileName());
- configuration.tokenizer().tokenize(sourceCode, tokens);
- listener.addedFile(1, new File(sourceCode.getFileName()));
- source.put(sourceCode.getFileName(), sourceCode);
- numberOfTokensPerFile.put(sourceCode.getFileName(), tokens.size() - lastTokenSize - 1 /*EOF*/);
+ log.debug("Tokenizing {}", sourceCode.getPathId());
+ try (TextDocument doc = sourceCode.load()) {
+ configuration.tokenizer().tokenize(doc, tokens);
+ }
+ listener.addedFile(1);
+ source.put(sourceCode.getPathId(), sourceCode);
+ numberOfTokensPerFile.put(sourceCode.getPathId(), tokens.size() - lastTokenSize - 1 /*EOF*/);
lastTokenSize = tokens.size();
}
@@ -239,7 +243,7 @@ public class CPD {
try {
addAndThrowLexicalError(sourceCode);
} catch (TokenMgrError e) {
- System.err.println("Skipping " + sourceCode.getFileName() + ". Reason: " + e.getMessage());
+ System.err.println("Skipping " + sourceCode.getDisplayName() + ". Reason: " + e.getMessage());
savedState.restore(tokens);
}
}
@@ -253,15 +257,6 @@ public class CPD {
return new ArrayList<>(source.keySet());
}
- /**
- * Get each Source to be processed.
- *
- * @return all Sources to be processed
- */
- public List getSources() {
- return new ArrayList<>(source.values());
- }
-
/**
* Entry to invoke CPD as command line tool. Note that this will
* invoke {@link System#exit(int)}.
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java
index efe6550261..2a4950d2f8 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java
@@ -11,6 +11,7 @@ import java.io.FilenameFilter;
import java.io.Reader;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
+import java.net.URI;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
@@ -24,6 +25,8 @@ import net.sourceforge.pmd.AbstractConfiguration;
import net.sourceforge.pmd.cpd.renderer.CPDReportRenderer;
import net.sourceforge.pmd.internal.util.FileFinder;
import net.sourceforge.pmd.internal.util.FileUtil;
+import net.sourceforge.pmd.lang.document.TextDocument;
+import net.sourceforge.pmd.lang.document.TextFile;
/**
*
@@ -79,7 +82,7 @@ public class CPDConfiguration extends AbstractConfiguration {
private boolean nonRecursive;
- private String uri;
+ private URI uri;
private boolean help;
@@ -87,7 +90,7 @@ public class CPDConfiguration extends AbstractConfiguration {
private boolean debug = false;
- public SourceCode sourceCodeFor(File file) {
+ public TextFile sourceCodeFor(File file) {
return new SourceCode(new SourceCode.FileCodeLoader(file, getSourceEncoding().name()));
}
@@ -340,11 +343,11 @@ public class CPDConfiguration extends AbstractConfiguration {
this.fileListPath = fileListPath;
}
- public String getURI() {
+ public URI getURI() {
return uri;
}
- public void setURI(String uri) {
+ public void setURI(URI uri) {
this.uri = uri;
}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDListener.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDListener.java
index ae180b4c97..6f361d1afb 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDListener.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDListener.java
@@ -4,8 +4,6 @@
package net.sourceforge.pmd.cpd;
-import java.io.File;
-
public interface CPDListener {
int INIT = 0;
@@ -14,7 +12,7 @@ public interface CPDListener {
int GROUPING = 3;
int DONE = 4;
- void addedFile(int fileCount, File file);
+ void addedFile(int fileCount);
void phaseUpdate(int phase);
}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDNullListener.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDNullListener.java
index 64b6060166..3566a9a6cf 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDNullListener.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDNullListener.java
@@ -4,11 +4,9 @@
package net.sourceforge.pmd.cpd;
-import java.io.File;
-
public class CPDNullListener implements CPDListener {
@Override
- public void addedFile(int fileCount, File file) {
+ public void addedFile(int fileCount) {
// does nothing - override it if necessary
}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CpdAnalysis.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CpdAnalysis.java
new file mode 100644
index 0000000000..e0cd1aa5b2
--- /dev/null
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CpdAnalysis.java
@@ -0,0 +1,147 @@
+/**
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.cpd;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import net.sourceforge.pmd.internal.util.FileCollectionUtil;
+import net.sourceforge.pmd.internal.util.FileFinder;
+import net.sourceforge.pmd.internal.util.FileUtil;
+import net.sourceforge.pmd.internal.util.IOUtil;
+import net.sourceforge.pmd.lang.ast.TokenMgrError;
+import net.sourceforge.pmd.lang.document.FileCollector;
+import net.sourceforge.pmd.lang.document.TextDocument;
+import net.sourceforge.pmd.util.database.DBMSMetadata;
+import net.sourceforge.pmd.util.database.DBURI;
+import net.sourceforge.pmd.util.database.SourceObject;
+import net.sourceforge.pmd.util.log.MessageReporter;
+
+/**
+ * @deprecated Use the module pmd-cli for CLI support.
+ */
+@Deprecated
+public class CpdAnalysis {
+
+ private CPDConfiguration configuration;
+ private FileCollector files;
+ private MessageReporter reporter;
+ private CPDListener listener;
+
+
+ public CpdAnalysis(CPDConfiguration theConfiguration) {
+ configuration = theConfiguration;
+
+ // Add all sources
+ try {
+ extractAllSources();
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ public FileCollector files() {
+ return files;
+ }
+
+ private void extractAllSources() throws IOException {
+ // Add files
+ if (null != configuration.getFiles() && !configuration.getFiles().isEmpty()) {
+ addSourcesFilesToCPD(configuration.getFiles());
+ }
+
+ // Add Database URIS
+ if (null != configuration.getURI()) {
+ FileCollectionUtil.collectDB(files(), configuration.getURI());
+ }
+
+ if (null != configuration.getFileListPath()) {
+ FileCollectionUtil.collectFileList(files(), FileUtil.toExistingPath(configuration.getFileListPath()));
+ }
+ }
+
+ private void addSourcesFilesToCPD(List<File> files) throws IOException {
+ for (File file : files) {
+ files().addFileOrDirectory(file.toPath());
+ }
+ }
+
+ public void setCpdListener(CPDListener cpdListener) {
+ this.listener = cpdListener;
+ }
+
+ private void addAndThrowLexicalError(SourceCode sourceCode) throws IOException {
+ log.debug("Tokenizing {}", sourceCode.getPathId());
+ try (TextDocument doc = sourceCode.load()) {
+ configuration.tokenizer().tokenize(doc, tokens);
+ }
+ listener.addedFile(1);
+ source.put(sourceCode.getPathId(), sourceCode);
+ numberOfTokensPerFile.put(sourceCode.getPathId(), tokens.size() - lastTokenSize - 1 /*EOF*/);
+ lastTokenSize = tokens.size();
+ }
+
+ public CPDReport performAnalysis() {
+
+ try (SourceManager sourceManager = new SourceManager(files.getCollectedFiles())) {
+ Tokens tokens = new Tokens();
+
+
+ log.debug("Running match algorithm on {} files...", sourceManager.size());
+ MatchAlgorithm matchAlgorithm = new MatchAlgorithm(sourceManager, tokens, configuration.getMinimumTileSize(), listener);
+ matchAlgorithm.findMatches();
+ log.debug("Finished: {} duplicates found", matchAlgorithm.getMatches().size());
+
+
+
+ } catch (Exception e) {
+ reporter.errorEx("Exception while running CPD", e);
+ }
+ }
+
+ public void add(File file) throws IOException {
+
+ if (configuration.isSkipDuplicates()) {
+ // TODO refactor this thing into a separate class
+ String signature = file.getName() + '_' + file.length();
+ if (current.contains(signature)) {
+ System.err.println("Skipping " + file.getAbsolutePath()
+ + " since it appears to be a duplicate file and --skip-duplicate-files is set");
+ return;
+ }
+ current.add(signature);
+ }
+
+ if (!IOUtil.equalsNormalizedPaths(file.getAbsoluteFile().getCanonicalPath(), file.getAbsolutePath())) {
+ System.err.println("Skipping " + file + " since it appears to be a symlink");
+ return;
+ }
+
+ if (!file.exists()) {
+ System.err.println("Skipping " + file + " since it doesn't exist (broken symlink?)");
+ return;
+ }
+
+ SourceCode sourceCode = configuration.sourceCodeFor(file);
+ add(sourceCode);
+ }
+
+
+}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/GUI.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/GUI.java
index d3aa2ae9eb..13fd308305 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/GUI.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/GUI.java
@@ -911,7 +911,7 @@ public class GUI implements CPDListener {
}
@Override
- public void addedFile(int fileCount, File file) {
+ public void addedFile(int fileCount) {
tokenizingFilesBar.setMaximum(fileCount);
tokenizingFilesBar.setValue(tokenizingFilesBar.getValue() + 1);
}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Mark.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Mark.java
index 30e13c4b04..4cfe7dfd73 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Mark.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Mark.java
@@ -4,11 +4,14 @@
package net.sourceforge.pmd.cpd;
+import net.sourceforge.pmd.lang.document.Chars;
+import net.sourceforge.pmd.lang.document.TextDocument;
+
public class Mark implements Comparable {
private TokenEntry token;
private TokenEntry endToken;
private int lineCount;
- private SourceCode code;
+ private TextDocument code;
public Mark(TokenEntry token) {
this.token = token;
@@ -69,11 +72,13 @@ public class Mark implements Comparable {
}
/** Newlines are normalized to \n. */
- public String getSourceCodeSlice() {
- return this.code.getSlice(getBeginLine(), getEndLine());
+ public Chars getSourceCodeSlice() {
+ return this.code.sliceOriginalText(
+ this.code.createLineRange(getBeginLine(), getEndLine())
+ );
}
- public void setSourceCode(SourceCode code) {
+ public void setSourceCode(TextDocument code) {
this.code = code;
}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Match.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Match.java
index 992f551e9c..3dbb8e2a9e 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Match.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Match.java
@@ -10,6 +10,7 @@ import java.util.Set;
import java.util.TreeSet;
import net.sourceforge.pmd.PMD;
+import net.sourceforge.pmd.lang.document.Chars;
public class Match implements Comparable, Iterable {
@@ -74,7 +75,7 @@ public class Match implements Comparable, Iterable {
}
/** Newlines are normalized to \n. */
- public String getSourceCodeSlice() {
+ public Chars getSourceCodeSlice() {
return this.getMark(0).getSourceCodeSlice();
}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/MatchAlgorithm.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/MatchAlgorithm.java
index d6f482dd05..ca198f032f 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/MatchAlgorithm.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/MatchAlgorithm.java
@@ -11,23 +11,25 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
-public class MatchAlgorithm {
+import net.sourceforge.pmd.lang.document.TextDocument;
+
+class MatchAlgorithm {
private static final int MOD = 37;
private int lastMod = 1;
private List matches;
- private Map<String, SourceCode> source;
+ private Map<String, TextDocument> source;
private Tokens tokens;
private List code;
private CPDListener cpdListener;
private int min;
- public MatchAlgorithm(Map sourceCode, Tokens tokens, int min) {
+ public MatchAlgorithm(Map sourceCode, Tokens tokens, int min) {
this(sourceCode, tokens, min, new CPDNullListener());
}
- public MatchAlgorithm(Map sourceCode, Tokens tokens, int min, CPDListener listener) {
+ public MatchAlgorithm(SourceManager sourceCode, Tokens tokens, int min, CPDListener listener) {
this.source = sourceCode;
this.tokens = tokens;
this.code = tokens.getTokens();
@@ -85,7 +87,7 @@ public class MatchAlgorithm {
mark.setLineCount(lineCount);
mark.setEndToken(endToken);
- SourceCode sourceCode = source.get(token.getTokenSrcID());
+ TextDocument sourceCode = source.get(token.getTokenSrcID());
assert sourceCode != null : token.getTokenSrcID() + " is not registered in " + source.keySet();
mark.setSourceCode(sourceCode);
}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SimpleRenderer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SimpleRenderer.java
index 8595b38c3c..9ec4b99bc7 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SimpleRenderer.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SimpleRenderer.java
@@ -60,7 +60,7 @@ public class SimpleRenderer implements CPDReportRenderer {
writer.append(PMD.EOL); // add a line to separate the source from the desc above
- String source = match.getSourceCodeSlice();
+ Chars source = match.getSourceCodeSlice();
if (trimLeadingWhitespace) {
for (Chars line : StringUtil.linesWithTrimIndent(source)) {
@@ -70,7 +70,8 @@ public class SimpleRenderer implements CPDReportRenderer {
return;
}
- writer.append(source).append(PMD.EOL);
+ source.writeFully(writer);
+ writer.append(PMD.EOL);
}
}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SourceCode.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SourceCode.java
index bd93023de7..34288552cd 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SourceCode.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SourceCode.java
@@ -4,190 +4,32 @@
package net.sourceforge.pmd.cpd;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.InputStreamReader;
+import java.io.IOException;
import java.io.Reader;
-import java.io.StringReader;
import java.lang.ref.SoftReference;
-import java.nio.file.Files;
-import java.util.ArrayList;
import java.util.List;
-import net.sourceforge.pmd.internal.util.IOUtil;
+import net.sourceforge.pmd.lang.document.TextDocument;
+import net.sourceforge.pmd.lang.document.TextFile;
public class SourceCode {
- public abstract static class CodeLoader {
- private SoftReference> code;
+ private SoftReference softRef;
+ private final TextFile textFile;
- public List getCode() {
- List c = null;
- if (code != null) {
- c = code.get();
- }
- if (c != null) {
- return c;
- }
- this.code = new SoftReference<>(load());
- return code.get();
- }
-
- /**
- * Loads a range of lines.
- *
- * @param startLine Start line (inclusive, 1-based)
- * @param endLine End line (inclusive, 1-based)
- */
- public List getCodeSlice(int startLine, int endLine) {
- List c = null;
- if (code != null) {
- c = code.get();
- }
- if (c != null) {
- return c.subList(startLine - 1, endLine);
- }
- return load(startLine, endLine);
- }
-
- public abstract String getFileName();
-
- protected abstract Reader getReader() throws Exception;
-
- protected List load() {
- try (BufferedReader reader = new BufferedReader(getReader())) {
- List lines = new ArrayList<>();
- String currentLine;
- while ((currentLine = reader.readLine()) != null) {
- lines.add(currentLine);
- }
- return lines;
- } catch (Exception e) {
- e.printStackTrace();
- throw new RuntimeException("Problem while reading " + getFileName() + ":" + e.getMessage());
- }
- }
-
- /**
- * Loads a range of lines.
- *
- * @param startLine Start line (inclusive, 1-based)
- * @param endLine End line (inclusive, 1-based)
- */
- protected List load(int startLine, int endLine) {
- try (BufferedReader reader = new BufferedReader(getReader())) {
- int linesToRead = 1 + endLine - startLine; // +1 because endLine is inclusive
- List lines = new ArrayList<>(linesToRead);
-
- // Skip lines until we reach the start point
- for (int i = 0; i < startLine - 1; i++) {
- reader.readLine();
- }
-
- String currentLine;
- while ((currentLine = reader.readLine()) != null) {
- lines.add(currentLine);
-
- if (lines.size() == linesToRead) {
- break;
- }
- }
- return lines;
- } catch (Exception e) {
- e.printStackTrace();
- throw new RuntimeException("Problem while reading " + getFileName() + ":" + e.getMessage());
- }
- }
+ public SourceCode(TextFile textFile) {
+ this.textFile = textFile;
}
- public static class FileCodeLoader extends CodeLoader {
- private File file;
- private String encoding;
-
- public FileCodeLoader(File file, String encoding) {
- this.file = file;
- this.encoding = encoding;
- }
-
- @Override
- public Reader getReader() throws Exception {
- IOUtil.BomAwareInputStream inputStream = new IOUtil.BomAwareInputStream(Files.newInputStream(file.toPath()));
-
- if (inputStream.hasBom()) {
- encoding = inputStream.getBomCharsetName();
- }
- return new InputStreamReader(inputStream, encoding);
- }
-
- public String getEncoding() {
- return encoding;
- }
-
- @Override
- public String getFileName() {
- return file.getAbsolutePath();
+ public TextDocument load() throws IOException { // returns the cached document, creating it from textFile when no cached one is available
+ TextDocument doc = softRef == null ? null : softRef.get(); // read the referent once: it may be cleared between two get() calls
+ if (doc == null) {
+ doc = TextDocument.create(textFile);
+ softRef = new SoftReference<>(doc);
}
+ return doc;
}
- public static class StringCodeLoader extends CodeLoader {
- public static final String DEFAULT_NAME = "CODE_LOADED_FROM_STRING";
-
- private String code;
-
- private String name;
-
- public StringCodeLoader(String code) {
- this(code, DEFAULT_NAME);
- }
-
- public StringCodeLoader(String code, String name) {
- this.code = code;
- this.name = name;
- }
-
- @Override
- public Reader getReader() {
- return new StringReader(code);
- }
-
- @Override
- public String getFileName() {
- return name;
- }
- }
-
- public static class ReaderCodeLoader extends CodeLoader {
- public static final String DEFAULT_NAME = "CODE_LOADED_FROM_READER";
-
- private Reader code;
-
- private String name;
-
- public ReaderCodeLoader(Reader code) {
- this(code, DEFAULT_NAME);
- }
-
- public ReaderCodeLoader(Reader code, String name) {
- this.code = code;
- this.name = name;
- }
-
- @Override
- public Reader getReader() {
- return code;
- }
-
- @Override
- public String getFileName() {
- return name;
- }
- }
-
- private CodeLoader cl;
-
- public SourceCode(CodeLoader cl) {
- this.cl = cl;
- }
public List getCode() {
return cl.getCode();
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SourceManager.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SourceManager.java
new file mode 100644
index 0000000000..bbcc75e051
--- /dev/null
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SourceManager.java
@@ -0,0 +1,43 @@
+/**
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.cpd;
+
+import java.lang.ref.SoftReference;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import net.sourceforge.pmd.internal.util.IOUtil;
+import net.sourceforge.pmd.lang.document.TextDocument;
+import net.sourceforge.pmd.lang.document.TextFile;
+
+public class SourceManager implements AutoCloseable {
+ // NOTE(review): the type arguments on the next two fields were missing from this patch text; reconstructed from the imports -- confirm the map key type.
+ private final Map<TextFile, SoftReference<TextDocument>> files = new ConcurrentHashMap<>();
+ private final List<TextFile> textFiles;
+
+ public SourceManager(List<? extends TextFile> files) {
+ textFiles = new ArrayList<>(files); // copy of the caller's list
+ }
+
+ // NOTE(review): get() has no body yet and cannot compile as written; it must return a TextDocument for pathId.
+ TextDocument get(String pathId) {
+
+ }
+
+ public int size() {
+ return files.size(); // size of the `files` map, not of textFiles
+ }
+
+ // Closes every text file via IOUtil.closeAll and rethrows the exception it reports, if any.
+ @Override
+ public void close() throws Exception {
+ Exception exception = IOUtil.closeAll(textFiles);
+ if (exception != null) {
+ throw exception;
+ }
+ }
+}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java
index 2d5ad99b4a..96c89cebda 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java
@@ -25,19 +25,6 @@ public class TokenEntry implements Comparable {
private int identifier;
private int hashCode;
- private static final ThreadLocal