diff --git a/pmd-cli/src/main/java/net/sourceforge/pmd/cli/commands/internal/CpdCommand.java b/pmd-cli/src/main/java/net/sourceforge/pmd/cli/commands/internal/CpdCommand.java
index da3fdacdc0..daef0b559c 100644
--- a/pmd-cli/src/main/java/net/sourceforge/pmd/cli/commands/internal/CpdCommand.java
+++ b/pmd-cli/src/main/java/net/sourceforge/pmd/cli/commands/internal/CpdCommand.java
@@ -69,6 +69,9 @@ public class CpdCommand extends AbstractAnalysisPmdSubcommand {
@Option(names = "--ignore-literal-sequences", description = "Ignore sequences of literals such as list initializers.")
private boolean ignoreLiteralSequences;
+ @Option(names = "--ignore-sequences", description = "Ignore sequences of identifiers and literals")
+ private boolean ignoreIdentifierAndLiteralSequences;
+
@Option(names = "--skip-lexical-errors",
description = "Skip files which can't be tokenized due to invalid characters, instead of aborting with an error.")
private boolean skipLexicalErrors;
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java
index efe6550261..984b771503 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java
@@ -65,6 +65,8 @@ public class CPDConfiguration extends AbstractConfiguration {
private boolean ignoreLiteralSequences = false;
+ private boolean ignoreIdentifierAndLiteralSequences = false;
+
private boolean skipLexicalErrors = false;
private boolean noSkipBlocks = false;
@@ -190,6 +192,11 @@ public class CPDConfiguration extends AbstractConfiguration {
} else {
properties.remove(Tokenizer.OPTION_IGNORE_LITERAL_SEQUENCES);
}
+ if (configuration.isIgnoreIdentifierAndLiteralSequences()) {
+ properties.setProperty(Tokenizer.OPTION_IGNORE_IDENTIFIER_AND_LITERAL_SEQUENCES, "true");
+ } else {
+ properties.remove(Tokenizer.OPTION_IGNORE_IDENTIFIER_AND_LITERAL_SEQUENCES);
+ }
properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS, Boolean.toString(!configuration.isNoSkipBlocks()));
properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS_PATTERN, configuration.getSkipBlocksPattern());
configuration.getLanguage().setProperties(properties);
@@ -316,6 +323,14 @@ public class CPDConfiguration extends AbstractConfiguration {
this.ignoreLiteralSequences = ignoreLiteralSequences;
}
+ public boolean isIgnoreIdentifierAndLiteralSequences() {
+ return ignoreIdentifierAndLiteralSequences;
+ }
+
+ public void setIgnoreIdentifierAndLiteralSequences(boolean ignoreIdentifierAndLiteralSequences) {
+ this.ignoreIdentifierAndLiteralSequences = ignoreIdentifierAndLiteralSequences;
+ }
+
public boolean isSkipLexicalErrors() {
return skipLexicalErrors;
}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java
index e6876fb960..66e4344a03 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java
@@ -15,6 +15,12 @@ public interface Tokenizer {
* Ignore sequences of literals (e.g, <code>0,0,0,0...</code>).
*/
String OPTION_IGNORE_LITERAL_SEQUENCES = "net.sourceforge.pmd.cpd.Tokenizer.skipLiteralSequences";
+
+ /**
+ * Ignore comma-separated sequences of identifiers and literals (e.g., <code>0,0,0,0...</code>).
+ */
+ String OPTION_IGNORE_IDENTIFIER_AND_LITERAL_SEQUENCES = "net.sourceforge.pmd.cpd.Tokenizer.skipSequences";
+
/**
* Ignore using directives in C#. The default value is <code>false</code>.
*/
diff --git a/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java b/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java
index 0728e9afbf..1df6ebe865 100644
--- a/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java
+++ b/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java
@@ -26,6 +26,7 @@ public class CPPTokenizer extends JavaCCTokenizer {
private boolean skipBlocks;
private Pattern skipBlocksStart;
private Pattern skipBlocksEnd;
+ private boolean ignoreIdentifierAndLiteralSeqences = false;
private boolean ignoreLiteralSequences = false;
public CPPTokenizer() {
@@ -52,8 +53,9 @@ public class CPPTokenizer extends JavaCCTokenizer {
skipBlocksEnd = CppBlockSkipper.compileSkipMarker(split[1]);
}
}
- ignoreLiteralSequences = Boolean.parseBoolean(properties.getProperty(OPTION_IGNORE_LITERAL_SEQUENCES,
- Boolean.FALSE.toString()));
+ ignoreLiteralSequences = Boolean.parseBoolean(properties.getProperty(OPTION_IGNORE_LITERAL_SEQUENCES, Boolean.FALSE.toString()));
+ ignoreIdentifierAndLiteralSeqences =
+ Boolean.parseBoolean(properties.getProperty(OPTION_IGNORE_IDENTIFIER_AND_LITERAL_SEQUENCES, Boolean.FALSE.toString()));
}
@@ -78,42 +80,44 @@ public class CPPTokenizer extends JavaCCTokenizer {
@Override
protected TokenFilter getTokenFilter(final TokenManager tokenManager) {
- return new CppTokenFilter(tokenManager, ignoreLiteralSequences);
+ return new CppTokenFilter(tokenManager, ignoreLiteralSequences, ignoreIdentifierAndLiteralSeqences);
}
private static class CppTokenFilter extends JavaCCTokenFilter {
private final boolean ignoreLiteralSequences;
- private JavaccToken discardingLiteralsUntil = null;
+ private final boolean ignoreIdentifierAndLiteralSeqences;
+ private JavaccToken discardingTokensUntil = null;
private boolean discardCurrent = false;
- CppTokenFilter(final TokenManager tokenManager, final boolean ignoreLiteralSequences) {
+ CppTokenFilter(final TokenManager tokenManager, final boolean ignoreLiteralSequences, final boolean ignoreIdentifierAndLiteralSeqences) {
super(tokenManager);
+ this.ignoreIdentifierAndLiteralSeqences = ignoreIdentifierAndLiteralSeqences;
this.ignoreLiteralSequences = ignoreLiteralSequences;
}
@Override
protected void analyzeTokens(final JavaccToken currentToken, final Iterable remainingTokens) {
discardCurrent = false;
- skipLiteralSequences(currentToken, remainingTokens);
+ skipSequences(currentToken, remainingTokens);
}
- private void skipLiteralSequences(final JavaccToken currentToken, final Iterable remainingTokens) {
- if (ignoreLiteralSequences) {
+ private void skipSequences(final JavaccToken currentToken, final Iterable remainingTokens) {
+ if (ignoreLiteralSequences || ignoreIdentifierAndLiteralSeqences) {
final int kind = currentToken.getKind();
- if (isDiscardingLiterals()) {
- if (currentToken == discardingLiteralsUntil) { // NOPMD - intentional check for reference equality
- discardingLiteralsUntil = null;
+ if (isDiscardingToken()) {
+ if (currentToken == discardingTokensUntil) { // NOPMD - intentional check for reference equality
+ discardingTokensUntil = null;
discardCurrent = true;
}
} else if (kind == CppTokenKinds.LCURLYBRACE) {
- final JavaccToken finalToken = findEndOfSequenceOfLiterals(remainingTokens);
- discardingLiteralsUntil = finalToken;
+ final JavaccToken finalToken = findEndOfSequenceToDiscard(remainingTokens, ignoreIdentifierAndLiteralSeqences);
+ discardingTokensUntil = finalToken;
}
}
}
- private static JavaccToken findEndOfSequenceOfLiterals(final Iterable remainingTokens) {
- boolean seenLiteral = false;
+ private static JavaccToken findEndOfSequenceToDiscard(final Iterable remainingTokens, boolean ignoreIdentifierAndLiteralSeqences) {
+ boolean seenAllowedToken = false;
int braceCount = 0;
for (final JavaccToken token : remainingTokens) {
switch (token.getKind()) {
@@ -123,8 +127,18 @@ public class CPPTokenizer extends JavaCCTokenizer {
case CppTokenKinds.HEXADECIMAL_INT_LITERAL:
case CppTokenKinds.OCTAL_INT_LITERAL:
case CppTokenKinds.ZERO:
- seenLiteral = true;
+ case CppTokenKinds.STRING:
+ seenAllowedToken = true;
break; // can be skipped; continue to the next token
+ case CppTokenKinds.ID:
+ // Ignore identifiers if instructed
+ if (ignoreIdentifierAndLiteralSeqences) {
+ seenAllowedToken = true;
+ break; // can be skipped; continue to the next token
+ } else {
+ // identifiers are not allowed in a literals-only sequence, so there is nothing to discard
+ return null;
+ }
case CppTokenKinds.COMMA:
break; // can be skipped; continue to the next token
case CppTokenKinds.LCURLYBRACE:
@@ -134,7 +148,7 @@ public class CPPTokenizer extends JavaCCTokenizer {
braceCount--;
if (braceCount < 0) {
// end of the list; skip all contents
- return seenLiteral ? token : null;
+ return seenAllowedToken ? token : null;
} else {
// curly braces are not yet balanced; continue to the next token
break;
@@ -147,13 +161,13 @@ public class CPPTokenizer extends JavaCCTokenizer {
return null;
}
- private boolean isDiscardingLiterals() {
- return discardingLiteralsUntil != null;
+ private boolean isDiscardingToken() {
+ return discardingTokensUntil != null;
}
@Override
protected boolean isLanguageSpecificDiscarding() {
- return isDiscardingLiterals() || discardCurrent;
+ return isDiscardingToken() || discardCurrent;
}
}
}
diff --git a/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerTest.java b/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerTest.java
index b4746b7f64..5192ee80f4 100644
--- a/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerTest.java
+++ b/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerTest.java
@@ -139,8 +139,18 @@ class CPPTokenizerTest extends CpdTextComparisonTest {
doTest("listOfNumbers", "_ignored", skipLiteralSequences());
}
+ @Test
+ void testLongListsOfNumbersAndIdentifiersAreIgnored() {
+ doTest("listOfNumbers", "_ignored_identifiers", skipIdentifierAndLiteralsSequences());
+ }
+
+ @Test
+ void testLongListsOfIdentifiersAreIgnored() {
+ doTest("listOfNumbers", "_ignored_identifiers", skipIdentifierSequences());
+ }
+
private static Properties skipBlocks(String skipPattern) {
- return properties(true, skipPattern, false);
+ return properties(true, skipPattern, false, false);
}
private static Properties skipBlocks() {
@@ -148,20 +158,29 @@ class CPPTokenizerTest extends CpdTextComparisonTest {
}
private static Properties dontSkipBlocks() {
- return properties(false, null, false);
+ return properties(false, null, false, false);
}
private static Properties skipLiteralSequences() {
- return properties(false, null, true);
+ return properties(false, null, true, false);
}
- private static Properties properties(boolean skipBlocks, String skipPattern, boolean skipLiteralSequences) {
+ private static Properties skipIdentifierAndLiteralsSequences() {
+ return properties(false, null, true, true);
+ }
+
+ private static Properties skipIdentifierSequences() {
+ return properties(false, null, false, true);
+ }
+
+ private static Properties properties(boolean skipBlocks, String skipPattern, boolean skipLiteralSequences, boolean skipSequences) {
Properties properties = new Properties();
properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS, Boolean.toString(skipBlocks));
if (skipPattern != null) {
properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS_PATTERN, skipPattern);
}
properties.setProperty(Tokenizer.OPTION_IGNORE_LITERAL_SEQUENCES, Boolean.toString(skipLiteralSequences));
+ properties.setProperty(Tokenizer.OPTION_IGNORE_IDENTIFIER_AND_LITERAL_SEQUENCES, Boolean.toString(skipSequences));
return properties;
}
}
diff --git a/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/listOfNumbers.cpp b/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/listOfNumbers.cpp
index c9900ff83d..6de803c329 100644
--- a/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/listOfNumbers.cpp
+++ b/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/listOfNumbers.cpp
@@ -18,7 +18,7 @@ int main() {
0b000001, // C++ 14 binary literal
};
int c[3][4] = {{0,1,2,3},{4,5,6,7},{8,9,10,11}}; // multi-dimensional array
- int d[3] = {a, a, a}; // identifiers should not be filtered out
+ int d[3] = {a, a, a}; // identifiers should be filtered out if identifiers are allowed in sequences
int e[1][3] = {{a, a, a}}; // identifiers in multi-dimensional array
int f[1] = {main()}; // method invocations should not be filtered out
int g[1][1] = {{main()}}; // method invocation in multi-dimensional array
diff --git a/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/listOfNumbers_ignored_identifiers.txt b/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/listOfNumbers_ignored_identifiers.txt
new file mode 100644
index 0000000000..358c5009da
--- /dev/null
+++ b/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/listOfNumbers_ignored_identifiers.txt
@@ -0,0 +1,92 @@
+ [Image] or [Truncated image[ Bcol Ecol
+L2
+ [int] 1 4
+ [main] 5 9
+ [(] 9 10
+ [)] 10 11
+ [{] 12 13
+L3
+ [int] 3 6
+ [a] 7 8
+ [\[] 8 9
+ [50] 9 11
+ [\]] 11 12
+ [=] 13 14
+ [;] 116 117
+L4
+ [double] 3 9
+ [b] 10 11
+ [\[] 11 12
+ [14] 12 14
+ [\]] 14 15
+ [=] 16 17
+L19
+ [;] 4 5
+L20
+ [int] 3 6
+ [c] 7 8
+ [\[] 8 9
+ [3] 9 10
+ [\]] 10 11
+ [\[] 11 12
+ [4] 12 13
+ [\]] 13 14
+ [=] 15 16
+ [;] 50 51
+L21
+ [int] 3 6
+ [d] 7 8
+ [\[] 8 9
+ [3] 9 10
+ [\]] 10 11
+ [=] 12 13
+ [;] 23 24
+L22
+ [int] 3 6
+ [e] 7 8
+ [\[] 8 9
+ [1] 9 10
+ [\]] 10 11
+ [\[] 11 12
+ [3] 12 13
+ [\]] 13 14
+ [=] 15 16
+ [;] 28 29
+L23
+ [int] 3 6
+ [f] 7 8
+ [\[] 8 9
+ [1] 9 10
+ [\]] 10 11
+ [=] 12 13
+ [{] 14 15
+ [main] 15 19
+ [(] 19 20
+ [)] 20 21
+ [}] 21 22
+ [;] 22 23
+L24
+ [int] 3 6
+ [g] 7 8
+ [\[] 8 9
+ [1] 9 10
+ [\]] 10 11
+ [\[] 11 12
+ [1] 12 13
+ [\]] 13 14
+ [=] 15 16
+ [{] 17 18
+ [{] 18 19
+ [main] 19 23
+ [(] 23 24
+ [)] 24 25
+ [}] 25 26
+ [}] 26 27
+ [;] 27 28
+L25
+ [return] 3 9
+ [0] 10 11
+ [;] 11 12
+L26
+ [}] 1 2
+EOF