diff --git a/pmd-cli/src/main/java/net/sourceforge/pmd/cli/commands/internal/CpdCommand.java b/pmd-cli/src/main/java/net/sourceforge/pmd/cli/commands/internal/CpdCommand.java index da3fdacdc0..daef0b559c 100644 --- a/pmd-cli/src/main/java/net/sourceforge/pmd/cli/commands/internal/CpdCommand.java +++ b/pmd-cli/src/main/java/net/sourceforge/pmd/cli/commands/internal/CpdCommand.java @@ -69,6 +69,9 @@ public class CpdCommand extends AbstractAnalysisPmdSubcommand { @Option(names = "--ignore-literal-sequences", description = "Ignore sequences of literals such as list initializers.") private boolean ignoreLiteralSequences; + @Option(names = "--ignore-sequences", description = "Ignore sequences of identifiers and literals") + private boolean ignoreIdentifierAndLiteralSequences; + @Option(names = "--skip-lexical-errors", description = "Skip files which can't be tokenized due to invalid characters, instead of aborting with an error.") private boolean skipLexicalErrors; diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java index efe6550261..984b771503 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java @@ -65,6 +65,8 @@ public class CPDConfiguration extends AbstractConfiguration { private boolean ignoreLiteralSequences = false; + private boolean ignoreIdentifierAndLiteralSequences = false; + private boolean skipLexicalErrors = false; private boolean noSkipBlocks = false; @@ -190,6 +192,11 @@ public class CPDConfiguration extends AbstractConfiguration { } else { properties.remove(Tokenizer.OPTION_IGNORE_LITERAL_SEQUENCES); } + if (configuration.isIgnoreIdentifierAndLiteralSequences()) { + properties.setProperty(Tokenizer.OPTION_IGNORE_IDENTIFIER_AND_LITERAL_SEQUENCES, "true"); + } else { + properties.remove(Tokenizer.OPTION_IGNORE_IDENTIFIER_AND_LITERAL_SEQUENCES); + } 
properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS, Boolean.toString(!configuration.isNoSkipBlocks())); properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS_PATTERN, configuration.getSkipBlocksPattern()); configuration.getLanguage().setProperties(properties); @@ -316,6 +323,14 @@ public class CPDConfiguration extends AbstractConfiguration { this.ignoreLiteralSequences = ignoreLiteralSequences; } + public boolean isIgnoreIdentifierAndLiteralSequences() { + return ignoreIdentifierAndLiteralSequences; + } + + public void setIgnoreIdentifierAndLiteralSequences(boolean ignoreIdentifierAndLiteralSequences) { + this.ignoreIdentifierAndLiteralSequences = ignoreIdentifierAndLiteralSequences; + } + public boolean isSkipLexicalErrors() { return skipLexicalErrors; } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java index e6876fb960..66e4344a03 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java @@ -15,6 +15,12 @@ public interface Tokenizer { * Ignore sequences of literals (e.g, 0,0,0,0...). */ String OPTION_IGNORE_LITERAL_SEQUENCES = "net.sourceforge.pmd.cpd.Tokenizer.skipLiteralSequences"; + + /** + * Ignore comma-separated sequences of identifiers and literals (e.g., a,b,0,0...). + */ + String OPTION_IGNORE_IDENTIFIER_AND_LITERAL_SEQUENCES = "net.sourceforge.pmd.cpd.Tokenizer.skipSequences"; + /** * Ignore using directives in C#. The default value is false. 
*/ diff --git a/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java b/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java index 0728e9afbf..1df6ebe865 100644 --- a/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java +++ b/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java @@ -26,6 +26,7 @@ public class CPPTokenizer extends JavaCCTokenizer { private boolean skipBlocks; private Pattern skipBlocksStart; private Pattern skipBlocksEnd; + private boolean ignoreIdentifierAndLiteralSequences = false; private boolean ignoreLiteralSequences = false; public CPPTokenizer() { @@ -52,8 +53,9 @@ public class CPPTokenizer extends JavaCCTokenizer { skipBlocksEnd = CppBlockSkipper.compileSkipMarker(split[1]); } } - ignoreLiteralSequences = Boolean.parseBoolean(properties.getProperty(OPTION_IGNORE_LITERAL_SEQUENCES, - Boolean.FALSE.toString())); + ignoreLiteralSequences = Boolean.parseBoolean(properties.getProperty(OPTION_IGNORE_LITERAL_SEQUENCES, Boolean.FALSE.toString())); + ignoreIdentifierAndLiteralSequences = + Boolean.parseBoolean(properties.getProperty(OPTION_IGNORE_IDENTIFIER_AND_LITERAL_SEQUENCES, Boolean.FALSE.toString())); } @@ -78,42 +80,44 @@ public class CPPTokenizer extends JavaCCTokenizer { @Override protected TokenFilter getTokenFilter(final TokenManager tokenManager) { - return new CppTokenFilter(tokenManager, ignoreLiteralSequences); + return new CppTokenFilter(tokenManager, ignoreLiteralSequences, ignoreIdentifierAndLiteralSequences); } private static class CppTokenFilter extends JavaCCTokenFilter { private final boolean ignoreLiteralSequences; - private JavaccToken discardingLiteralsUntil = null; + private final boolean ignoreIdentifierAndLiteralSequences; + private JavaccToken discardingTokensUntil = null; private boolean discardCurrent = false; - CppTokenFilter(final TokenManager tokenManager, final boolean ignoreLiteralSequences) { + CppTokenFilter(final TokenManager tokenManager, final boolean 
ignoreLiteralSequences, final boolean ignoreIdentifierAndLiteralSequences) { super(tokenManager); + this.ignoreIdentifierAndLiteralSequences = ignoreIdentifierAndLiteralSequences; this.ignoreLiteralSequences = ignoreLiteralSequences; } @Override protected void analyzeTokens(final JavaccToken currentToken, final Iterable<JavaccToken> remainingTokens) { discardCurrent = false; - skipLiteralSequences(currentToken, remainingTokens); + skipSequences(currentToken, remainingTokens); } - private void skipLiteralSequences(final JavaccToken currentToken, final Iterable<JavaccToken> remainingTokens) { - if (ignoreLiteralSequences) { + private void skipSequences(final JavaccToken currentToken, final Iterable<JavaccToken> remainingTokens) { + if (ignoreLiteralSequences || ignoreIdentifierAndLiteralSequences) { final int kind = currentToken.getKind(); - if (isDiscardingLiterals()) { - if (currentToken == discardingLiteralsUntil) { // NOPMD - intentional check for reference equality - discardingLiteralsUntil = null; + if (isDiscardingToken()) { + if (currentToken == discardingTokensUntil) { // NOPMD - intentional check for reference equality + discardingTokensUntil = null; discardCurrent = true; } } else if (kind == CppTokenKinds.LCURLYBRACE) { - final JavaccToken finalToken = findEndOfSequenceOfLiterals(remainingTokens); - discardingLiteralsUntil = finalToken; + final JavaccToken finalToken = findEndOfSequenceToDiscard(remainingTokens, ignoreIdentifierAndLiteralSequences); + discardingTokensUntil = finalToken; } } } - private static JavaccToken findEndOfSequenceOfLiterals(final Iterable<JavaccToken> remainingTokens) { - boolean seenLiteral = false; + private static JavaccToken findEndOfSequenceToDiscard(final Iterable<JavaccToken> remainingTokens, boolean ignoreIdentifierAndLiteralSequences) { + boolean seenAllowedToken = false; int braceCount = 0; for (final JavaccToken token : remainingTokens) { switch (token.getKind()) { @@ -123,8 +127,18 @@ public class CPPTokenizer extends JavaCCTokenizer { case CppTokenKinds.HEXADECIMAL_INT_LITERAL: case 
CppTokenKinds.OCTAL_INT_LITERAL: case CppTokenKinds.ZERO: - seenLiteral = true; + case CppTokenKinds.STRING: + seenAllowedToken = true; break; // can be skipped; continue to the next token + case CppTokenKinds.ID: + // identifiers count as list elements only when the identifier-and-literal option is enabled + if (ignoreIdentifierAndLiteralSequences) { + seenAllowedToken = true; + break; // can be skipped; continue to the next token + } else { + // identifiers are not allowed in this mode, so this is not a skippable sequence + return null; + } case CppTokenKinds.COMMA: break; // can be skipped; continue to the next token case CppTokenKinds.LCURLYBRACE: @@ -134,7 +148,7 @@ public class CPPTokenizer extends JavaCCTokenizer { braceCount--; if (braceCount < 0) { // end of the list; skip all contents - return seenLiteral ? token : null; + return seenAllowedToken ? token : null; } else { // curly braces are not yet balanced; continue to the next token break; @@ -147,13 +161,13 @@ public class CPPTokenizer extends JavaCCTokenizer { return null; } - private boolean isDiscardingLiterals() { - return discardingLiteralsUntil != null; + private boolean isDiscardingToken() { + return discardingTokensUntil != null; } @Override protected boolean isLanguageSpecificDiscarding() { - return isDiscardingLiterals() || discardCurrent; + return isDiscardingToken() || discardCurrent; } } } diff --git a/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerTest.java b/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerTest.java index b4746b7f64..5192ee80f4 100644 --- a/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerTest.java +++ b/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerTest.java @@ -139,8 +139,18 @@ class CPPTokenizerTest extends CpdTextComparisonTest { doTest("listOfNumbers", "_ignored", skipLiteralSequences()); } + @Test + void testLongListsOfNumbersAndIdentifiersAreIgnored() { + doTest("listOfNumbers", "_ignored_identifiers", skipIdentifierAndLiteralsSequences()); + } + + @Test + void testLongListsOfIdentifiersAreIgnored() { + 
doTest("listOfNumbers", "_ignored_identifiers", skipIdentifierSequences()); + } + private static Properties skipBlocks(String skipPattern) { - return properties(true, skipPattern, false); + return properties(true, skipPattern, false, false); } private static Properties skipBlocks() { @@ -148,20 +158,29 @@ class CPPTokenizerTest extends CpdTextComparisonTest { } private static Properties dontSkipBlocks() { - return properties(false, null, false); + return properties(false, null, false, false); } private static Properties skipLiteralSequences() { - return properties(false, null, true); + return properties(false, null, true, false); } - private static Properties properties(boolean skipBlocks, String skipPattern, boolean skipLiteralSequences) { + private static Properties skipIdentifierAndLiteralsSequences() { + return properties(false, null, true, true); + } + + private static Properties skipIdentifierSequences() { + return properties(false, null, false, true); + } + + private static Properties properties(boolean skipBlocks, String skipPattern, boolean skipLiteralSequences, boolean skipSequences) { Properties properties = new Properties(); properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS, Boolean.toString(skipBlocks)); if (skipPattern != null) { properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS_PATTERN, skipPattern); } properties.setProperty(Tokenizer.OPTION_IGNORE_LITERAL_SEQUENCES, Boolean.toString(skipLiteralSequences)); + properties.setProperty(Tokenizer.OPTION_IGNORE_IDENTIFIER_AND_LITERAL_SEQUENCES, Boolean.toString(skipSequences)); return properties; } } diff --git a/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/listOfNumbers.cpp b/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/listOfNumbers.cpp index c9900ff83d..6de803c329 100644 --- a/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/listOfNumbers.cpp +++ 
b/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/listOfNumbers.cpp @@ -18,7 +18,7 @@ int main() { 0b000001, // C++ 14 binary literal }; int c[3][4] = {{0,1,2,3},{4,5,6,7},{8,9,10,11}}; // multi-dimensional array - int d[3] = {a, a, a}; // identifiers should not be filtered out + int d[3] = {a, a, a}; // identifiers should be filtered out if identifiers are allowed in sequences int e[1][3] = {{a, a, a}}; // identifiers in multi-dimensional array int f[1] = {main()}; // method invocations should not be filtered out int g[1][1] = {{main()}}; // method invocation in multi-dimensional array diff --git a/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/listOfNumbers_ignored_identifiers.txt b/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/listOfNumbers_ignored_identifiers.txt new file mode 100644 index 0000000000..358c5009da --- /dev/null +++ b/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/listOfNumbers_ignored_identifiers.txt @@ -0,0 +1,92 @@ + [Image] or [Truncated image[ Bcol Ecol +L2 + [int] 1 4 + [main] 5 9 + [(] 9 10 + [)] 10 11 + [{] 12 13 +L3 + [int] 3 6 + [a] 7 8 + [\[] 8 9 + [50] 9 11 + [\]] 11 12 + [=] 13 14 + [;] 116 117 +L4 + [double] 3 9 + [b] 10 11 + [\[] 11 12 + [14] 12 14 + [\]] 14 15 + [=] 16 17 +L19 + [;] 4 5 +L20 + [int] 3 6 + [c] 7 8 + [\[] 8 9 + [3] 9 10 + [\]] 10 11 + [\[] 11 12 + [4] 12 13 + [\]] 13 14 + [=] 15 16 + [;] 50 51 +L21 + [int] 3 6 + [d] 7 8 + [\[] 8 9 + [3] 9 10 + [\]] 10 11 + [=] 12 13 + [;] 23 24 +L22 + [int] 3 6 + [e] 7 8 + [\[] 8 9 + [1] 9 10 + [\]] 10 11 + [\[] 11 12 + [3] 12 13 + [\]] 13 14 + [=] 15 16 + [;] 28 29 +L23 + [int] 3 6 + [f] 7 8 + [\[] 8 9 + [1] 9 10 + [\]] 10 11 + [=] 12 13 + [{] 14 15 + [main] 15 19 + [(] 19 20 + [)] 20 21 + [}] 21 22 + [;] 22 23 +L24 + [int] 3 6 + [g] 7 8 + [\[] 8 9 + [1] 9 10 + [\]] 10 11 + [\[] 11 12 + [1] 12 13 + [\]] 13 14 + [=] 15 16 + [{] 17 18 + [{] 18 19 + [main] 19 23 + [(] 23 24 + [)] 24 25 + [}] 25 26 + 
[}] 26 27 + [;] 27 28 +L25 + [return] 3 9 + [0] 10 11 + [;] 11 12 +L26 + [}] 1 2 +EOF