diff --git a/.all-contributorsrc b/.all-contributorsrc
index 8d11a3745a..6d612e8df8 100644
--- a/.all-contributorsrc
+++ b/.all-contributorsrc
@@ -7846,6 +7846,15 @@
"contributions": [
"code"
]
+ },
+ {
+ "login": "jdupak",
+ "name": "Jakub Dupak",
+ "avatar_url": "https://avatars.githubusercontent.com/u/22683640?v=4",
+ "profile": "https://github.com/jdupak",
+ "contributions": [
+ "code"
+ ]
}
],
"contributorsPerLine": 7,
diff --git a/docs/pages/pmd/userdocs/cpd/cpd.md b/docs/pages/pmd/userdocs/cpd/cpd.md
index 1bf2954c18..0580732bb4 100644
--- a/docs/pages/pmd/userdocs/cpd/cpd.md
+++ b/docs/pages/pmd/userdocs/cpd/cpd.md
@@ -163,7 +163,7 @@ exactly identical.
{% include custom/cli_option_row.html options="--ignore-literals"
description="Ignore literal values such as numbers and strings when comparing text.
By default, literals are not ignored."
- languages="Java"
+ languages="Java, C++"
%}
{% include custom/cli_option_row.html options="--ignore-literal-sequences"
description="Ignore sequences of literals such as list initializers.
@@ -173,7 +173,7 @@ exactly identical.
{% include custom/cli_option_row.html options="--ignore-identifiers"
description="Ignore names of classes, methods, variables, constants, etc. when comparing text.
By default, identifier names are not ignored."
- languages="Java"
+ languages="Java, C++"
%}
{% include custom/cli_option_row.html options="--ignore-annotations"
description="Ignore language annotations (Java) or attributes (C#) when comparing text.
diff --git a/docs/pages/release_notes.md b/docs/pages/release_notes.md
index 8b0a13de9b..f595d1a885 100644
--- a/docs/pages/release_notes.md
+++ b/docs/pages/release_notes.md
@@ -14,6 +14,13 @@ This is a {{ site.pmd.release_type }} release.
### 🚀 New and noteworthy
+#### CPD can now ignore literals and identifiers in C++ code
+
+When searching for duplicated code in C++, differences in literals or identifiers can now be
+ignored (like in Java). This can be enabled via the command line options `--ignore-literals`
+and `--ignore-identifiers`.
+See [PR #5040](https://github.com/pmd/pmd/pull/5040) for details.
+
### 🌟 Rule Changes
#### Renamed Rules
@@ -49,6 +56,7 @@ The old rule names still work but are deprecated.
### ✨ Merged pull requests
* [#4965](https://github.com/pmd/pmd/pull/4965): Fix #4532: \[java] Rename JUnit rules with overly restrictive names - [Juan Martín Sotuyo Dodero](https://github.com/jsotuyod) (@jsotuyod)
+* [#5040](https://github.com/pmd/pmd/pull/5040): \[cpp] Ignore literals and ignore identifiers capability to C++ CPD - [Jakub Dupak](https://github.com/jdupak) (@jdupak)
* [#5225](https://github.com/pmd/pmd/pull/5225): Fix #5067: \[java] CloseResource: False positive for FileSystems.getDefault() - [Lukas Gräf](https://github.com/lukasgraef) (@lukasgraef)
* [#5241](https://github.com/pmd/pmd/pull/5241): Ignore javacc code in coverage report - [Juan Martín Sotuyo Dodero](https://github.com/jsotuyod) (@jsotuyod)
* [#5258](https://github.com/pmd/pmd/pull/5258): Ignore generated antlr classes in coverage reports - [Juan Martín Sotuyo Dodero](https://github.com/jsotuyod) (@jsotuyod)
diff --git a/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/CppLanguageModule.java b/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/CppLanguageModule.java
index 520fc7327c..5312f8abaf 100644
--- a/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/CppLanguageModule.java
+++ b/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/CppLanguageModule.java
@@ -47,6 +47,8 @@ public class CppLanguageModule extends CpdOnlyLanguageModuleBase {
LanguagePropertyBundle bundle = super.newPropertyBundle();
bundle.definePropertyDescriptor(CpdLanguageProperties.CPD_IGNORE_LITERAL_SEQUENCES);
bundle.definePropertyDescriptor(CpdLanguageProperties.CPD_IGNORE_LITERAL_AND_IDENTIFIER_SEQUENCES);
+ bundle.definePropertyDescriptor(CpdLanguageProperties.CPD_ANONYMIZE_IDENTIFIERS);
+ bundle.definePropertyDescriptor(CpdLanguageProperties.CPD_ANONYMIZE_LITERALS);
bundle.definePropertyDescriptor(CPD_SKIP_BLOCKS);
return bundle;
}
diff --git a/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/cpd/CppCpdLexer.java b/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/cpd/CppCpdLexer.java
index 5d0e423a51..69287747ba 100644
--- a/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/cpd/CppCpdLexer.java
+++ b/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/cpd/CppCpdLexer.java
@@ -9,8 +9,9 @@ import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import net.sourceforge.pmd.cpd.CpdLanguageProperties;
-import net.sourceforge.pmd.cpd.impl.CpdLexerBase;
+import net.sourceforge.pmd.cpd.TokenFactory;
import net.sourceforge.pmd.cpd.impl.JavaCCTokenFilter;
+import net.sourceforge.pmd.cpd.impl.JavaccCpdLexer;
import net.sourceforge.pmd.lang.LanguagePropertyBundle;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
@@ -26,17 +27,21 @@ import net.sourceforge.pmd.lang.document.TextDocument;
*
*
Note: This class has been called CPPTokenizer in PMD 6
.
*/
-public class CppCpdLexer extends CpdLexerBase {
+public class CppCpdLexer extends JavaccCpdLexer {
private boolean skipBlocks;
private Pattern skipBlocksStart;
private Pattern skipBlocksEnd;
private final boolean ignoreIdentifierAndLiteralSeqences;
private final boolean ignoreLiteralSequences;
+ private final boolean ignoreLiterals;
+ private final boolean ignoreIdentifiers;
public CppCpdLexer(LanguagePropertyBundle cppProperties) {
ignoreLiteralSequences = cppProperties.getProperty(CpdLanguageProperties.CPD_IGNORE_LITERAL_SEQUENCES);
ignoreIdentifierAndLiteralSeqences = cppProperties.getProperty(CpdLanguageProperties.CPD_IGNORE_LITERAL_AND_IDENTIFIER_SEQUENCES);
+ ignoreLiterals = cppProperties.getProperty(CpdLanguageProperties.CPD_ANONYMIZE_LITERALS);
+ ignoreIdentifiers = cppProperties.getProperty(CpdLanguageProperties.CPD_ANONYMIZE_IDENTIFIERS);
String skipBlocksPattern = cppProperties.getProperty(CppLanguageModule.CPD_SKIP_BLOCKS);
if (StringUtils.isNotBlank(skipBlocksPattern)) {
skipBlocks = true;
@@ -73,6 +78,23 @@ public class CppCpdLexer extends CpdLexerBase {
return new CppTokenFilter(tokenManager, ignoreLiteralSequences, ignoreIdentifierAndLiteralSeqences);
}
+ @Override
+ protected void processToken(TokenFactory tokenEntries, JavaccToken currentToken) {
+ int kind = currentToken.getKind();
+ String image = currentToken.getImage();
+
+ boolean isLiteral = kind == CppTokenKinds.STRING || kind == CppTokenKinds.RSTRING || kind == CppTokenKinds.CHARACTER || kind == CppTokenKinds.DECIMAL_INT_LITERAL || kind == CppTokenKinds.HEXADECIMAL_INT_LITERAL || kind == CppTokenKinds.OCTAL_INT_LITERAL || kind == CppTokenKinds.FLOAT_LITERAL || kind == CppTokenKinds.BINARY_INT_LITERAL || kind == CppTokenKinds.ZERO;
+ if (ignoreLiterals && isLiteral) {
+ image = CppTokenKinds.describe(kind);
+ }
+
+ if (ignoreIdentifiers && (kind == CppTokenKinds.ID)) {
+ image = CppTokenKinds.describe(kind);
+ }
+
+ tokenEntries.recordToken(image, currentToken.getReportLocation());
+ }
+
private static class CppTokenFilter extends JavaCCTokenFilter {
private final boolean ignoreLiteralSequences;
diff --git a/pmd-cpp/src/test/java/net/sourceforge/pmd/lang/cpp/cpd/CppCpdLexerTest.java b/pmd-cpp/src/test/java/net/sourceforge/pmd/lang/cpp/cpd/CppCpdLexerTest.java
index 805bb9a86d..90bfe90a32 100644
--- a/pmd-cpp/src/test/java/net/sourceforge/pmd/lang/cpp/cpd/CppCpdLexerTest.java
+++ b/pmd-cpp/src/test/java/net/sourceforge/pmd/lang/cpp/cpd/CppCpdLexerTest.java
@@ -59,6 +59,16 @@ class CppCpdLexerTest extends CpdTextComparisonTest {
doTest("specialComments");
}
+ @Test
+ void testIgnoreLiterals() {
+ doTest("ignoreLiterals", "", ignoreLiterals());
+ }
+
+ @Test
+ void testIgnoreIdents() {
+ doTest("ignoreIdents", "", ignoreIdents());
+ }
+
@Test
void testMultiLineMacros() {
doTest("multilineMacros");
@@ -142,7 +152,7 @@ class CppCpdLexerTest extends CpdTextComparisonTest {
}
private static LanguagePropertyConfig skipBlocks(String skipPattern) {
- return properties(true, skipPattern, false, false);
+ return properties(true, skipPattern, false, false, false, false);
}
private static LanguagePropertyConfig skipBlocks() {
@@ -150,22 +160,31 @@ class CppCpdLexerTest extends CpdTextComparisonTest {
}
private static LanguagePropertyConfig dontSkipBlocks() {
- return properties(false, null, false, false);
+ return properties(false, null, false, false, false, false);
}
private static LanguagePropertyConfig skipLiteralSequences() {
- return properties(false, null, true, false);
+ return properties(false, null, true, false, false, false);
}
private static LanguagePropertyConfig skipIdentifierAndLiteralsSequences() {
- return properties(false, null, true, true);
+ return properties(false, null, true, true, false, false);
}
private static LanguagePropertyConfig skipIdentifierSequences() {
- return properties(false, null, false, true);
+ return properties(false, null, false, true, false, false);
}
- private static LanguagePropertyConfig properties(boolean skipBlocks, String skipPattern, boolean skipLiteralSequences, boolean skipSequences) {
+ private static LanguagePropertyConfig ignoreIdents() {
+ return properties(false, null, false, false, false, true);
+ }
+
+ private static LanguagePropertyConfig ignoreLiterals() {
+ return properties(false, null, false, false, true, false);
+ }
+
+
+ private static LanguagePropertyConfig properties(boolean skipBlocks, String skipPattern, boolean skipLiteralSequences, boolean skipSequences, boolean ignoreLiterals, boolean ignoreIdents) {
return properties -> {
if (!skipBlocks) {
properties.setProperty(CppLanguageModule.CPD_SKIP_BLOCKS, "");
@@ -174,6 +193,8 @@ class CppCpdLexerTest extends CpdTextComparisonTest {
}
properties.setProperty(CpdLanguageProperties.CPD_IGNORE_LITERAL_SEQUENCES, skipLiteralSequences);
properties.setProperty(CpdLanguageProperties.CPD_IGNORE_LITERAL_AND_IDENTIFIER_SEQUENCES, skipSequences);
+ properties.setProperty(CpdLanguageProperties.CPD_ANONYMIZE_LITERALS, ignoreLiterals);
+ properties.setProperty(CpdLanguageProperties.CPD_ANONYMIZE_IDENTIFIERS, ignoreIdents);
};
}
}
diff --git a/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/ignoreIdents.cpp b/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/ignoreIdents.cpp
new file mode 100644
index 0000000000..91473e3e67
--- /dev/null
+++ b/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/ignoreIdents.cpp
@@ -0,0 +1,6 @@
+class Test {
+ void f(int a, float b) {
+ auto c = a + b;
+ int d = 6;
+ }
+}
\ No newline at end of file
diff --git a/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/ignoreIdents.txt b/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/ignoreIdents.txt
new file mode 100644
index 0000000000..b564fde247
--- /dev/null
+++ b/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/ignoreIdents.txt
@@ -0,0 +1,35 @@
+ [Image] or [Truncated image[ Bcol Ecol
+L1
+ [class] 1 6
+ [] 7 11
+ [{] 12 13
+L2
+ [void] 2 6
+ [] 7 8
+ [(] 8 9
+ [int] 9 12
+ [] 13 14
+ [,] 14 15
+ [float] 16 21
+ [] 22 23
+ [)] 23 24
+ [{] 25 26
+L3
+ [auto] 3 7
+ [] 8 9
+ [=] 10 11
+ [] 12 13
+ [+] 14 15
+ [] 16 17
+ [;] 17 18
+L4
+ [int] 3 6
+ [] 7 8
+ [=] 9 10
+ [6] 11 12
+ [;] 12 13
+L5
+ [}] 2 3
+L6
+ [}] 1 2
+EOF
diff --git a/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/ignoreLiterals.cpp b/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/ignoreLiterals.cpp
new file mode 100644
index 0000000000..cbae7336ba
--- /dev/null
+++ b/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/ignoreLiterals.cpp
@@ -0,0 +1,43 @@
+ void main() {
+ char x = L'a'; // wide chars
+ x = '\0x05'; // hex
+ // x = L''; // empty character is an error
+
+ print("\ oMedia"); // whitespace escape
+
+
+ // char prefixes
+ char16_t c = u'\u00F6';
+ wchar_t b = L'\xFFEF';
+ char a = '\x30';
+ char32_t d = U'\U0010FFFF';
+
+ // string prefixes
+ char A[] = "Hello\x0A";
+ wchar_t B[] = L"Hell\xF6\x0A";
+ char16_t C[] = u"Hell\u00F6";
+ char32_t D[] = U"Hell\U000000F6\U0010FFFF";
+ auto E[] = u8"\u00F6\U0010FFFF";
+
+
+
+ char* rawString = R"(
+ [Sinks.1]
+ Destination=Console
+ AutoFlush=true
+ Format="[%TimeStamp%] %ThreadId% %QueryIdHigh% %QueryIdLow% %LoggerFile%:%Line% (%Severity%) - %Message%"
+ Filter="%Severity% >= WRN"
+ )";
+
+
+
+ // digit separators
+ auto integer_literal = 1'000''000;
+ auto floating_point_literal = 0.000'015'3;
+ auto hex_literal = 0x0F00'abcd'6f3d;
+ auto silly_example = 1'0'0'000'00;
+
+ // boolean literals
+ int b1 = 0B001101; // C++ 14 binary literal
+ int b2 = 0b000001; // C++ 14 binary literal
+}
\ No newline at end of file
diff --git a/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/ignoreLiterals.txt b/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/ignoreLiterals.txt
new file mode 100644
index 0000000000..ef31240e44
--- /dev/null
+++ b/pmd-cpp/src/test/resources/net/sourceforge/pmd/lang/cpp/cpd/testdata/ignoreLiterals.txt
@@ -0,0 +1,135 @@
+ [Image] or [Truncated image[ Bcol Ecol
+L1
+ [void] 2 6
+ [main] 7 11
+ [(] 11 12
+ [)] 12 13
+ [{] 14 15
+L2
+ [char] 5 9
+ [x] 10 11
+ [=] 12 13
+ [] 14 18
+ [;] 18 19
+L3
+ [x] 5 6
+ [=] 7 8
+ [] 9 16
+ [;] 16 17
+L6
+ [print] 5 10
+ [(] 10 11
+ [] 11 24
+ [)] 24 25
+ [;] 25 26
+L10
+ [char16_t] 5 13
+ [c] 14 15
+ [=] 16 17
+ [] 18 27
+ [;] 27 28
+L11
+ [wchar_t] 5 12
+ [b] 13 14
+ [=] 15 16
+ [] 17 26
+ [;] 26 27
+L12
+ [char] 5 9
+ [a] 10 11
+ [=] 12 13
+ [] 15 21
+ [;] 21 22
+L13
+ [char32_t] 5 13
+ [d] 14 15
+ [=] 16 17
+ [] 18 31
+ [;] 31 32
+L16
+ [char] 5 9
+ [A] 10 11
+ [\[] 11 12
+ [\]] 12 13
+ [=] 14 15
+ [] 16 27
+ [;] 27 28
+L17
+ [wchar_t] 5 12
+ [B] 13 14
+ [\[] 14 15
+ [\]] 15 16
+ [=] 17 18
+ [] 19 34
+ [;] 34 35
+L18
+ [char16_t] 5 13
+ [C] 14 15
+ [\[] 15 16
+ [\]] 16 17
+ [=] 18 19
+ [] 20 33
+ [;] 33 34
+L19
+ [char32_t] 5 13
+ [D] 14 15
+ [\[] 15 16
+ [\]] 16 17
+ [=] 18 19
+ [] 20 47
+ [;] 47 48
+L20
+ [auto] 5 9
+ [E] 10 11
+ [\[] 11 12
+ [\]] 12 13
+ [=] 14 15
+ [] 16 36
+ [;] 36 37
+L24
+ [char] 5 9
+ [*] 9 10
+ [rawString] 11 20
+ [=] 21 22
+ [] 23 7
+L30
+ [;] 7 8
+L35
+ [auto] 5 9
+ [integer_literal] 10 25
+ [=] 26 27
+ [] 28 38
+ [;] 38 39
+L36
+ [auto] 5 9
+ [floating_point_literal] 10 32
+ [=] 33 34
+ [] 35 46
+ [;] 46 47
+L37
+ [auto] 5 9
+ [hex_literal] 10 21
+ [=] 22 23
+ [] 24 40
+ [;] 40 41
+L38
+ [auto] 5 9
+ [silly_example] 10 23
+ [=] 24 25
+ [] 26 38
+ [;] 38 39
+L41
+ [int] 5 8
+ [b1] 9 11
+ [=] 12 13
+ [] 14 22
+ [;] 22 23
+L42
+ [int] 5 8
+ [b2] 9 11
+ [=] 12 13
+ [] 14 22
+ [;] 22 23
+L43
+ [}] 1 2
+EOF