Added strings as literal and ignore identifiers in sequences
This commit is contained in:
parent
0313ffdedd
commit
85a0d7f59f
@ -69,6 +69,9 @@ public class CpdCommand extends AbstractAnalysisPmdSubcommand {
|
||||
@Option(names = "--ignore-literal-sequences", description = "Ignore sequences of literals such as list initializers.")
|
||||
private boolean ignoreLiteralSequences;
|
||||
|
||||
@Option(names = "--ignore-sequences", description = "Ignore sequences of identifiers and literals")
|
||||
private boolean ignoreIdentifierAndLiteralSequences;
|
||||
|
||||
@Option(names = "--skip-lexical-errors",
|
||||
description = "Skip files which can't be tokenized due to invalid characters, instead of aborting with an error.")
|
||||
private boolean skipLexicalErrors;
|
||||
|
@ -65,6 +65,8 @@ public class CPDConfiguration extends AbstractConfiguration {
|
||||
|
||||
private boolean ignoreLiteralSequences = false;
|
||||
|
||||
private boolean ignoreIdentifierAndLiteralSequences = false;
|
||||
|
||||
private boolean skipLexicalErrors = false;
|
||||
|
||||
private boolean noSkipBlocks = false;
|
||||
@ -190,6 +192,11 @@ public class CPDConfiguration extends AbstractConfiguration {
|
||||
} else {
|
||||
properties.remove(Tokenizer.OPTION_IGNORE_LITERAL_SEQUENCES);
|
||||
}
|
||||
if (configuration.isIgnoreIdentifierAndLiteralSequences()) {
|
||||
properties.setProperty(Tokenizer.OPTION_IGNORE_IDENTIFIER_AND_LITERAL_SEQUENCES, "true");
|
||||
} else {
|
||||
properties.remove(Tokenizer.OPTION_IGNORE_IDENTIFIER_AND_LITERAL_SEQUENCES);
|
||||
}
|
||||
properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS, Boolean.toString(!configuration.isNoSkipBlocks()));
|
||||
properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS_PATTERN, configuration.getSkipBlocksPattern());
|
||||
configuration.getLanguage().setProperties(properties);
|
||||
@ -316,6 +323,14 @@ public class CPDConfiguration extends AbstractConfiguration {
|
||||
this.ignoreLiteralSequences = ignoreLiteralSequences;
|
||||
}
|
||||
|
||||
public boolean isIgnoreIdentifierAndLiteralSequences() {
|
||||
return ignoreIdentifierAndLiteralSequences;
|
||||
}
|
||||
|
||||
public void setIgnoreIdentifierAndLiteralSequences(boolean ignoreIdentifierAndLiteralSequences) {
|
||||
this.ignoreIdentifierAndLiteralSequences = ignoreIdentifierAndLiteralSequences;
|
||||
}
|
||||
|
||||
public boolean isSkipLexicalErrors() {
|
||||
return skipLexicalErrors;
|
||||
}
|
||||
|
@ -15,6 +15,12 @@ public interface Tokenizer {
|
||||
* Ignore sequences of literals (e.g., <code>0,0,0,0...</code>).
|
||||
*/
|
||||
String OPTION_IGNORE_LITERAL_SEQUENCES = "net.sourceforge.pmd.cpd.Tokenizer.skipLiteralSequences";
|
||||
|
||||
/**
|
||||
* Ignore comma-separated sequences of identifiers and literals (e.g., <code>a,b,0,0...</code>).
|
||||
*/
|
||||
String OPTION_IGNORE_IDENTIFIER_AND_LITERAL_SEQUENCES = "net.sourceforge.pmd.cpd.Tokenizer.skipSequences";
|
||||
|
||||
/**
|
||||
* Ignore using directives in C#. The default value is <code>false</code>.
|
||||
*/
|
||||
|
@ -26,6 +26,7 @@ public class CPPTokenizer extends JavaCCTokenizer {
|
||||
private boolean skipBlocks;
|
||||
private Pattern skipBlocksStart;
|
||||
private Pattern skipBlocksEnd;
|
||||
private boolean ignoreIdentifierAndLiteralSeqences = false;
|
||||
private boolean ignoreLiteralSequences = false;
|
||||
|
||||
public CPPTokenizer() {
|
||||
@ -52,8 +53,9 @@ public class CPPTokenizer extends JavaCCTokenizer {
|
||||
skipBlocksEnd = CppBlockSkipper.compileSkipMarker(split[1]);
|
||||
}
|
||||
}
|
||||
ignoreLiteralSequences = Boolean.parseBoolean(properties.getProperty(OPTION_IGNORE_LITERAL_SEQUENCES,
|
||||
Boolean.FALSE.toString()));
|
||||
ignoreLiteralSequences = Boolean.parseBoolean(properties.getProperty(OPTION_IGNORE_LITERAL_SEQUENCES, Boolean.FALSE.toString()));
|
||||
ignoreIdentifierAndLiteralSeqences =
|
||||
Boolean.parseBoolean(properties.getProperty(OPTION_IGNORE_IDENTIFIER_AND_LITERAL_SEQUENCES, Boolean.FALSE.toString()));
|
||||
}
|
||||
|
||||
|
||||
@ -78,42 +80,44 @@ public class CPPTokenizer extends JavaCCTokenizer {
|
||||
|
||||
@Override
|
||||
protected TokenFilter<JavaccToken> getTokenFilter(final TokenManager<JavaccToken> tokenManager) {
|
||||
return new CppTokenFilter(tokenManager, ignoreLiteralSequences);
|
||||
return new CppTokenFilter(tokenManager, ignoreLiteralSequences, ignoreIdentifierAndLiteralSeqences);
|
||||
}
|
||||
|
||||
private static class CppTokenFilter extends JavaCCTokenFilter {
|
||||
private final boolean ignoreLiteralSequences;
|
||||
private JavaccToken discardingLiteralsUntil = null;
|
||||
private final boolean ignoreIdentifierAndLiteralSeqences;
|
||||
private JavaccToken discardingTokensUntil = null;
|
||||
private boolean discardCurrent = false;
|
||||
|
||||
CppTokenFilter(final TokenManager<JavaccToken> tokenManager, final boolean ignoreLiteralSequences) {
|
||||
CppTokenFilter(final TokenManager<JavaccToken> tokenManager, final boolean ignoreLiteralSequences, final boolean ignoreIdentifierAndLiteralSeqences) {
|
||||
super(tokenManager);
|
||||
this.ignoreIdentifierAndLiteralSeqences = ignoreIdentifierAndLiteralSeqences;
|
||||
this.ignoreLiteralSequences = ignoreLiteralSequences;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void analyzeTokens(final JavaccToken currentToken, final Iterable<JavaccToken> remainingTokens) {
|
||||
discardCurrent = false;
|
||||
skipLiteralSequences(currentToken, remainingTokens);
|
||||
skipSequences(currentToken, remainingTokens);
|
||||
}
|
||||
|
||||
private void skipLiteralSequences(final JavaccToken currentToken, final Iterable<JavaccToken> remainingTokens) {
|
||||
if (ignoreLiteralSequences) {
|
||||
private void skipSequences(final JavaccToken currentToken, final Iterable<JavaccToken> remainingTokens) {
|
||||
if (ignoreLiteralSequences || ignoreIdentifierAndLiteralSeqences) {
|
||||
final int kind = currentToken.getKind();
|
||||
if (isDiscardingLiterals()) {
|
||||
if (currentToken == discardingLiteralsUntil) { // NOPMD - intentional check for reference equality
|
||||
discardingLiteralsUntil = null;
|
||||
if (isDiscardingToken()) {
|
||||
if (currentToken == discardingTokensUntil) { // NOPMD - intentional check for reference equality
|
||||
discardingTokensUntil = null;
|
||||
discardCurrent = true;
|
||||
}
|
||||
} else if (kind == CppTokenKinds.LCURLYBRACE) {
|
||||
final JavaccToken finalToken = findEndOfSequenceOfLiterals(remainingTokens);
|
||||
discardingLiteralsUntil = finalToken;
|
||||
final JavaccToken finalToken = findEndOfSequenceToDiscard(remainingTokens, ignoreIdentifierAndLiteralSeqences);
|
||||
discardingTokensUntil = finalToken;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static JavaccToken findEndOfSequenceOfLiterals(final Iterable<JavaccToken> remainingTokens) {
|
||||
boolean seenLiteral = false;
|
||||
private static JavaccToken findEndOfSequenceToDiscard(final Iterable<JavaccToken> remainingTokens, boolean ignoreIdentifierAndLiteralSeqences) {
|
||||
boolean seenAllowedToken = false;
|
||||
int braceCount = 0;
|
||||
for (final JavaccToken token : remainingTokens) {
|
||||
switch (token.getKind()) {
|
||||
@ -123,8 +127,18 @@ public class CPPTokenizer extends JavaCCTokenizer {
|
||||
case CppTokenKinds.HEXADECIMAL_INT_LITERAL:
|
||||
case CppTokenKinds.OCTAL_INT_LITERAL:
|
||||
case CppTokenKinds.ZERO:
|
||||
seenLiteral = true;
|
||||
case CppTokenKinds.STRING:
|
||||
seenAllowedToken = true;
|
||||
break; // can be skipped; continue to the next token
|
||||
case CppTokenKinds.ID:
|
||||
// Ignore identifiers if instructed
|
||||
if (ignoreIdentifierAndLiteralSeqences) {
|
||||
seenAllowedToken = true;
|
||||
break; // can be skipped; continue to the next token
|
||||
} else {
|
||||
// token not expected, other than identifier
|
||||
return null;
|
||||
}
|
||||
case CppTokenKinds.COMMA:
|
||||
break; // can be skipped; continue to the next token
|
||||
case CppTokenKinds.LCURLYBRACE:
|
||||
@ -134,7 +148,7 @@ public class CPPTokenizer extends JavaCCTokenizer {
|
||||
braceCount--;
|
||||
if (braceCount < 0) {
|
||||
// end of the list; skip all contents
|
||||
return seenLiteral ? token : null;
|
||||
return seenAllowedToken ? token : null;
|
||||
} else {
|
||||
// curly braces are not yet balanced; continue to the next token
|
||||
break;
|
||||
@ -147,13 +161,13 @@ public class CPPTokenizer extends JavaCCTokenizer {
|
||||
return null;
|
||||
}
|
||||
|
||||
private boolean isDiscardingLiterals() {
|
||||
return discardingLiteralsUntil != null;
|
||||
private boolean isDiscardingToken() {
|
||||
return discardingTokensUntil != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isLanguageSpecificDiscarding() {
|
||||
return isDiscardingLiterals() || discardCurrent;
|
||||
return isDiscardingToken() || discardCurrent;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -139,8 +139,18 @@ class CPPTokenizerTest extends CpdTextComparisonTest {
|
||||
doTest("listOfNumbers", "_ignored", skipLiteralSequences());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testLongListsOfNumbersAndIdentifiersAreIgnored() {
|
||||
doTest("listOfNumbers", "_ignored_identifiers", skipIdentifierAndLiteralsSequences());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testLongListsOfIdentifiersAreIgnored() {
|
||||
doTest("listOfNumbers", "_ignored_identifiers", skipIdentifierSequences());
|
||||
}
|
||||
|
||||
private static Properties skipBlocks(String skipPattern) {
|
||||
return properties(true, skipPattern, false);
|
||||
return properties(true, skipPattern, false, false);
|
||||
}
|
||||
|
||||
private static Properties skipBlocks() {
|
||||
@ -148,20 +158,29 @@ class CPPTokenizerTest extends CpdTextComparisonTest {
|
||||
}
|
||||
|
||||
private static Properties dontSkipBlocks() {
|
||||
return properties(false, null, false);
|
||||
return properties(false, null, false, false);
|
||||
}
|
||||
|
||||
private static Properties skipLiteralSequences() {
|
||||
return properties(false, null, true);
|
||||
return properties(false, null, true, false);
|
||||
}
|
||||
|
||||
private static Properties properties(boolean skipBlocks, String skipPattern, boolean skipLiteralSequences) {
|
||||
private static Properties skipIdentifierAndLiteralsSequences() {
|
||||
return properties(false, null, true, true);
|
||||
}
|
||||
|
||||
private static Properties skipIdentifierSequences() {
|
||||
return properties(false, null, false, true);
|
||||
}
|
||||
|
||||
private static Properties properties(boolean skipBlocks, String skipPattern, boolean skipLiteralSequences, boolean skipSequences) {
|
||||
Properties properties = new Properties();
|
||||
properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS, Boolean.toString(skipBlocks));
|
||||
if (skipPattern != null) {
|
||||
properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS_PATTERN, skipPattern);
|
||||
}
|
||||
properties.setProperty(Tokenizer.OPTION_IGNORE_LITERAL_SEQUENCES, Boolean.toString(skipLiteralSequences));
|
||||
properties.setProperty(Tokenizer.OPTION_IGNORE_IDENTIFIER_AND_LITERAL_SEQUENCES, Boolean.toString(skipSequences));
|
||||
return properties;
|
||||
}
|
||||
}
|
||||
|
@ -18,7 +18,7 @@ int main() {
|
||||
0b000001, // C++ 14 binary literal
|
||||
};
|
||||
int c[3][4] = {{0,1,2,3},{4,5,6,7},{8,9,10,11}}; // multi-dimensional array
|
||||
int d[3] = {a, a, a}; // identifiers should not be filtered out
|
||||
int d[3] = {a, a, a}; // identifiers should be filtered out if identifiers are allowed in sequences
|
||||
int e[1][3] = {{a, a, a}}; // identifiers in multi-dimensional array
|
||||
int f[1] = {main()}; // method invocations should not be filtered out
|
||||
int g[1][1] = {{main()}}; // method invocation in multi-dimensional array
|
||||
|
@ -0,0 +1,92 @@
|
||||
[Image] or [Truncated image[ Bcol Ecol
|
||||
L2
|
||||
[int] 1 4
|
||||
[main] 5 9
|
||||
[(] 9 10
|
||||
[)] 10 11
|
||||
[{] 12 13
|
||||
L3
|
||||
[int] 3 6
|
||||
[a] 7 8
|
||||
[\[] 8 9
|
||||
[50] 9 11
|
||||
[\]] 11 12
|
||||
[=] 13 14
|
||||
[;] 116 117
|
||||
L4
|
||||
[double] 3 9
|
||||
[b] 10 11
|
||||
[\[] 11 12
|
||||
[14] 12 14
|
||||
[\]] 14 15
|
||||
[=] 16 17
|
||||
L19
|
||||
[;] 4 5
|
||||
L20
|
||||
[int] 3 6
|
||||
[c] 7 8
|
||||
[\[] 8 9
|
||||
[3] 9 10
|
||||
[\]] 10 11
|
||||
[\[] 11 12
|
||||
[4] 12 13
|
||||
[\]] 13 14
|
||||
[=] 15 16
|
||||
[;] 50 51
|
||||
L21
|
||||
[int] 3 6
|
||||
[d] 7 8
|
||||
[\[] 8 9
|
||||
[3] 9 10
|
||||
[\]] 10 11
|
||||
[=] 12 13
|
||||
[;] 23 24
|
||||
L22
|
||||
[int] 3 6
|
||||
[e] 7 8
|
||||
[\[] 8 9
|
||||
[1] 9 10
|
||||
[\]] 10 11
|
||||
[\[] 11 12
|
||||
[3] 12 13
|
||||
[\]] 13 14
|
||||
[=] 15 16
|
||||
[;] 28 29
|
||||
L23
|
||||
[int] 3 6
|
||||
[f] 7 8
|
||||
[\[] 8 9
|
||||
[1] 9 10
|
||||
[\]] 10 11
|
||||
[=] 12 13
|
||||
[{] 14 15
|
||||
[main] 15 19
|
||||
[(] 19 20
|
||||
[)] 20 21
|
||||
[}] 21 22
|
||||
[;] 22 23
|
||||
L24
|
||||
[int] 3 6
|
||||
[g] 7 8
|
||||
[\[] 8 9
|
||||
[1] 9 10
|
||||
[\]] 10 11
|
||||
[\[] 11 12
|
||||
[1] 12 13
|
||||
[\]] 13 14
|
||||
[=] 15 16
|
||||
[{] 17 18
|
||||
[{] 18 19
|
||||
[main] 19 23
|
||||
[(] 23 24
|
||||
[)] 24 25
|
||||
[}] 25 26
|
||||
[}] 26 27
|
||||
[;] 27 28
|
||||
L25
|
||||
[return] 3 9
|
||||
[0] 10 11
|
||||
[;] 11 12
|
||||
L26
|
||||
[}] 1 2
|
||||
EOF
|
Loading…
x
Reference in New Issue
Block a user