diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java index ba9df79744..64e86c6088 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java @@ -62,6 +62,13 @@ public class CPDConfiguration extends AbstractConfiguration { @Parameter(names = "--skip-lexical-errors", description = "Skip files which can't be tokenized due to invalid characters instead of aborting CPD", required = false) private boolean skipLexicalErrors = false; + @Parameter(names = "--no-skip-blocks", description = "Do not skip code blocks marked with --skip-blocks-pattern (e.g. #if 0 until #endif)", required = false) + private boolean noSkipBlocks = false; + + @Parameter(names = "--skip-blocks-pattern", description = "Pattern to find the blocks to skip. Start and End pattern separated by |. " + + "Default is \"" + Tokenizer.DEFAULT_SKIP_BLOCKS_PATTERN + "\".", required = false) + private String skipBlocksPattern = Tokenizer.DEFAULT_SKIP_BLOCKS_PATTERN; + @Parameter(names = "--files", variableArity = true, description = "List of files and directories to process", required = false) private List files; @@ -180,6 +187,8 @@ public class CPDConfiguration extends AbstractConfiguration { } else { properties.remove(Tokenizer.IGNORE_ANNOTATIONS); } + properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS, Boolean.toString(!configuration.isNoSkipBlocks())); + properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS_PATTERN, configuration.getSkipBlocksPattern()); configuration.getLanguage().setProperties(properties); } @@ -341,4 +350,20 @@ public class CPDConfiguration extends AbstractConfiguration { public String getEncoding() { return encoding; } + + public boolean isNoSkipBlocks() { + return noSkipBlocks; + } + + public void setNoSkipBlocks(boolean noSkipBlocks) { + this.noSkipBlocks = noSkipBlocks; + } + + public String 
getSkipBlocksPattern() { + return skipBlocksPattern; + } + + public void setSkipBlocksPattern(String skipBlocksPattern) { + this.skipBlocksPattern = skipBlocksPattern; + } } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDTask.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDTask.java index 266af0ae95..8c73a15f1e 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDTask.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDTask.java @@ -49,6 +49,8 @@ public class CPDTask extends Task { private boolean ignoreAnnotations; private boolean skipLexicalErrors; private boolean skipDuplicateFiles; + private boolean skipBlocks = true; + private String skipBlocksPattern = Tokenizer.DEFAULT_SKIP_BLOCKS_PATTERN; private File outputFile; private String encoding = System.getProperty("file.encoding"); private List filesets = new ArrayList(); @@ -102,6 +104,8 @@ public class CPDTask extends Task { if (ignoreAnnotations) { p.setProperty(Tokenizer.IGNORE_ANNOTATIONS, "true"); } + p.setProperty(Tokenizer.OPTION_SKIP_BLOCKS, Boolean.toString(skipBlocks)); + p.setProperty(Tokenizer.OPTION_SKIP_BLOCKS_PATTERN, skipBlocksPattern); return LanguageFactory.createLanguage(language, p); } @@ -208,6 +212,14 @@ public class CPDTask extends Task { this.encoding = encoding; } + public void setSkipBlocks(boolean skipBlocks) { + this.skipBlocks = skipBlocks; + } + + public void setSkipBlocksPattern(String skipBlocksPattern) { + this.skipBlocksPattern = skipBlocksPattern; + } + public static class FormatAttribute extends EnumeratedAttribute { private static final String[] FORMATS = new String[]{XML_FORMAT, TEXT_FORMAT, CSV_FORMAT}; public String[] getValues() { diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java index e0d6fc9e68..132886c266 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java +++ 
b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java @@ -9,6 +9,23 @@ public interface Tokenizer { String IGNORE_LITERALS = "ignore_literals"; String IGNORE_IDENTIFIERS = "ignore_identifiers"; String IGNORE_ANNOTATIONS = "ignore_annotations"; + /** + * Enables or disables skipping of blocks like a pre-processor. + * It is a boolean property. + * The default value is true. + * @see #OPTION_SKIP_BLOCKS_PATTERN + */ + String OPTION_SKIP_BLOCKS = "net.sourceforge.pmd.cpd.Tokenizer.skipBlocks"; + /** + * Configures the pattern used to find the blocks to skip. + * It is a string property and consists of two parts, separated by {@code |}. + * The first part is the start pattern, the second part is the ending pattern. + * Default value is "{@code #if 0|#endif}". + * @see #DEFAULT_SKIP_BLOCKS_PATTERN + */ + String OPTION_SKIP_BLOCKS_PATTERN = "net.sourceforge.pmd.cpd.Tokenizer.skipBlocksPattern"; + + String DEFAULT_SKIP_BLOCKS_PATTERN = "#if 0|#endif"; void tokenize(SourceCode sourceCode, Tokens tokenEntries) throws IOException; } diff --git a/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPLanguage.java b/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPLanguage.java index 2f3a69ce4e..fbc0944e0e 100644 --- a/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPLanguage.java +++ b/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPLanguage.java @@ -3,6 +3,8 @@ */ package net.sourceforge.pmd.cpd; +import java.util.Properties; + /** * Defines the Language module for C/C++ */ @@ -14,4 +16,13 @@ public class CPPLanguage extends AbstractLanguage { public CPPLanguage() { super("C++", "cpp", new CPPTokenizer(), ".h", ".hpp", ".hxx", ".c", ".cpp", ".cxx", ".cc", ".C"); } + + /* (non-Javadoc) + * @see net.sourceforge.pmd.cpd.AbstractLanguage#setProperties(java.util.Properties) + */ + @Override + public void setProperties(Properties properties) { + super.setProperties(properties); + ((CPPTokenizer)getTokenizer()).setProperties(properties); + } } diff --git 
a/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java b/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java index d18f5c1506..3d457d4749 100644 --- a/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java +++ b/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java @@ -3,8 +3,12 @@ */ package net.sourceforge.pmd.cpd; +import java.io.BufferedReader; +import java.io.IOException; import java.io.StringReader; +import java.util.Properties; +import net.sourceforge.pmd.PMD; import net.sourceforge.pmd.lang.LanguageRegistry; import net.sourceforge.pmd.lang.LanguageVersionHandler; import net.sourceforge.pmd.lang.TokenManager; @@ -19,6 +23,30 @@ import org.apache.commons.io.IOUtils; */ public class CPPTokenizer implements Tokenizer { + private boolean skipBlocks = true; + private String skipBlocksStart; + private String skipBlocksEnd; + + /** + * Sets the possible options for the C++ tokenizer. + * @param properties the properties + * @see #OPTION_SKIP_BLOCKS + * @see #OPTION_SKIP_BLOCKS_PATTERN + */ + public void setProperties(Properties properties) { + skipBlocks = Boolean.parseBoolean(properties.getProperty(OPTION_SKIP_BLOCKS, Boolean.TRUE.toString())); + if (skipBlocks) { + String skipBlocksPattern = properties.getProperty(OPTION_SKIP_BLOCKS_PATTERN, DEFAULT_SKIP_BLOCKS_PATTERN); + String[] split = skipBlocksPattern.split("\\|", 2); + skipBlocksStart = split[0]; + if (split.length == 1) { + skipBlocksEnd = split[0]; + } else { + skipBlocksEnd = split[1]; + } + } + } + @Override public void tokenize(SourceCode sourceCode, Tokens tokenEntries) { StringBuilder buffer = sourceCode.getCodeBuffer(); @@ -26,7 +54,7 @@ public class CPPTokenizer implements Tokenizer { try { LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(CppLanguageModule.NAME) .getDefaultVersion().getLanguageVersionHandler(); - reader = new StringReader(buffer.toString()); + reader = new StringReader(maybeSkipBlocks(buffer.toString())); 
TokenManager tokenManager = languageVersionHandler.getParser( languageVersionHandler.getDefaultParserOptions()).getTokenManager(sourceCode.getFileName(), reader); Token currentToken = (Token) tokenManager.getNextToken(); @@ -40,8 +68,35 @@ public class CPPTokenizer implements Tokenizer { err.printStackTrace(); System.err.println("Skipping " + sourceCode.getFileName() + " due to parse error"); tokenEntries.add(TokenEntry.getEOF()); + } catch (IOException e) { + e.printStackTrace(); + System.err.println("Skipping " + sourceCode.getFileName() + " due to parse error"); + tokenEntries.add(TokenEntry.getEOF()); } finally { IOUtils.closeQuietly(reader); } } + + private String maybeSkipBlocks(String test) throws IOException { + if (!skipBlocks) { + return test; + } + + BufferedReader reader = new BufferedReader(new StringReader(test)); + StringBuilder filtered = new StringBuilder(test.length()); + String line; + boolean skip = false; + while ((line = reader.readLine()) != null) { + if (skipBlocksStart.equalsIgnoreCase(line.trim())) { + skip = true; + } else if (skip && skipBlocksEnd.equalsIgnoreCase(line.trim())) { + skip = false; + } + if (!skip) { + filtered.append(line); + } + filtered.append(PMD.EOL); // always add a new line to keep the line-numbering + } + return filtered.toString(); + } } diff --git a/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerTest.java b/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerTest.java index 5baaf59d17..c59ac9d62b 100644 --- a/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerTest.java +++ b/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerTest.java @@ -5,8 +5,12 @@ package net.sourceforge.pmd.cpd; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; + +import java.util.Properties; + import net.sourceforge.pmd.PMD; +import org.apache.commons.io.IOUtils; import org.junit.Test; public class CPPTokenizerTest { @@ -51,8 +55,43 @@ public class CPPTokenizerTest { 
assertEquals(17, tokens.size()); } + @Test + public void testTokenizerWithSkipBlocks() throws Exception { + String test = IOUtils.toString(CPPTokenizerTest.class.getResourceAsStream("cpp/cpp_with_asm.cpp")); + Tokens tokens = parse(test, true); + assertEquals(19, tokens.size()); + } + + @Test + public void testTokenizerWithSkipBlocksPattern() throws Exception { + String test = IOUtils.toString(CPPTokenizerTest.class.getResourceAsStream("cpp/cpp_with_asm.cpp")); + Tokens tokens = parse(test, true, "#if debug|#endif"); + assertEquals(31, tokens.size()); + } + + @Test + public void testTokenizerWithoutSkipBlocks() throws Exception { + String test = IOUtils.toString(CPPTokenizerTest.class.getResourceAsStream("cpp/cpp_with_asm.cpp")); + Tokens tokens = parse(test, false); + assertEquals(37, tokens.size()); + } + private Tokens parse(String snippet) { + return parse(snippet, false); + } + private Tokens parse(String snippet, boolean skipBlocks) { + return parse(snippet, skipBlocks, null); + } + private Tokens parse(String snippet, boolean skipBlocks, String skipPattern) { + Properties properties = new Properties(); + properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS, Boolean.toString(skipBlocks)); + if (skipPattern != null) { + properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS_PATTERN, skipPattern); + } + CPPTokenizer tokenizer = new CPPTokenizer(); + tokenizer.setProperties(properties); + SourceCode code = new SourceCode(new SourceCode.StringCodeLoader(snippet)); Tokens tokens = new Tokens(); tokenizer.tokenize(code, tokens); diff --git a/pmd-cpp/src/test/resources/net/sourceforge/pmd/cpd/cpp/cpp_with_asm.cpp b/pmd-cpp/src/test/resources/net/sourceforge/pmd/cpd/cpp/cpp_with_asm.cpp new file mode 100644 index 0000000000..93361b4de6 --- /dev/null +++ b/pmd-cpp/src/test/resources/net/sourceforge/pmd/cpd/cpp/cpp_with_asm.cpp @@ -0,0 +1,28 @@ +int main() { +} + +#if DEBUG +int foobar() { +} +#endif + +#if 0 +static void my_memset(void *dest,int fill_value,int count) +{ + 
__asm __volatile__( + "cld\n" + "mov %ecx, %ebx\n" + "shr 2,%ecx\n" + "rep " + "stosl\n" + "mov %ebx,%ecx\n" + " // line 157 mentioned above + : + : "c" (count), "a" (fill_value), "D" (dest) + : "cc","%ebx" ); +} +#endif + + +int otherMethod() { +} \ No newline at end of file diff --git a/src/site/markdown/overview/changelog.md b/src/site/markdown/overview/changelog.md index d23ac697ad..21e18e57a5 100644 --- a/src/site/markdown/overview/changelog.md +++ b/src/site/markdown/overview/changelog.md @@ -8,8 +8,16 @@ **New/Modified Rules:** +**New Parameters for CPD:** + +For the language cpp, the following new parameters are supported: + +* `--no-skip-blocks`: Disables the pre-processor-like skipping of code blocks. Skipping is enabled by default. +* `--skip-blocks-pattern`: Pattern to find the blocks to skip. Start and end patterns are separated by "`|`". Default value is "`#if 0|#endif`". + **Bugfixes:** +* [#1090](https://sourceforge.net/p/pmd/bugs/1090/): cpp parser exception with inline asm * [#1128](https://sourceforge.net/p/pmd/bugs/1128/): CompareObjectsWithEquals False Positive comparing boolean (primitive) values * [#1254](https://sourceforge.net/p/pmd/bugs/1254/): CPD run that worked in 5.1.2 fails in 5.1.3 with OOM * [#1276](https://sourceforge.net/p/pmd/bugs/1276/): False positive in UnusedPrivateMethod with inner enum diff --git a/src/site/xdoc/usage/cpd-usage.xml b/src/site/xdoc/usage/cpd-usage.xml index e69ceb6472..f1cb9a4b4e 100644 --- a/src/site/xdoc/usage/cpd-usage.xml +++ b/src/site/xdoc/usage/cpd-usage.xml @@ -49,16 +49,19 @@ Attribute Description + Applies for language Required encoding The character set encoding (e.g., UTF-8) to use when reading the source code files, but also when producing the report. A piece of warning, even if you set properly the encoding value, let's say to UTF-8, but you are running CPD encoded with CP1252, you may end up with not UTF-8 file. 
Indeed, CPD copy piece of source code in its report directly, therefore, the source files keep their encoding.
If not specified, CPD uses the system default encoding.
+ No format The format of the report (e.g. csv, text, xml); defaults to text. + No @@ -67,28 +70,47 @@ value differences when evaluating a duplicate block. This means that foo=42; and foo=43; will be seen as equivalent. You may want to run PMD with this option off to start with and then switch it on to see what it turns up; defaults to false. + java No ignoreIdentifiers Similar to ignoreLiterals but for identifiers; i.e., variable names, methods names, and so forth; defaults to false. + java No ignoreAnnotations Ignore annotations. More and more modern frameworks use annotations on classes and methods, which can be very redundant and trigger CPD matches. With J2EE (CDI, Transaction Handling, etc) and Spring (everything) annotations become very redundant. Often classes or methods have the same 5-6 lines of annotations. This causes false positives; defaults to false. + java No skipDuplicateFiles Ignore multiple copies of files of the same name and length in comparison; defaults to false. + No skipLexicalErrors Skip files which can't be tokenized due to invalid characters instead of aborting CPD; defaults to false. + No + + skipBlocks + Enables or disables skipping of blocks like a pre-processor; defaults to true. See also option skipBlocksPattern. + cpp + No + + + skipBlocksPattern + Configures the pattern used to find the blocks to skip. It is a string property and consists of two parts, separated by |. + The first part is the start pattern, the second part is the ending pattern. + The default value is #if 0|#endif. + cpp + No + language @@ -97,16 +119,19 @@ ecmascript, and plsql); defaults to java. + No minimumtokencount A positive integer indicating the minimum duplicate size. + Yes outputfile The destination file for the report. If not specified the console will be used instead. + No