Merge branch 'pull-request-25'

2015-10-26 21:00:31 +01:00
parent 312d4c60c0 ba92715b19
commit c43b24a40f
9 changed files with 434 additions and 270 deletions
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java
@ -67,6 +67,9 @@ public class CPDConfiguration extends AbstractConfiguration {
    @Parameter(names = "--ignore-annotations", description = "Ignore language annotations when comparing text", required = false)
    private boolean ignoreAnnotations;

+    @Parameter(names = "--ignore-usings", description = "Ignore using directives in C#", required = false)
+    private boolean ignoreUsings;
+
    @Parameter(names = "--skip-lexical-errors", description = "Skip files which can't be tokenized due to invalid characters instead of aborting CPD", required = false)
    private boolean skipLexicalErrors = false;

@ -227,6 +230,11 @@ public class CPDConfiguration extends AbstractConfiguration {
        } else {
            properties.remove(Tokenizer.IGNORE_ANNOTATIONS);
        }
+        if (configuration.isIgnoreUsings()) {
+            properties.setProperty(Tokenizer.IGNORE_USINGS, "true");
+        } else {
+            properties.remove(Tokenizer.IGNORE_USINGS);
+        }
        properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS, Boolean.toString(!configuration.isNoSkipBlocks()));
        properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS_PATTERN, configuration.getSkipBlocksPattern());
        configuration.getLanguage().setProperties(properties);
@ -338,6 +346,14 @@ public class CPDConfiguration extends AbstractConfiguration {
        this.ignoreAnnotations = ignoreAnnotations;
    }

+    /**
+     * Returns whether using directives in C# are ignored, i.e. the value of
+     * the <code>--ignore-usings</code> option.
+     *
+     * @return the current value of the <code>ignoreUsings</code> flag
+     */
+    public boolean isIgnoreUsings() {
+        return ignoreUsings;
+    }
+
+    /**
+     * Sets whether using directives in C# should be ignored.
+     *
+     * @param ignoreUsings the new value of the <code>ignoreUsings</code> flag
+     */
+    public void setIgnoreUsings(boolean ignoreUsings) {
+        this.ignoreUsings = ignoreUsings;
+    }
+
    public boolean isSkipLexicalErrors() {
        return skipLexicalErrors;
    }
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDTask.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDTask.java
@ -47,6 +47,7 @@ public class CPDTask extends Task {
    private boolean ignoreLiterals;
    private boolean ignoreIdentifiers;
    private boolean ignoreAnnotations;
+    private boolean ignoreUsings;
    private boolean skipLexicalErrors;
    private boolean skipDuplicateFiles;
    private boolean skipBlocks = true;
@ -104,6 +105,9 @@ public class CPDTask extends Task {
        if (ignoreAnnotations) {
            p.setProperty(Tokenizer.IGNORE_ANNOTATIONS, "true");
        }
+        if (ignoreUsings) {
+            p.setProperty(Tokenizer.IGNORE_USINGS, "true");
+        }
        p.setProperty(Tokenizer.OPTION_SKIP_BLOCKS, Boolean.toString(skipBlocks));
        p.setProperty(Tokenizer.OPTION_SKIP_BLOCKS_PATTERN, skipBlocksPattern);
        return LanguageFactory.createLanguage(language, p);
@ -188,6 +192,10 @@ public class CPDTask extends Task {
        this.ignoreAnnotations = value;
    }

+    /**
+     * Sets the <code>ignoreUsings</code> attribute of this task
+     * (ignore using directives in C#).
+     *
+     * @param value the new value of the attribute
+     */
+    public void setIgnoreUsings(boolean value) {
+        this.ignoreUsings = value;
+    }
+
    public void setSkipLexicalErrors(boolean skipLexicalErrors) {
        this.skipLexicalErrors = skipLexicalErrors;
    }
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/GUI.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/GUI.java
@ -85,6 +85,7 @@ public class GUI implements CPDListener {
 		public boolean canIgnoreIdentifiers() { return false; }
 		public boolean canIgnoreLiterals() { return false; }
 		public boolean canIgnoreAnnotations() { return false; }
+		/** Whether the "Ignore usings?" checkbox is enabled for this language; false unless overridden. */
+		public boolean canIgnoreUsings() { return false; }
 		public abstract String[] extensions();
 	}

@ -121,6 +122,10 @@ public class GUI implements CPDListener {
                public boolean canIgnoreLiterals() {
                    return "java".equals(terseName);
                }
+                /** Ignoring using directives is only offered for C#. */
+                @Override
+                public boolean canIgnoreUsings() {
+                    // "cs" is the terse name of the C# language
+                    return "cs".equals(terseName);
+                }
            };
        }
        LANGUAGE_SETS[index][0] = "by extension...";
@ -277,6 +282,7 @@ public class GUI implements CPDListener {
    private JCheckBox ignoreIdentifiersCheckbox = new JCheckBox("", false);
    private JCheckBox ignoreLiteralsCheckbox = new JCheckBox("", false);
    private JCheckBox ignoreAnnotationsCheckbox = new JCheckBox("", false);
+    private JCheckBox ignoreUsingsCheckbox  = new JCheckBox("", false);
    private JComboBox languageBox			= new JComboBox();
    private JTextField extensionField		= new JTextField();
    private JLabel extensionLabel			= new JLabel("Extension:", SwingConstants.RIGHT);
@ -362,6 +368,7 @@ public class GUI implements CPDListener {
         ignoreIdentifiersCheckbox.setEnabled(current.canIgnoreIdentifiers());
         ignoreLiteralsCheckbox.setEnabled(current.canIgnoreLiterals());
         ignoreAnnotationsCheckbox.setEnabled(current.canIgnoreAnnotations());
+         ignoreUsingsCheckbox.setEnabled(current.canIgnoreUsings());
         extensionField.setText(current.extensions()[0]);
         boolean enableExtension = current.extensions()[0].length() == 0;
         extensionField.setEnabled(enableExtension);
@ -414,6 +421,13 @@ public class GUI implements CPDListener {
        helper.nextRow();
        helper.addLabel("Ignore annotations?");
        helper.add(ignoreAnnotationsCheckbox);
+        helper.addLabel("");
+        helper.addLabel("");
+        helper.nextRow();
+
+        helper.nextRow();
+        helper.addLabel("Ignore usings?");
+        helper.add(ignoreUsingsCheckbox);
        helper.add(goButton);
        helper.add(cxButton);
        helper.nextRow();
@ -590,6 +604,7 @@ public class GUI implements CPDListener {
            config.setIgnoreIdentifiers(ignoreIdentifiersCheckbox.isSelected());
            config.setIgnoreLiterals(ignoreLiteralsCheckbox.isSelected());
            config.setIgnoreAnnotations(ignoreAnnotationsCheckbox.isSelected());
+            config.setIgnoreUsings(ignoreUsingsCheckbox.isSelected());
            p.setProperty(LanguageFactory.EXTENSION, extensionField.getText());

            LanguageConfig conf = languageConfigFor((String)languageBox.getSelectedItem());
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/Tokenizer.java
@ -9,6 +9,13 @@ public interface Tokenizer {
    String IGNORE_LITERALS = "ignore_literals";
    String IGNORE_IDENTIFIERS = "ignore_identifiers";
    String IGNORE_ANNOTATIONS = "ignore_annotations";
+
+    /**
+     * Ignore using directives in C#.
+     * The default value is <code>false</code>.
+     */
+    String IGNORE_USINGS = "ignore_usings";
+
    /**
     * Enables or disables skipping of blocks like a pre-processor.
     * It is a boolean property.
--- a/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsLanguage.java
+++ b/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsLanguage.java
@ -3,15 +3,24 @@
 */
 package net.sourceforge.pmd.cpd;

+import java.util.Properties;
+
 /**
 * Language implementation for C#
 */
 public class CsLanguage extends AbstractLanguage {

-    /**
-     * Creates a new C# Language instance.
-     */
+    /**
+     * Creates a new C# language instance configured from the JVM system properties.
+     */
    public CsLanguage() {
+        this(System.getProperties());
+    }
+
+    /**
+     * Creates a new C# language instance and applies the given properties to its tokenizer.
+     *
+     * @param properties the properties passed on to {@link #setProperties(Properties)}
+     */
+    public CsLanguage(Properties properties) {
        super("C#", "cs", new CsTokenizer(), ".cs");
+        setProperties(properties);
+    }
+
+    /**
+     * Forwards the given properties to the {@link CsTokenizer} of this language.
+     * Declared final because it is called from the constructor.
+     *
+     * @param properties the properties to apply to the tokenizer
+     */
+    public final void setProperties(Properties properties) {
+        CsTokenizer tokenizer = (CsTokenizer)getTokenizer();
+        tokenizer.setProperties(properties);
    }
 }
--- a/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsTokenizer.java
+++ b/pmd-cs/src/main/java/net/sourceforge/pmd/cpd/CsTokenizer.java
@ -5,9 +5,13 @@ package net.sourceforge.pmd.cpd;

 import java.io.BufferedReader;
 import java.io.CharArrayReader;
+import java.io.Closeable;
 import java.io.IOException;
+import java.io.PushbackReader;
+import java.util.Properties;

 import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.RandomStringUtils;

 /**
 * This class does a best-guess try-anything tokenization.
@ -16,13 +20,76 @@ import org.apache.commons.io.IOUtils;
 */
 public class CsTokenizer implements Tokenizer {

+    private boolean ignoreUsings = false;
+
+    /**
+     * Configures this tokenizer from the given properties.
+     *
+     * <p>The {@link #IGNORE_USINGS} property is always evaluated: a missing key
+     * means <code>false</code>. Callers switch the option off by removing the key,
+     * so a missing key must reset the flag rather than keep an earlier
+     * <code>true</code>. <code>getProperty</code> is used instead of
+     * <code>containsKey</code> so that values from a defaults list are honoured.</p>
+     *
+     * @param properties the properties to read the tokenizer options from
+     */
+    public void setProperties(Properties properties) {
+        ignoreUsings = Boolean.parseBoolean(properties.getProperty(IGNORE_USINGS, "false"));
+    }
+
    @Override
    public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
-        BufferedReader reader = new BufferedReader(new CharArrayReader(sourceCode.getCodeBuffer().toString()
-                .toCharArray()));
+        // Walk the source with one token of look-ahead: 'token' is the next candidate to emit,
+        // 'lookAhead' is the token after it (needed to tell a using directive from a using statement).
+        Tokenizer tokenizer =
+                new Tokenizer(sourceCode.getCodeBuffer().toString());
+        Token token = tokenizer.getNextToken();
+
+        while (!token.equals(Token.EOF)) {
+            Token lookAhead = tokenizer.getNextToken();
+
+            // Ignore using directives
+            // Only using directives should be ignored, because these are used to import namespaces
+            //
+            // Using directive: 'using System.Math;'
+            // Using statement: 'using (Font font1 = new Font(..)) { .. }'
+            if (ignoreUsings &&
+                    "using".equals(token.image) &&
+                    !"(".equals(lookAhead.image)
+            ) {
+                // We replace the 'using' token by a random token, because it should not be part of
+                // any duplication block. When we omit it from the token stream, there is a chance that
+                // we get a duplication block that starts before the 'using' directives and ends afterwards.
+                String randomTokenText =
+                    RandomStringUtils.randomAlphanumeric(20);
+
+                token = new Token(randomTokenText, token.lineNumber);
+                //Skip all other tokens of the using directive to prevent a partial matching
+                while (!";".equals(lookAhead.image) && !lookAhead.equals(Token.EOF)) {
+                    lookAhead = tokenizer.getNextToken();
+                }
+            }
+            // Semicolons are returned by the inner tokenizer (to find the end of a using directive)
+            // but are never added to the token entries.
+            if (!";".equals(token.image)) {
+                tokenEntries.add(new TokenEntry(token.image, sourceCode.getFileName(), token.lineNumber));
+            }
+            token = lookAhead;
+        }
+        tokenEntries.add(TokenEntry.getEOF());
+        IOUtils.closeQuietly(tokenizer);
+    }
+
+    /**
+     * Sets whether using directives are replaced by a placeholder token during tokenization.
+     *
+     * @param ignoreUsings <code>true</code> to ignore using directives
+     */
+    public void setIgnoreUsings(boolean ignoreUsings) {
+        this.ignoreUsings = ignoreUsings;
+    }
+
+
+    private static class Tokenizer implements Closeable {
+        private boolean endOfFile;
+        private int line;
+        private final PushbackReader reader;
+
+        /**
+         * Creates a tokenizer over the given source text, starting at line 1.
+         *
+         * @param sourceCode the complete source text to tokenize
+         */
+        public Tokenizer(String sourceCode) {
+            endOfFile = false;
+            line = 1;
+            // A PushbackReader allows getNextToken() to unread a character it read one too far.
+            reader = new PushbackReader(new BufferedReader(new CharArrayReader(sourceCode.toCharArray())));
+        }
+
+        public Token getNextToken() {
+            if (endOfFile) {
+                return Token.EOF;
+            }
+
            try {
                int ic = reader.read();
-            int line = 1;
                char c;
                StringBuilder b;
                while (ic != -1) {
@ -41,32 +108,27 @@ public class CsTokenizer implements Tokenizer {
                        ic = reader.read();
                        break;

-                // ignore semicolons
                    case ';':
-                    ic = reader.read();
-                    break;
+                        return new Token(";", line);

                    // < << <= <<= > >> >= >>=
                    case '<':
                    case '>':
                        ic = reader.read();
                        if (ic == '=') {
-                        tokenEntries.add(new TokenEntry(c + "=", sourceCode.getFileName(), line));
-                        ic = reader.read();
+                            return new Token(c + "=", line);
                        } else if (ic == c) {
                            ic = reader.read();
                            if (ic == '=') {
-                            tokenEntries.add(new TokenEntry(c + c + "=", sourceCode
-                                    .getFileName(), line));
-                            ic = reader.read();
+                                return new Token(c +  c + "=", line);
                            } else {
-                            tokenEntries.add(new TokenEntry(String.valueOf(c) + c, sourceCode
-                                    .getFileName(), line));
+                                reader.unread(ic);
+                                return new Token(String.valueOf(c) + c, line);
                            }
                        } else {
-                        tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
+                            reader.unread(ic);
+                            return new Token(String.valueOf(c), line);
                        }
-                    break;

                    // = == & &= && | |= || + += ++ - -= --
                    case '=':
@ -76,13 +138,11 @@ public class CsTokenizer implements Tokenizer {
                    case '-':
                        ic = reader.read();
                        if (ic == '=' || ic == c) {
-                        tokenEntries.add(new TokenEntry(c + String.valueOf((char) ic), sourceCode
-                                .getFileName(), line));
-                        ic = reader.read();
+                            return new Token(c + String.valueOf((char) ic), line);
                        } else {
-                        tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
+                            reader.unread(ic);
+                            return new Token(String.valueOf(c), line);
                        }
-                    break;

                    // ! != * *= % %= ^ ^= ~ ~=
                    case '!':
@ -92,12 +152,11 @@ public class CsTokenizer implements Tokenizer {
                    case '~':
                        ic = reader.read();
                        if (ic == '=') {
-                        tokenEntries.add(new TokenEntry(c + "=", sourceCode.getFileName(), line));
-                        ic = reader.read();
+                            return new Token(c + "=", line);
                        } else {
-                        tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
+                            reader.unread(ic);
+                            return new Token(String.valueOf(c), line);
                        }
-                    break;

                    // strings & chars
                    case '"':
@ -126,9 +185,7 @@ public class CsTokenizer implements Tokenizer {
                        if (ic != -1) {
                            b.append((char) ic);
                        }
-                    tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), beginLine));
-                    ic = reader.read();
-                    break;
+                        return new Token(b.toString(), beginLine);

                    // / /= /*...*/ //...
                    case '/':
@ -180,13 +237,11 @@ public class CsTokenizer implements Tokenizer {
                            break;

                        case '=':
-                        tokenEntries.add(new TokenEntry("/=", sourceCode.getFileName(), line));
-                        ic = reader.read();
-                        break;
+                            return new Token("/=", line);

                        default:
-                        tokenEntries.add(new TokenEntry("/", sourceCode.getFileName(), line));
-                        break;
+                            reader.unread(ic);
+                            return new Token("/", line);
                        }
                        break;

@ -198,7 +253,8 @@ public class CsTokenizer implements Tokenizer {
                                b.append(c);
                                c = (char) (ic = reader.read());
                            } while (Character.isJavaIdentifierPart(c));
-                        tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), line));
+                            reader.unread(ic);
+                            return new Token(b.toString(), line);
                        }
                        // numbers
                        else if (Character.isDigit(c) || c == '.') {
@ -214,22 +270,37 @@ public class CsTokenizer implements Tokenizer {
                                }
                                c = (char) (ic = reader.read());
                            } while ("1234567890.iIlLfFdDsSuUeExX".indexOf(c) != -1);
-
-                        tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), line));
+                            reader.unread(ic);
+                            return new Token(b.toString(), line);
                        }
                        // anything else
                        else {
-                        tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
-                        ic = reader.read();
-                        break;
+                            return new Token(String.valueOf(c), line);
                        }
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
-        } finally {
-            IOUtils.closeQuietly(reader);
-            tokenEntries.add(TokenEntry.getEOF());
+            }
+            endOfFile = true;
+            return Token.EOF;
+        }
+
+        /**
+         * Closes the underlying reader.
+         *
+         * @throws IOException if closing the reader fails
+         */
+        @Override
+        public void close() throws IOException {
+            reader.close();
+        }
+    }
+
+    /**
+     * A token produced by the nested tokenizer: its text and the line number recorded for it.
+     */
+    private static class Token {
+        /** Sentinel marking the end of input; equals() is not overridden, so it is matched by identity. */
+        public static final Token EOF = new Token("EOF", -1);
+
+        /** The text of the token. */
+        public final String image;
+        /** The line number recorded for the token (-1 for {@link #EOF}). */
+        public final int lineNumber;
+
+        public Token(String image, int lineNumber) {
+            this.image = image;
+            this.lineNumber = lineNumber;
        }
    }
 }
--- a/pmd-cs/src/test/java/net/sourceforge/pmd/cpd/CsTokenizerTest.java
+++ b/pmd-cs/src/test/java/net/sourceforge/pmd/cpd/CsTokenizerTest.java
@ -5,6 +5,7 @@
 package net.sourceforge.pmd.cpd;

 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;

 import java.util.List;

@ -13,12 +14,13 @@ import org.junit.Test;

 public class CsTokenizerTest {

-    private CsTokenizer tokenizer = new CsTokenizer();
+    private CsTokenizer tokenizer;

    private Tokens tokens;

    @Before
    public void init() {
+        tokenizer = new CsTokenizer();
        tokens = new Tokens();
        TokenEntry.clearImages();
    }
@ -138,6 +140,25 @@ public class CsTokenizerTest {
        assertEquals(8, tokens.getTokens().get(14).getBeginLine());
    }

+    /**
+     * With ignoreUsings enabled, a using directive must not show up as a "using" token.
+     */
+    @Test
+    public void testIgnoreUsingDirectives() {
+        tokenizer.setIgnoreUsings(true);
+        tokenizer.tokenize(toSourceCode("using System.Text;\n"), tokens);
+        assertNotEquals("using", tokens.getTokens().get(0).toString());
+        // expected: one placeholder token for the whole directive plus the EOF entry
+        assertEquals(2, tokens.size());
+    }
+
+    /**
+     * A using statement ('using (' ...) is regular code and must be kept even with ignoreUsings enabled.
+     */
+    @Test
+    public void testUsingStatementsAreNotIgnored() {
+        tokenizer.setIgnoreUsings(true);
+        tokenizer.tokenize(toSourceCode(
+                "using (Font font1 = new Font(\"Arial\", 10.0f)) {\n"
+                        + "  byte charset = font1.GdiCharSet;\n"
+                        + "}\n"
+                ), tokens);
+        assertEquals("using", tokens.getTokens().get(0).toString());
+    }
+
    private SourceCode toSourceCode(String source) {
        return new SourceCode(new SourceCode.StringCodeLoader(source));
    }
--- a/src/site/markdown/overview/changelog.md
+++ b/src/site/markdown/overview/changelog.md
@ -6,6 +6,8 @@

 **Feature Request and Improvements:**

+*   CPD: New command line parameter `--ignore-usings`: Ignore using directives in C# when comparing text.
+
 **New/Modified/Deprecated Rules:**

 *   Java
@ -14,6 +16,7 @@

 **Pull Requests:**

+*   [#25](https://github.com/adangel/pmd/pull/25): Added option to exclude C# using directives from CPD analysis
 *   [#72](https://github.com/pmd/pmd/pull/72): Added capability in Java and JSP parser for tracking tokens.
 *   [#73](https://github.com/pmd/pmd/pull/73): Add rule to look for invalid message format in slf4j loggers
 *   [#74](https://github.com/pmd/pmd/pull/74): Fix rendering CommentDefaultAccessModifier description as code
--- a/src/site/markdown/usage/cpd-usage.md
+++ b/src/site/markdown/usage/cpd-usage.md
@ -154,6 +154,12 @@ The options "minimum-tokens" and "files" are the two required options; there are
        <td>no</td>
        <td>java</td>
    </tr>
+    <tr>
+        <td>--ignore-usings</td>
+        <td>Ignore using directives in C# when comparing text</td>
+        <td>no</td>
+        <td>C#</td>
+    </tr>
    <tr>
        <td>--no-skip-blocks</td>
        <td>Do not skip code blocks marked with --skip-blocks-pattern (e.g. #if 0 until #endif)</td>
@ -339,6 +345,14 @@ Andy Glover wrote an Ant task for CPD; here's how to use it:
    <td valign="top">java</td>
    <td valign="top" align="center">No</td>
  </tr>
+  <tr>
+    <td valign="top">ignoreUsings</td>
+    <td valign="top">
+        Ignore using directives in C#.
+    </td>
+    <td valign="top">C#</td>
+    <td valign="top" align="center">No</td>
+  </tr>
  <tr>
    <td valign="top">skipDuplicateFiles</td>
    <td valign="top">