Merge branch 'pull-request-25'

This commit is contained in:
Andreas Dangel
2015-10-26 21:00:31 +01:00
9 changed files with 434 additions and 270 deletions

View File

@ -67,6 +67,9 @@ public class CPDConfiguration extends AbstractConfiguration {
@Parameter(names = "--ignore-annotations", description = "Ignore language annotations when comparing text", required = false)
private boolean ignoreAnnotations;
@Parameter(names = "--ignore-usings", description = "Ignore using directives in C#", required = false)
private boolean ignoreUsings;
@Parameter(names = "--skip-lexical-errors", description = "Skip files which can't be tokenized due to invalid characters instead of aborting CPD", required = false)
private boolean skipLexicalErrors = false;
@ -227,6 +230,11 @@ public class CPDConfiguration extends AbstractConfiguration {
} else {
properties.remove(Tokenizer.IGNORE_ANNOTATIONS);
}
if (configuration.isIgnoreUsings()) {
properties.setProperty(Tokenizer.IGNORE_USINGS, "true");
} else {
properties.remove(Tokenizer.IGNORE_USINGS);
}
properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS, Boolean.toString(!configuration.isNoSkipBlocks()));
properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS_PATTERN, configuration.getSkipBlocksPattern());
configuration.getLanguage().setProperties(properties);
@ -338,6 +346,14 @@ public class CPDConfiguration extends AbstractConfiguration {
this.ignoreAnnotations = ignoreAnnotations;
}
/**
 * Reports whether the <code>--ignore-usings</code> option is switched on.
 *
 * @return the current value of the ignoreUsings flag
 */
public boolean isIgnoreUsings() {
    return this.ignoreUsings;
}
/**
* Switches the <code>--ignore-usings</code> option on or off.
*
* @param ignoreUsings the new value of the ignoreUsings flag
*/
public void setIgnoreUsings(boolean ignoreUsings) {
this.ignoreUsings = ignoreUsings;
}
/**
 * Reports whether the <code>--skip-lexical-errors</code> option is switched on.
 *
 * @return the current value of the skipLexicalErrors flag
 */
public boolean isSkipLexicalErrors() {
    return this.skipLexicalErrors;
}

View File

@ -47,6 +47,7 @@ public class CPDTask extends Task {
private boolean ignoreLiterals;
private boolean ignoreIdentifiers;
private boolean ignoreAnnotations;
private boolean ignoreUsings;
private boolean skipLexicalErrors;
private boolean skipDuplicateFiles;
private boolean skipBlocks = true;
@ -104,6 +105,9 @@ public class CPDTask extends Task {
if (ignoreAnnotations) {
p.setProperty(Tokenizer.IGNORE_ANNOTATIONS, "true");
}
if (ignoreUsings) {
p.setProperty(Tokenizer.IGNORE_USINGS, "true");
}
p.setProperty(Tokenizer.OPTION_SKIP_BLOCKS, Boolean.toString(skipBlocks));
p.setProperty(Tokenizer.OPTION_SKIP_BLOCKS_PATTERN, skipBlocksPattern);
return LanguageFactory.createLanguage(language, p);
@ -188,6 +192,10 @@ public class CPDTask extends Task {
this.ignoreAnnotations = value;
}
/**
 * Sets the ignoreUsings flag of this task.
 *
 * @param value the new value of the flag
 */
public void setIgnoreUsings(boolean value) {
    ignoreUsings = value;
}
/**
* Sets the skipLexicalErrors flag of this task.
*
* @param skipLexicalErrors the new value of the flag
*/
public void setSkipLexicalErrors(boolean skipLexicalErrors) {
this.skipLexicalErrors = skipLexicalErrors;
}

View File

@ -85,6 +85,7 @@ public class GUI implements CPDListener {
/** Default answer for the "ignore identifiers" capability: not available. */
public boolean canIgnoreIdentifiers() {
    return false;
}

/** Default answer for the "ignore literals" capability: not available. */
public boolean canIgnoreLiterals() {
    return false;
}

/** Default answer for the "ignore annotations" capability: not available. */
public boolean canIgnoreAnnotations() {
    return false;
}

/** Default answer for the "ignore usings" capability: not available. */
public boolean canIgnoreUsings() {
    return false;
}
public abstract String[] extensions();
}
@ -121,6 +122,10 @@ public class GUI implements CPDListener {
public boolean canIgnoreLiterals() {
    // Only offered when the terse name is "java".
    final boolean isJava = "java".equals(terseName);
    return isJava;
}
@Override
public boolean canIgnoreUsings() {
    // Only offered when the terse name is "cs".
    final boolean isCSharp = "cs".equals(terseName);
    return isCSharp;
}
};
}
LANGUAGE_SETS[index][0] = "by extension...";
@ -277,6 +282,7 @@ public class GUI implements CPDListener {
private JCheckBox ignoreIdentifiersCheckbox = new JCheckBox("", false);
private JCheckBox ignoreLiteralsCheckbox = new JCheckBox("", false);
private JCheckBox ignoreAnnotationsCheckbox = new JCheckBox("", false);
private JCheckBox ignoreUsingsCheckbox = new JCheckBox("", false);
private JComboBox languageBox = new JComboBox();
private JTextField extensionField = new JTextField();
private JLabel extensionLabel = new JLabel("Extension:", SwingConstants.RIGHT);
@ -362,6 +368,7 @@ public class GUI implements CPDListener {
ignoreIdentifiersCheckbox.setEnabled(current.canIgnoreIdentifiers());
ignoreLiteralsCheckbox.setEnabled(current.canIgnoreLiterals());
ignoreAnnotationsCheckbox.setEnabled(current.canIgnoreAnnotations());
ignoreUsingsCheckbox.setEnabled(current.canIgnoreUsings());
extensionField.setText(current.extensions()[0]);
boolean enableExtension = current.extensions()[0].length() == 0;
extensionField.setEnabled(enableExtension);
@ -414,6 +421,13 @@ public class GUI implements CPDListener {
helper.nextRow();
helper.addLabel("Ignore annotations?");
helper.add(ignoreAnnotationsCheckbox);
helper.addLabel("");
helper.addLabel("");
helper.nextRow();
helper.nextRow();
helper.addLabel("Ignore usings?");
helper.add(ignoreUsingsCheckbox);
helper.add(goButton);
helper.add(cxButton);
helper.nextRow();
@ -590,6 +604,7 @@ public class GUI implements CPDListener {
config.setIgnoreIdentifiers(ignoreIdentifiersCheckbox.isSelected());
config.setIgnoreLiterals(ignoreLiteralsCheckbox.isSelected());
config.setIgnoreAnnotations(ignoreAnnotationsCheckbox.isSelected());
config.setIgnoreUsings(ignoreUsingsCheckbox.isSelected());
p.setProperty(LanguageFactory.EXTENSION, extensionField.getText());
LanguageConfig conf = languageConfigFor((String)languageBox.getSelectedItem());

View File

@ -9,6 +9,13 @@ public interface Tokenizer {
String IGNORE_LITERALS = "ignore_literals";
String IGNORE_IDENTIFIERS = "ignore_identifiers";
String IGNORE_ANNOTATIONS = "ignore_annotations";
/**
* Ignore using directives in C#.
* The default value is <code>false</code>.
*/
String IGNORE_USINGS = "ignore_usings";
/**
* Enables or disables skipping of blocks like a pre-processor.
* It is a boolean property.

View File

@ -3,15 +3,24 @@
*/
package net.sourceforge.pmd.cpd;
import java.util.Properties;
/**
 * CPD language definition for C#: name "C#", terse name "cs", file extension ".cs",
 * tokenized by a {@link CsTokenizer}.
 */
public class CsLanguage extends AbstractLanguage {

    /**
     * Creates a new C# Language instance configured from the JVM system properties.
     */
    public CsLanguage() {
        this(System.getProperties());
    }

    /**
     * Creates a new C# Language instance and applies the given properties to its tokenizer.
     *
     * @param properties the properties handed on to the tokenizer
     */
    public CsLanguage(Properties properties) {
        super("C#", "cs", new CsTokenizer(), ".cs");
        setProperties(properties);
    }

    /**
     * Forwards the given properties to the {@link CsTokenizer} of this language.
     * The method is final, so the call from the constructor cannot reach an override.
     *
     * @param properties the properties handed on to the tokenizer
     */
    public final void setProperties(Properties properties) {
        ((CsTokenizer) getTokenizer()).setProperties(properties);
    }
}

View File

@ -5,9 +5,13 @@ package net.sourceforge.pmd.cpd;
import java.io.BufferedReader;
import java.io.CharArrayReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.PushbackReader;
import java.util.Properties;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.RandomStringUtils;
/**
* This class does a best-guess try-anything tokenization.
@ -16,13 +20,76 @@ import org.apache.commons.io.IOUtils;
*/
public class CsTokenizer implements Tokenizer {
private boolean ignoreUsings = false;
/**
 * Configures this tokenizer from the given properties.
 *
 * <p>Reads the <code>IGNORE_USINGS</code> property. A missing value is treated as
 * <code>false</code>, so the flag always reflects the supplied properties instead of
 * silently keeping whatever an earlier call left behind. The lookup goes through
 * {@link Properties#getProperty(String, String)} and therefore also honours values
 * that are only present in the default property list.
 *
 * @param properties the properties to read the configuration from
 */
public void setProperties(Properties properties) {
    ignoreUsings = Boolean.parseBoolean(properties.getProperty(IGNORE_USINGS, "false"));
}
/**
* Tokenizes the given source and appends one entry per token to <code>tokenEntries</code>,
* followed by the EOF entry.
*
* <p>Semicolons are never added as entries. When <code>ignoreUsings</code> is set, every
* using directive is reduced to a single random placeholder entry.
*
* @param sourceCode the source to tokenize
* @param tokenEntries receives the resulting token entries
*/
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
// NOTE(review): 'reader' is not referenced again in this method as shown here - confirm it is still needed.
BufferedReader reader = new BufferedReader(new CharArrayReader(sourceCode.getCodeBuffer().toString()
.toCharArray()));
Tokenizer tokenizer =
new Tokenizer(sourceCode.getCodeBuffer().toString());
Token token = tokenizer.getNextToken();
// Token does not override equals(), so this compares against the shared EOF sentinel by identity.
while (!token.equals(Token.EOF)) {
// One token of look-ahead is needed to tell a using directive from a using statement.
Token lookAhead = tokenizer.getNextToken();
// Ignore using directives
// Only using directives should be ignored, because these are used to import namespaces
//
// Using directive: 'using System.Math;'
// Using statement: 'using (Font font1 = new Font(..)) { .. }'
if (ignoreUsings &&
"using".equals(token.image) &&
!"(".equals(lookAhead.image)
) {
// We replace the 'using' token by a random token, because it should not be part of
// any duplication block. If we omitted it from the token stream, there would be a chance
// of getting a duplication block that starts before the 'using' directives and ends afterwards.
String randomTokenText =
RandomStringUtils.randomAlphanumeric(20);
token = new Token(randomTokenText, token.lineNumber);
// Skip all other tokens of the using directive to prevent a partial matching
while (!";".equals(lookAhead.image) && !lookAhead.equals(Token.EOF)) {
lookAhead = tokenizer.getNextToken();
}
}
// Semicolons are dropped here; they are only needed above to find the end of a using directive.
if (!";".equals(token.image)) {
tokenEntries.add(new TokenEntry(token.image, sourceCode.getFileName(), token.lineNumber));
}
token = lookAhead;
}
tokenEntries.add(TokenEntry.getEOF());
IOUtils.closeQuietly(tokenizer);
}
/**
* Sets whether using directives are replaced by a placeholder during tokenization.
*
* @param ignoreUsings the new value of the flag
*/
public void setIgnoreUsings(boolean ignoreUsings) {
this.ignoreUsings = ignoreUsings;
}
private static class Tokenizer implements Closeable {
private boolean endOfFile;
private int line;
private final PushbackReader reader;
public Tokenizer(String sourceCode) {
endOfFile = false;
line = 1;
reader = new PushbackReader(new BufferedReader(new CharArrayReader(sourceCode.toCharArray())));
}
public Token getNextToken() {
if (endOfFile) {
return Token.EOF;
}
try {
int ic = reader.read();
int line = 1;
char c;
StringBuilder b;
while (ic != -1) {
@ -41,32 +108,27 @@ public class CsTokenizer implements Tokenizer {
ic = reader.read();
break;
// ignore semicolons
case ';':
ic = reader.read();
break;
return new Token(";", line);
// < << <= <<= > >> >= >>=
case '<':
case '>':
ic = reader.read();
if (ic == '=') {
tokenEntries.add(new TokenEntry(c + "=", sourceCode.getFileName(), line));
ic = reader.read();
return new Token(c + "=", line);
} else if (ic == c) {
ic = reader.read();
if (ic == '=') {
tokenEntries.add(new TokenEntry(c + c + "=", sourceCode
.getFileName(), line));
ic = reader.read();
return new Token(c + c + "=", line);
} else {
tokenEntries.add(new TokenEntry(String.valueOf(c) + c, sourceCode
.getFileName(), line));
reader.unread(ic);
return new Token(String.valueOf(c) + c, line);
}
} else {
tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
reader.unread(ic);
return new Token(String.valueOf(c), line);
}
break;
// = == & &= && | |= || + += ++ - -= --
case '=':
@ -76,13 +138,11 @@ public class CsTokenizer implements Tokenizer {
case '-':
ic = reader.read();
if (ic == '=' || ic == c) {
tokenEntries.add(new TokenEntry(c + String.valueOf((char) ic), sourceCode
.getFileName(), line));
ic = reader.read();
return new Token(c + String.valueOf((char) ic), line);
} else {
tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
reader.unread(ic);
return new Token(String.valueOf(c), line);
}
break;
// ! != * *= % %= ^ ^= ~ ~=
case '!':
@ -92,12 +152,11 @@ public class CsTokenizer implements Tokenizer {
case '~':
ic = reader.read();
if (ic == '=') {
tokenEntries.add(new TokenEntry(c + "=", sourceCode.getFileName(), line));
ic = reader.read();
return new Token(c + "=", line);
} else {
tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
reader.unread(ic);
return new Token(String.valueOf(c), line);
}
break;
// strings & chars
case '"':
@ -126,9 +185,7 @@ public class CsTokenizer implements Tokenizer {
if (ic != -1) {
b.append((char) ic);
}
tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), beginLine));
ic = reader.read();
break;
return new Token(b.toString(), beginLine);
// / /= /*...*/ //...
case '/':
@ -180,13 +237,11 @@ public class CsTokenizer implements Tokenizer {
break;
case '=':
tokenEntries.add(new TokenEntry("/=", sourceCode.getFileName(), line));
ic = reader.read();
break;
return new Token("/=", line);
default:
tokenEntries.add(new TokenEntry("/", sourceCode.getFileName(), line));
break;
reader.unread(ic);
return new Token("/", line);
}
break;
@ -198,7 +253,8 @@ public class CsTokenizer implements Tokenizer {
b.append(c);
c = (char) (ic = reader.read());
} while (Character.isJavaIdentifierPart(c));
tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), line));
reader.unread(ic);
return new Token(b.toString(), line);
}
// numbers
else if (Character.isDigit(c) || c == '.') {
@ -214,22 +270,37 @@ public class CsTokenizer implements Tokenizer {
}
c = (char) (ic = reader.read());
} while ("1234567890.iIlLfFdDsSuUeExX".indexOf(c) != -1);
tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), line));
reader.unread(ic);
return new Token(b.toString(), line);
}
// anything else
else {
tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
ic = reader.read();
break;
return new Token(String.valueOf(c), line);
}
}
}
} catch (IOException e) {
e.printStackTrace();
} finally {
IOUtils.closeQuietly(reader);
tokenEntries.add(TokenEntry.getEOF());
}
endOfFile = true;
return Token.EOF;
}
/**
* Closes the underlying reader.
*
* @throws IOException if closing the reader fails
*/
@Override
public void close() throws IOException {
reader.close();
}
}
/**
 * A single token: its text together with a line number. Both fields are final.
 */
private static class Token {
    /** The text of the token. */
    public final String image;

    /** The line number associated with the token. */
    public final int lineNumber;

    /** Shared sentinel returned once the input is exhausted; image "EOF", line -1. */
    public static final Token EOF = new Token("EOF", -1);

    /**
     * @param image the text of the token
     * @param lineNumber the line number associated with the token
     */
    public Token(String image, int lineNumber) {
        this.lineNumber = lineNumber;
        this.image = image;
    }
}
}

View File

@ -5,6 +5,7 @@
package net.sourceforge.pmd.cpd;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import java.util.List;
@ -13,12 +14,13 @@ import org.junit.Test;
public class CsTokenizerTest {
private CsTokenizer tokenizer = new CsTokenizer();
private CsTokenizer tokenizer;
private Tokens tokens;
/**
* Runs before each test: creates a fresh tokenizer and token list and
* calls TokenEntry.clearImages().
*/
@Before
public void init() {
tokenizer = new CsTokenizer();
tokens = new Tokens();
TokenEntry.clearImages();
}
@ -138,6 +140,25 @@ public class CsTokenizerTest {
assertEquals(8, tokens.getTokens().get(14).getBeginLine());
}
/**
 * With ignoreUsings enabled, a using directive must not surface as a "using" token;
 * exactly two entries are expected for the single directive.
 */
@Test
public void testIgnoreUsingDirectives() {
    final SourceCode usingDirective = toSourceCode("using System.Text;\n");
    tokenizer.setIgnoreUsings(true);

    tokenizer.tokenize(usingDirective, tokens);

    final String firstImage = tokens.getTokens().get(0).toString();
    assertNotEquals("using", firstImage);
    assertEquals(2, tokens.size());
}
/**
 * With ignoreUsings enabled, a using statement (followed by an opening parenthesis)
 * must still start with the "using" token.
 */
@Test
public void testUsingStatementsAreNotIgnored() {
    final String usingStatement =
            "using (Font font1 = new Font(\"Arial\", 10.0f)) {\n"
            + "  byte charset = font1.GdiCharSet;\n"
            + "}\n";
    tokenizer.setIgnoreUsings(true);

    tokenizer.tokenize(toSourceCode(usingStatement), tokens);

    final String firstImage = tokens.getTokens().get(0).toString();
    assertEquals("using", firstImage);
}
/**
 * Wraps the given text in a SourceCode backed by a string code loader.
 *
 * @param source the source text
 * @return the SourceCode wrapping the text
 */
private SourceCode toSourceCode(String source) {
    final SourceCode.StringCodeLoader loader = new SourceCode.StringCodeLoader(source);
    return new SourceCode(loader);
}

View File

@ -6,6 +6,8 @@
**Feature Request and Improvements:**
* CPD: New command line parameter `--ignore-usings`: Ignore using directives in C# when comparing text.
**New/Modified/Deprecated Rules:**
* Java
@ -14,6 +16,7 @@
**Pull Requests:**
* [#25](https://github.com/adangel/pmd/pull/25): Added option to exclude C# using directives from CPD analysis
* [#72](https://github.com/pmd/pmd/pull/72): Added capability in Java and JSP parser for tracking tokens.
* [#73](https://github.com/pmd/pmd/pull/73): Add rule to look for invalid message format in slf4j loggers
* [#74](https://github.com/pmd/pmd/pull/74): Fix rendering CommentDefaultAccessModifier description as code

View File

@ -154,6 +154,12 @@ The options "minimum-tokens" and "files" are the two required options; there are
<td>no</td>
<td>java</td>
</tr>
<tr>
<td>--ignore-usings</td>
<td>Ignore using directives in C# when comparing text</td>
<td>no</td>
<td>C#</td>
</tr>
<tr>
<td>--no-skip-blocks</td>
<td>Do not skip code blocks marked with --skip-blocks-pattern (e.g. #if 0 until #endif)</td>
@ -339,6 +345,14 @@ Andy Glover wrote an Ant task for CPD; here's how to use it:
<td valign="top">java</td>
<td valign="top" align="center">No</td>
</tr>
<tr>
<td valign="top">ignoreUsings</td>
<td valign="top">
Ignore using directives in C#.
</td>
<td valign="top">C#</td>
<td valign="top" align="center">No</td>
</tr>
<tr>
<td valign="top">skipDuplicateFiles</td>
<td valign="top">