From 4f9628690261e5ed80e5868e92a7aaed3ff495a0 Mon Sep 17 00:00:00 2001 From: Fernando Cosso Date: Thu, 7 May 2020 11:49:52 -0400 Subject: [PATCH] Implementing changes from the discussion --- .../adding_new_cpd_language.md | 10 ++++++-- .../sourceforge/pmd/cpd/AntlrTokenizer.java | 2 +- .../pmd/it/BinaryDistributionIT.java | 4 +++- .../net/sourceforge/pmd/cpd/GoLanguage.java | 5 ---- .../net/sourceforge/pmd/cpd/GoTokenizer.java | 3 --- .../pmd/lang/xml/antlr4/XMLLexer.g4 | 24 +++++++++++++------ .../pmd/{ => xml}/cpd/XmlLanguage.java | 11 +++------ .../pmd/{ => xml}/cpd/XmlTokenizer.java | 7 +++--- .../services/net.sourceforge.pmd.cpd.Language | 1 + .../{ => xml}/cpd/XmlCPDTokenizerTest.java | 6 +++-- .../sourceforge/pmd/{ => xml}/cpd/hello.xml | 0 11 files changed, 40 insertions(+), 33 deletions(-) rename pmd-xml/src/main/java/net/sourceforge/pmd/{ => xml}/cpd/XmlLanguage.java (62%) rename pmd-xml/src/main/java/net/sourceforge/pmd/{ => xml}/cpd/XmlTokenizer.java (81%) create mode 100644 pmd-xml/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language rename pmd-xml/src/test/java/net/sourceforge/pmd/{ => xml}/cpd/XmlCPDTokenizerTest.java (84%) rename pmd-xml/src/test/resources/net/sourceforge/pmd/{ => xml}/cpd/hello.xml (100%) diff --git a/docs/pages/pmd/devdocs/major_contributions/adding_new_cpd_language.md b/docs/pages/pmd/devdocs/major_contributions/adding_new_cpd_language.md index 40d383c2e6..0e6f7b3c2b 100644 --- a/docs/pages/pmd/devdocs/major_contributions/adding_new_cpd_language.md +++ b/docs/pages/pmd/devdocs/major_contributions/adding_new_cpd_language.md @@ -50,12 +50,18 @@ All you need to do is follow this few steps: **You are almost there!** -4. Please don't forget to add some test, you can again.. look at Go implementation ;) +4. 
Update the list of supported languages + + - Write the fully-qualified name of your Language class to the file `src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language` + + - Update the test that asserts the list of supported languages by updating the `SUPPORTED_LANGUAGES` constant in [BinaryDistributionIT](https://github.com/pmd/pmd/blob/master/pmd-dist/src/test/java/net/sourceforge/pmd/it/BinaryDistributionIT.java) + +5. Please don't forget to add some tests; you can, again, look at the Go implementation ;) If you read this far, I'm keen to think you would also love to support some extra CPD configuration (ignore imports or crazy things like that) If that's your case , you came to the right place! -5. You can add your custom properties using a Token filter +6. You can add your custom properties using a Token filter - For Antlr grammars all you need to do is implement your own [AntlrTokenFilter](https://github.com/pmd/pmd/blob/master/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/AntlrTokenFilter.java) diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AntlrTokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AntlrTokenizer.java index 05b18eea5b..e7fe98ca92 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AntlrTokenizer.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AntlrTokenizer.java @@ -53,7 +53,7 @@ public abstract class AntlrTokenizer implements Tokenizer { return new AntlrTokenFilter(tokenManager); } - /* default */ static CharStream getCharStreamFromSourceCode(final SourceCode sourceCode) { + public /* default */ static CharStream getCharStreamFromSourceCode(final SourceCode sourceCode) { StringBuilder buffer = sourceCode.getCodeBuffer(); return CharStreams.fromString(buffer.toString()); } diff --git a/pmd-dist/src/test/java/net/sourceforge/pmd/it/BinaryDistributionIT.java b/pmd-dist/src/test/java/net/sourceforge/pmd/it/BinaryDistributionIT.java index 933c4565e4..c88f5a8c32 100644 ---
a/pmd-dist/src/test/java/net/sourceforge/pmd/it/BinaryDistributionIT.java +++ b/pmd-dist/src/test/java/net/sourceforge/pmd/it/BinaryDistributionIT.java @@ -21,6 +21,8 @@ import net.sourceforge.pmd.PMDVersion; public class BinaryDistributionIT extends AbstractBinaryDistributionTest { + private static final String SUPPORTED_LANGUAGES = "Supported languages: [apex, cpp, cs, dart, ecmascript, fortran, go, groovy, java, jsp, kotlin, lua, matlab, modelica, objectivec, perl, php, plsql, python, ruby, scala, swift, vf, xml]"; + @Test public void testFileExistence() { assertTrue(getBinaryDistribution().exists()); @@ -81,7 +83,7 @@ public class BinaryDistributionIT extends AbstractBinaryDistributionTest { ExecutionResult result; result = CpdExecutor.runCpd(tempDir, "-h"); - result.assertExecutionResult(0, "Supported languages: [apex, cpp, cs, dart, ecmascript, fortran, go, groovy, java, jsp, kotlin, lua, matlab, modelica, objectivec, perl, php, plsql, python, ruby, scala, swift, vf]"); + result.assertExecutionResult(0, SUPPORTED_LANGUAGES); result = CpdExecutor.runCpd(tempDir, "--minimum-tokens", "10", "--format", "text", "--files", srcDir); result.assertExecutionResult(4, "Found a 10 line (55 tokens) duplication in the following files:"); diff --git a/pmd-go/src/main/java/net/sourceforge/pmd/cpd/GoLanguage.java b/pmd-go/src/main/java/net/sourceforge/pmd/cpd/GoLanguage.java index 124015b67e..2c4f6a4d21 100644 --- a/pmd-go/src/main/java/net/sourceforge/pmd/cpd/GoLanguage.java +++ b/pmd-go/src/main/java/net/sourceforge/pmd/cpd/GoLanguage.java @@ -5,15 +5,10 @@ package net.sourceforge.pmd.cpd; /** - * Implements the Go Language - * * @author oinume@gmail.com */ public class GoLanguage extends AbstractLanguage { - /** - * Creates a new instance of {@link GoLanguage} - */ public GoLanguage() { super("Go", "go", new GoTokenizer(), ".go"); } diff --git a/pmd-go/src/main/java/net/sourceforge/pmd/cpd/GoTokenizer.java b/pmd-go/src/main/java/net/sourceforge/pmd/cpd/GoTokenizer.java 
index 2d46bbf82b..5f63a7f15a 100644 --- a/pmd-go/src/main/java/net/sourceforge/pmd/cpd/GoTokenizer.java +++ b/pmd-go/src/main/java/net/sourceforge/pmd/cpd/GoTokenizer.java @@ -9,9 +9,6 @@ import org.antlr.v4.runtime.CharStream; import net.sourceforge.pmd.lang.antlr.AntlrTokenManager; import net.sourceforge.pmd.lang.go.antlr4.GolangLexer; -/** - * The Go tokenizer. - */ public class GoTokenizer extends AntlrTokenizer { @Override diff --git a/pmd-xml/src/main/antlr4/net/sourceforge/pmd/lang/xml/antlr4/XMLLexer.g4 b/pmd-xml/src/main/antlr4/net/sourceforge/pmd/lang/xml/antlr4/XMLLexer.g4 index ff47cd0a2b..6d4cbe8116 100644 --- a/pmd-xml/src/main/antlr4/net/sourceforge/pmd/lang/xml/antlr4/XMLLexer.g4 +++ b/pmd-xml/src/main/antlr4/net/sourceforge/pmd/lang/xml/antlr4/XMLLexer.g4 @@ -70,7 +70,7 @@ DIGIT : [0-9] ; fragment NameChar : NameStartChar - | '-' | '_' | '.' | DIGIT + | '-' | '.' | DIGIT | '\u00B7' | '\u0300'..'\u036F' | '\u203F'..'\u2040' @@ -78,12 +78,22 @@ NameChar : NameStartChar fragment NameStartChar - : [:a-zA-Z] - | '\u2070'..'\u218F' - | '\u2C00'..'\u2FEF' - | '\u3001'..'\uD7FF' - | '\uF900'..'\uFDCF' - | '\uFDF0'..'\uFFFD' + : ':' + | [A-Z] + | '_' + | [a-z] + | [\u{C0}-\u{D6}] + | [\u{D8}-\u{F6}] + | [\u{F8}-\u{2FF}] + | [\u{370}-\u{37D}] + | [\u{37F}-\u{1FFF}] + | [\u{200C}-\u{200D}] + | [\u{2070}-\u{218F}] + | [\u{2C00}-\u{2FEF}] + | [\u{3001}-\u{D7FF}] + | [\u{F900}-\u{FDCF}] + | [\u{FDF0}-\u{FFFD}] + | [\u{10000}-\u{EFFFF}] ; // ----------------- Handle --------------------- diff --git a/pmd-xml/src/main/java/net/sourceforge/pmd/cpd/XmlLanguage.java b/pmd-xml/src/main/java/net/sourceforge/pmd/xml/cpd/XmlLanguage.java similarity index 62% rename from pmd-xml/src/main/java/net/sourceforge/pmd/cpd/XmlLanguage.java rename to pmd-xml/src/main/java/net/sourceforge/pmd/xml/cpd/XmlLanguage.java index 9914e8770a..38b38c8eb3 100644 --- a/pmd-xml/src/main/java/net/sourceforge/pmd/cpd/XmlLanguage.java +++ 
b/pmd-xml/src/main/java/net/sourceforge/pmd/xml/cpd/XmlLanguage.java @@ -2,17 +2,12 @@ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html */ -package net.sourceforge.pmd.cpd; +package net.sourceforge.pmd.xml.cpd; + +import net.sourceforge.pmd.cpd.AbstractLanguage; -/** - * Implements the Xml Language - * - */ public class XmlLanguage extends AbstractLanguage { - /** - * Creates a new instance of {@link XmlLanguage} - */ public XmlLanguage() { super("Xml", "xml", new XmlTokenizer(), ".xml"); } diff --git a/pmd-xml/src/main/java/net/sourceforge/pmd/cpd/XmlTokenizer.java b/pmd-xml/src/main/java/net/sourceforge/pmd/xml/cpd/XmlTokenizer.java similarity index 81% rename from pmd-xml/src/main/java/net/sourceforge/pmd/cpd/XmlTokenizer.java rename to pmd-xml/src/main/java/net/sourceforge/pmd/xml/cpd/XmlTokenizer.java index 43e6572f74..0ec42a1a83 100644 --- a/pmd-xml/src/main/java/net/sourceforge/pmd/cpd/XmlTokenizer.java +++ b/pmd-xml/src/main/java/net/sourceforge/pmd/xml/cpd/XmlTokenizer.java @@ -2,16 +2,15 @@ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html */ -package net.sourceforge.pmd.cpd; +package net.sourceforge.pmd.xml.cpd; import org.antlr.v4.runtime.CharStream; +import net.sourceforge.pmd.cpd.AntlrTokenizer; +import net.sourceforge.pmd.cpd.SourceCode; import net.sourceforge.pmd.lang.antlr.AntlrTokenManager; import net.sourceforge.pmd.lang.xml.antlr4.XMLLexer; -/** - * The Xml tokenizer. 
- */ public class XmlTokenizer extends AntlrTokenizer { @Override diff --git a/pmd-xml/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language b/pmd-xml/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language new file mode 100644 index 0000000000..fad9c021ea --- /dev/null +++ b/pmd-xml/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language @@ -0,0 +1 @@ +net.sourceforge.pmd.xml.cpd.XmlLanguage diff --git a/pmd-xml/src/test/java/net/sourceforge/pmd/cpd/XmlCPDTokenizerTest.java b/pmd-xml/src/test/java/net/sourceforge/pmd/xml/cpd/XmlCPDTokenizerTest.java similarity index 84% rename from pmd-xml/src/test/java/net/sourceforge/pmd/cpd/XmlCPDTokenizerTest.java rename to pmd-xml/src/test/java/net/sourceforge/pmd/xml/cpd/XmlCPDTokenizerTest.java index d152cf29b5..6c31bc1aac 100644 --- a/pmd-xml/src/test/java/net/sourceforge/pmd/cpd/XmlCPDTokenizerTest.java +++ b/pmd-xml/src/test/java/net/sourceforge/pmd/xml/cpd/XmlCPDTokenizerTest.java @@ -2,14 +2,16 @@ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html */ -package net.sourceforge.pmd.cpd; +package net.sourceforge.pmd.xml.cpd; import java.io.IOException; +import java.nio.charset.StandardCharsets; import org.apache.commons.io.IOUtils; import org.junit.Before; import org.junit.Test; +import net.sourceforge.pmd.cpd.SourceCode; import net.sourceforge.pmd.testframework.AbstractTokenizerTest; public class XmlCPDTokenizerTest extends AbstractTokenizerTest { @@ -25,7 +27,7 @@ public class XmlCPDTokenizerTest extends AbstractTokenizerTest { @Override public String getSampleCode() throws IOException { - return IOUtils.toString(XmlTokenizer.class.getResourceAsStream(FILENAME)); + return IOUtils.toString(XmlTokenizer.class.getResourceAsStream(FILENAME), StandardCharsets.UTF_8); } @Test diff --git a/pmd-xml/src/test/resources/net/sourceforge/pmd/cpd/hello.xml b/pmd-xml/src/test/resources/net/sourceforge/pmd/xml/cpd/hello.xml similarity index 100% rename from 
pmd-xml/src/test/resources/net/sourceforge/pmd/cpd/hello.xml rename to pmd-xml/src/test/resources/net/sourceforge/pmd/xml/cpd/hello.xml