Implementing changes from the discussion

This commit is contained in:
Fernando Cosso
2020-05-07 11:49:52 -04:00
parent a2cd3fd8e5
commit 4f96286902
11 changed files with 40 additions and 33 deletions

View File

@ -50,12 +50,18 @@ All you need to do is follow this few steps:
**You are almost there!**
4. Please don't forget to add some tests; once again, you can look at the Go implementation ;)
4. Update the list of supported languages
- Write the fully-qualified name of your Language class to the file `src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language`
- Update the test that asserts the list of supported languages by updating the `SUPPORTED_LANGUAGES` constant in [BinaryDistributionIT](https://github.com/pmd/pmd/blob/master/pmd-dist/src/test/java/net/sourceforge/pmd/it/BinaryDistributionIT.java)
5. Please don't forget to add some tests; once again, you can look at the Go implementation ;)
If you've read this far, I'd guess you would also love to support some extra CPD configuration (ignoring imports or crazy things like that).
If that's the case, you've come to the right place!
5. You can add your custom properties using a Token filter
6. You can add your custom properties using a Token filter
- For Antlr grammars all you need to do is implement your own [AntlrTokenFilter](https://github.com/pmd/pmd/blob/master/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/AntlrTokenFilter.java)

View File

@ -53,7 +53,7 @@ public abstract class AntlrTokenizer implements Tokenizer {
return new AntlrTokenFilter(tokenManager);
}
/* default */ static CharStream getCharStreamFromSourceCode(final SourceCode sourceCode) {
/**
 * Builds an ANTLR {@link CharStream} from the text held by a {@link SourceCode}.
 *
 * <p>The content of the source's code buffer is converted to a string and
 * handed to {@link CharStreams#fromString(String)}.
 *
 * @param sourceCode the source whose code buffer supplies the characters
 * @return a character stream over the buffered source text
 */
public static CharStream getCharStreamFromSourceCode(final SourceCode sourceCode) {
    return CharStreams.fromString(sourceCode.getCodeBuffer().toString());
}

View File

@ -21,6 +21,8 @@ import net.sourceforge.pmd.PMDVersion;
public class BinaryDistributionIT extends AbstractBinaryDistributionTest {
private static final String SUPPORTED_LANGUAGES = "Supported languages: [apex, cpp, cs, dart, ecmascript, fortran, go, groovy, java, jsp, kotlin, lua, matlab, modelica, objectivec, perl, php, plsql, python, ruby, scala, swift, vf, xml]";
@Test
public void testFileExistence() {
assertTrue(getBinaryDistribution().exists());
@ -81,7 +83,7 @@ public class BinaryDistributionIT extends AbstractBinaryDistributionTest {
ExecutionResult result;
result = CpdExecutor.runCpd(tempDir, "-h");
result.assertExecutionResult(0, "Supported languages: [apex, cpp, cs, dart, ecmascript, fortran, go, groovy, java, jsp, kotlin, lua, matlab, modelica, objectivec, perl, php, plsql, python, ruby, scala, swift, vf]");
result.assertExecutionResult(0, SUPPORTED_LANGUAGES);
result = CpdExecutor.runCpd(tempDir, "--minimum-tokens", "10", "--format", "text", "--files", srcDir);
result.assertExecutionResult(4, "Found a 10 line (55 tokens) duplication in the following files:");

View File

@ -5,15 +5,10 @@
package net.sourceforge.pmd.cpd;
/**
* Implements the Go Language
*
* @author oinume@gmail.com
*/
public class GoLanguage extends AbstractLanguage {
/**
* Creates a new instance of {@link GoLanguage}
*/
public GoLanguage() {
super("Go", "go", new GoTokenizer(), ".go");
}

View File

@ -9,9 +9,6 @@ import org.antlr.v4.runtime.CharStream;
import net.sourceforge.pmd.lang.antlr.AntlrTokenManager;
import net.sourceforge.pmd.lang.go.antlr4.GolangLexer;
/**
* The Go tokenizer.
*/
public class GoTokenizer extends AntlrTokenizer {
@Override

View File

@ -70,7 +70,7 @@ DIGIT : [0-9] ;
fragment
NameChar : NameStartChar
| '-' | '_' | '.' | DIGIT
| '-' | '.' | DIGIT
| '\u00B7'
| '\u0300'..'\u036F'
| '\u203F'..'\u2040'
@ -78,12 +78,22 @@ NameChar : NameStartChar
fragment
NameStartChar
: [:a-zA-Z]
| '\u2070'..'\u218F'
| '\u2C00'..'\u2FEF'
| '\u3001'..'\uD7FF'
| '\uF900'..'\uFDCF'
| '\uFDF0'..'\uFFFD'
: ':'
| [A-Z]
| '_'
| [a-z]
| [\u{C0}-\u{D6}]
| [\u{D8}-\u{F6}]
| [\u{F8}-\u{2FF}]
| [\u{370}-\u{37D}]
| [\u{37F}-\u{1FFF}]
| [\u{200C}-\u{200D}]
| [\u{2070}-\u{218F}]
| [\u{2C00}-\u{2FEF}]
| [\u{3001}-\u{D7FF}]
| [\u{F900}-\u{FDCF}]
| [\u{FDF0}-\u{FFFD}]
| [\u{10000}-\u{EFFFF}]
;
// ----------------- Handle <? ... ?> ---------------------

View File

@ -2,17 +2,12 @@
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.cpd;
package net.sourceforge.pmd.xml.cpd;
import net.sourceforge.pmd.cpd.AbstractLanguage;
/**
* Implements the Xml Language
*
*/
public class XmlLanguage extends AbstractLanguage {
/**
* Creates a new instance of {@link XmlLanguage}
*/
public XmlLanguage() {
super("Xml", "xml", new XmlTokenizer(), ".xml");
}

View File

@ -2,16 +2,15 @@
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.cpd;
package net.sourceforge.pmd.xml.cpd;
import org.antlr.v4.runtime.CharStream;
import net.sourceforge.pmd.cpd.AntlrTokenizer;
import net.sourceforge.pmd.cpd.SourceCode;
import net.sourceforge.pmd.lang.antlr.AntlrTokenManager;
import net.sourceforge.pmd.lang.xml.antlr4.XMLLexer;
/**
* The Xml tokenizer.
*/
public class XmlTokenizer extends AntlrTokenizer {
@Override

View File

@ -0,0 +1 @@
net.sourceforge.pmd.xml.cpd.XmlLanguage

View File

@ -2,14 +2,16 @@
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.cpd;
package net.sourceforge.pmd.xml.cpd;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.commons.io.IOUtils;
import org.junit.Before;
import org.junit.Test;
import net.sourceforge.pmd.cpd.SourceCode;
import net.sourceforge.pmd.testframework.AbstractTokenizerTest;
public class XmlCPDTokenizerTest extends AbstractTokenizerTest {
@ -25,7 +27,7 @@ public class XmlCPDTokenizerTest extends AbstractTokenizerTest {
@Override
public String getSampleCode() throws IOException {
return IOUtils.toString(XmlTokenizer.class.getResourceAsStream(FILENAME));
return IOUtils.toString(XmlTokenizer.class.getResourceAsStream(FILENAME), StandardCharsets.UTF_8);
}
@Test