From ac2dcd8c4df5aae3ef2f3a93f7fd9d4fa5ebbc8a Mon Sep 17 00:00:00 2001 From: Anne Brouwers Date: Wed, 1 Jun 2022 15:11:08 +0200 Subject: [PATCH] Add Gherkin support to CPD --- pmd-dist/pom.xml | 5 + .../pmd/it/BinaryDistributionIT.java | 4 +- pmd-gherkin/pom.xml | 59 ++ .../pmd/lang/gherkin/antlr4/Gherkin.g4 | 118 ++++ .../sourceforge/pmd/cpd/GherkinLanguage.java | 18 + .../sourceforge/pmd/cpd/GherkinTokenizer.java | 28 + .../pmd/lang/gherkin/antlr4/package-info.java | 11 + .../services/net.sourceforge.pmd.cpd.Language | 1 + .../pmd/cpd/GherkinTokenizerTest.java | 43 ++ .../cpd/testdata/annotatedSource.feature | 73 ++ .../gherkin/cpd/testdata/annotatedSource.txt | 637 ++++++++++++++++++ .../gherkin/cpd/testdata/docstring.feature | 6 + .../lang/gherkin/cpd/testdata/docstring.txt | 15 + pom.xml | 1 + 14 files changed, 1017 insertions(+), 2 deletions(-) create mode 100644 pmd-gherkin/pom.xml create mode 100644 pmd-gherkin/src/main/antlr4/net/sourceforge/pmd/lang/gherkin/antlr4/Gherkin.g4 create mode 100644 pmd-gherkin/src/main/java/net/sourceforge/pmd/cpd/GherkinLanguage.java create mode 100644 pmd-gherkin/src/main/java/net/sourceforge/pmd/cpd/GherkinTokenizer.java create mode 100644 pmd-gherkin/src/main/java/net/sourceforge/pmd/lang/gherkin/antlr4/package-info.java create mode 100644 pmd-gherkin/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language create mode 100644 pmd-gherkin/src/test/java/net/sourceforge/pmd/cpd/GherkinTokenizerTest.java create mode 100644 pmd-gherkin/src/test/resources/net/sourceforge/pmd/lang/gherkin/cpd/testdata/annotatedSource.feature create mode 100644 pmd-gherkin/src/test/resources/net/sourceforge/pmd/lang/gherkin/cpd/testdata/annotatedSource.txt create mode 100644 pmd-gherkin/src/test/resources/net/sourceforge/pmd/lang/gherkin/cpd/testdata/docstring.feature create mode 100644 pmd-gherkin/src/test/resources/net/sourceforge/pmd/lang/gherkin/cpd/testdata/docstring.txt diff --git a/pmd-dist/pom.xml b/pmd-dist/pom.xml index 
dd53c4f671..2cdabb29b2 100644 --- a/pmd-dist/pom.xml +++ b/pmd-dist/pom.xml @@ -149,6 +149,11 @@ pmd-fortran ${project.version} + + net.sourceforge.pmd + pmd-gherkin + ${project.version} + net.sourceforge.pmd pmd-go diff --git a/pmd-dist/src/test/java/net/sourceforge/pmd/it/BinaryDistributionIT.java b/pmd-dist/src/test/java/net/sourceforge/pmd/it/BinaryDistributionIT.java index fdb2a97706..c39fea9b74 100644 --- a/pmd-dist/src/test/java/net/sourceforge/pmd/it/BinaryDistributionIT.java +++ b/pmd-dist/src/test/java/net/sourceforge/pmd/it/BinaryDistributionIT.java @@ -28,10 +28,10 @@ public class BinaryDistributionIT extends AbstractBinaryDistributionTest { static { // note: apex, javascript, visualforce, and scala require java8 if (PMDExecutor.isJava7Test()) { - SUPPORTED_LANGUAGES_CPD = "Supported languages: [cpp, cs, dart, fortran, go, groovy, java, jsp, kotlin, lua, matlab, modelica, objectivec, perl, php, plsql, python, ruby, swift, xml]"; + SUPPORTED_LANGUAGES_CPD = "Supported languages: [cpp, cs, dart, fortran, gherkin, go, groovy, java, jsp, kotlin, lua, matlab, modelica, objectivec, perl, php, plsql, python, ruby, swift, xml]"; SUPPORTED_LANGUAGES_PMD = "java, jsp, modelica, plsql, pom, vm, wsdl, xml, xsl"; } else { - SUPPORTED_LANGUAGES_CPD = "Supported languages: [apex, cpp, cs, dart, ecmascript, fortran, go, groovy, html, java, jsp, kotlin, lua, matlab, modelica, objectivec, perl, php, plsql, python, ruby, scala, swift, vf, xml]"; + SUPPORTED_LANGUAGES_CPD = "Supported languages: [apex, cpp, cs, dart, ecmascript, fortran, gherkin, go, groovy, html, java, jsp, kotlin, lua, matlab, modelica, objectivec, perl, php, plsql, python, ruby, scala, swift, vf, xml]"; SUPPORTED_LANGUAGES_PMD = "apex, ecmascript, html, java, jsp, modelica, plsql, pom, scala, vf, vm, wsdl, xml, xsl"; } } diff --git a/pmd-gherkin/pom.xml b/pmd-gherkin/pom.xml new file mode 100644 index 0000000000..a9563a1e9d --- /dev/null +++ b/pmd-gherkin/pom.xml @@ -0,0 +1,59 @@ + + + 4.0.0 + 
pmd-gherkin + PMD Gherkin + + + net.sourceforge.pmd + pmd + 6.46.0 + ../ + + + + + + org.antlr + antlr4-maven-plugin + + + + maven-resources-plugin + + false + + ${*} + + + + + + + + + net.sourceforge.pmd + pmd-core + + + org.antlr + antlr4-runtime + + + + junit + junit + test + + + net.sourceforge.pmd + pmd-test + test + + + net.sourceforge.pmd + pmd-lang-test + test + + + diff --git a/pmd-gherkin/src/main/antlr4/net/sourceforge/pmd/lang/gherkin/antlr4/Gherkin.g4 b/pmd-gherkin/src/main/antlr4/net/sourceforge/pmd/lang/gherkin/antlr4/Gherkin.g4 new file mode 100644 index 0000000000..c5e1db52c0 --- /dev/null +++ b/pmd-gherkin/src/main/antlr4/net/sourceforge/pmd/lang/gherkin/antlr4/Gherkin.g4 @@ -0,0 +1,118 @@ +grammar Gherkin; + +// PARSER + +main + // start comment needed because each comment should start on a new line except for the start comment + : STARTCOMMENT? feature description* instructionLine* NL* EOF + ; + +feature + : (NL* tagline)* NL* FEATURE? + ; + +instructionLine + : NL+ (instruction | datatable) + ; + +instruction + : rulex description* // the name "rule" is not allowed by ANTLR (used for internal usage), so calling it rulex + | stepInstruction description* (NL+ stepDescription description*)* (NL+ step)* + | tagline + | instructionDescription description* + ; + +stepInstruction + : background + | scenario + | scenarioOutline + ; + +background: BACKGROUND ; +rulex: RULEX ; +scenario: SCENARIO ; +scenarioOutline : SCENARIOOUTLINE ; + +step : stepItem description*; + +stepItem + : and + | anystep + | but + | datatable + | given + | then + | when + | (NL* tagline )* NL* examples + ; + +tagline + : TAG+ + ; + +and: AND ; +anystep: ANYSTEP ; +but: BUT ; +datatable: DATATABLE+ ; +given: GIVEN ; +then: THEN ; +when: WHEN ; +examples: EXAMPLES ; + +// Descriptions +instructionDescription: text | PARAMETER | AND | ANYSTEP | BUT | GIVEN | THEN | WHEN | SCENARIO ; // We have to deal with overlaps with keywords +stepDescription: text | PARAMETER ; // We have to 
deal with overlaps with keywords +description: text | PARAMETER | TAG | AND | ANYSTEP | BUT | DATATABLE | GIVEN | THEN | WHEN | SCENARIO | SCENARIOOUTLINE | STARTCOMMENT ; // We have to deal with overlaps with keywords + +text: TOKEN+ ; + +// LEXER + +// skipped + +BOMUTF8 : '\u00EF\u00BB\u00BF' -> skip ; + +BOMUTF16 : '\uFEFF' -> skip ; + +WHITESPACE: [ \t]+ -> channel(1) ; + +COMMENT: '\r'?'\n' [ \t]* '#' ~[\r\n]* -> channel(2) ; + +STARTCOMMENT: '#' ~[\r\n]* ; + +DOCSTRING1 + : '"""' .*? '"""' ; + +DOCSTRING2 + : '```' .*? '```' ; + +// Instructions +BACKGROUND: 'Background:' ; +EXAMPLES: ('Examples:' | 'Scenarios:') ; +FEATURE: 'Feature:'; +RULEX: 'Rule:' ; +SCENARIO: ('Example:' | 'Scenario:') ; +SCENARIOOUTLINE : 'Scenario ' ('Outline:' | 'Template:') ; + +// Steps +AND: 'And' ; +ANYSTEP: '*' ; +BUT: 'But' ; +DATATABLE: '|' DATATABLEID? ; // must be an ID because it can contain a space +GIVEN: 'Given' ; +THEN: 'Then' ; +WHEN: 'When' ; + +TAG: '@' ELEMENT+ ; +PARAMETER: '<' PARID '>' | '"' '<' PARID '>' '"' | '\'' '<' PARID '>' '\''; +fragment PARID: [A-Za-z0-9] ([!-=?-~ ]* [!-=?-~])?; // start with an alpha numerical and then all printable characters and end with a non-space +fragment ID: (IDELEMENT | ' ')* IDELEMENT (IDELEMENT | ' ')*; // ID should contain at least one non-whitespace character otherwise the trailing | with a trailing space will match +fragment DATATABLEID: (DATATABLEELEMENT | ' ')* DATATABLEELEMENT (DATATABLEELEMENT | ' ')*; // ID should contain at least one non-whitespace character otherwise the trailing | with a trailing space will match +fragment DATATABLEELEMENT: ELEMENT | '<' | '>' | '"' | '\'' | '\\|' ; +fragment IDELEMENT: ELEMENT | '|' ; +fragment ELEMENT: [!-&(-;=?-{}~\u00A0-\uFFFF] ; + +NL: '\r'? 
'\n' ; +TOKEN: [!-{}-~\u00A0-\uFFFF]+ ; // match everything that isn't matched yet + + diff --git a/pmd-gherkin/src/main/java/net/sourceforge/pmd/cpd/GherkinLanguage.java b/pmd-gherkin/src/main/java/net/sourceforge/pmd/cpd/GherkinLanguage.java new file mode 100644 index 0000000000..85f50da4cc --- /dev/null +++ b/pmd-gherkin/src/main/java/net/sourceforge/pmd/cpd/GherkinLanguage.java @@ -0,0 +1,18 @@ +/** + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.cpd; + +/** + * Language implementation for Gherkin + */ +public class GherkinLanguage extends AbstractLanguage { + + /** + * Creates a new Gherkin Language instance. + */ + public GherkinLanguage() { + super("Gherkin", "gherkin", new GherkinTokenizer(), ".feature"); + } +} diff --git a/pmd-gherkin/src/main/java/net/sourceforge/pmd/cpd/GherkinTokenizer.java b/pmd-gherkin/src/main/java/net/sourceforge/pmd/cpd/GherkinTokenizer.java new file mode 100644 index 0000000000..20b7da848c --- /dev/null +++ b/pmd-gherkin/src/main/java/net/sourceforge/pmd/cpd/GherkinTokenizer.java @@ -0,0 +1,28 @@ +/** + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.cpd; + +import org.antlr.v4.runtime.CharStream; + +import net.sourceforge.pmd.cpd.token.AntlrTokenFilter; +import net.sourceforge.pmd.lang.antlr.AntlrTokenManager; +import net.sourceforge.pmd.lang.gherkin.antlr4.GherkinLexer; + +/** + * The Gherkin Tokenizer + */ +public class GherkinTokenizer extends AntlrTokenizer { + + @Override + protected AntlrTokenManager getLexerForSource(SourceCode sourceCode) { + CharStream charStream = AntlrTokenizer.getCharStreamFromSourceCode(sourceCode); + return new AntlrTokenManager(new GherkinLexer(charStream), sourceCode.getFileName()); + } + + @Override + protected AntlrTokenFilter getTokenFilter(final AntlrTokenManager tokenManager) { + return new AntlrTokenFilter(tokenManager); + } +} diff --git 
a/pmd-gherkin/src/main/java/net/sourceforge/pmd/lang/gherkin/antlr4/package-info.java b/pmd-gherkin/src/main/java/net/sourceforge/pmd/lang/gherkin/antlr4/package-info.java new file mode 100644 index 0000000000..3f06008e46 --- /dev/null +++ b/pmd-gherkin/src/main/java/net/sourceforge/pmd/lang/gherkin/antlr4/package-info.java @@ -0,0 +1,11 @@ +/* + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +/** + * The class {@link net.sourceforge.pmd.lang.gherkin.antlr4.GherkinLexer} will be moved to + * package {@code net.sourceforge.pmd.lang.gherkin.ast} with PMD 7. + * + * <p>
All other classes will be removed. + */ +package net.sourceforge.pmd.lang.gherkin.antlr4; diff --git a/pmd-gherkin/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language b/pmd-gherkin/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language new file mode 100644 index 0000000000..313dd4ddac --- /dev/null +++ b/pmd-gherkin/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language @@ -0,0 +1 @@ +net.sourceforge.pmd.cpd.GherkinLanguage diff --git a/pmd-gherkin/src/test/java/net/sourceforge/pmd/cpd/GherkinTokenizerTest.java b/pmd-gherkin/src/test/java/net/sourceforge/pmd/cpd/GherkinTokenizerTest.java new file mode 100644 index 0000000000..8dbc3bac22 --- /dev/null +++ b/pmd-gherkin/src/test/java/net/sourceforge/pmd/cpd/GherkinTokenizerTest.java @@ -0,0 +1,43 @@ +/* + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.cpd; + +import java.util.Properties; + +import org.junit.Test; +import org.junit.rules.ExpectedException; + +import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; + +public class GherkinTokenizerTest extends CpdTextComparisonTest { + + @org.junit.Rule + public ExpectedException ex = ExpectedException.none(); + + public GherkinTokenizerTest() { + super(".feature"); + } + + @Override + protected String getResourcePrefix() { + return "../lang/gherkin/cpd/testdata"; + } + + @Override + public Tokenizer newTokenizer(Properties properties) { + GherkinTokenizer tok = new GherkinTokenizer(); + return tok; + } + + @Test + public void testAnnotatedSource() { + doTest("annotatedSource"); + } + + @Test + public void testDocstring() { + doTest("docstring"); + } +} diff --git a/pmd-gherkin/src/test/resources/net/sourceforge/pmd/lang/gherkin/cpd/testdata/annotatedSource.feature b/pmd-gherkin/src/test/resources/net/sourceforge/pmd/lang/gherkin/cpd/testdata/annotatedSource.feature new file mode 100644 index 0000000000..45008f6c71 --- /dev/null +++ 
b/pmd-gherkin/src/test/resources/net/sourceforge/pmd/lang/gherkin/cpd/testdata/annotatedSource.feature @@ -0,0 +1,73 @@ +@AnnotatedSource +Feature: Annotated Source + The annotated source displays violations in the source file. It opens in a new window. + + Rules: + 1 Annotation metrics can be selected with a dropdown menu. Only enabled metrics must be shown. + 2 When the annotated source is opened while a specific metric is selected, only that metric should be selected. + 3 A user can scroll through the violations using two buttons (illustrated by up and down arrows). The arrows wrap around the document. + 4 The table collumn "Type" is only shown when multiple metrics are selected + + Scenario: Select a metric type + Given the Annotated Source for file "HIE://11261-37/main/monop/execute.c" + When a user opens the dropdown menu containing "Metric:" + And the user clicks on the dropdown option "Violations/Coding Standard Violations" + Then the selected annotation in the source code should be on line 38 + And the selected annotation in the table should be on line 38 + + Scenario: The user can use the arrows, or "a" and "z" keys, to scroll through the annotations + Given the Annotated Source for file "HIE://11261-37/main/monop/execute.c" + And metric "Coding Standard Violation Annotations" is selected + When the user clicks on the down arrow + And the user presses the "Z" key + And the user clicks on the up arrow + And the user clicks on the up arrow + And the user presses the "A" key + Then the selected annotation in the source code should be on line 254 + + @Rule2 + Scenario Outline: If the user opens the annotated source from e.g. the dashboard for a metric, + only the related annotations should be shown. 
+ Given the Dashboard + And filtering by Project "17607" + And grouping by "File" + And metric "<metric>" is selected + When a user opens the file "clalgorithm_settings.c" using the metric table + Then a new browser window with an "Annotated Source" should be opened + Then only the annotations "<annotations>" should be selected + + Examples: + | metric | annotations | + | TQI Coding Standards | Coding Standard Violation Annotations | + | Coding Standard Violations | Coding Standard Violation Annotations | + | TQI Compiler Warnings | Compiler Warning Annotations | + | Fan Out (%) | Fan Out Annotations | + | TQI Dead Code | Dead Code Annotations | + | TQI Code Duplication | Code Duplication Annotations | + + Scenario: The user should be able to filter Coding Standard Violations by Level + Given the Annotated Source for file "HIE://11514/trunk/components/java/BuildUtil/src/com/tiobe/util/BuildProperties.java" + And the metric "Coding Standard Violations" is selected + When the user opens the dropdown menu "Level" + And the user clicks on the dropdown option "5" + Then there should be 1 violation + And the selected annotation in the source code should be on line 57 + And the annotation should be of level 5 + + @PR27030 + Scenario Outline: The user should be able to filter Coding Standard Violations by Level, Category, Rule, etc + Given the Annotated Source for file "HIE://12939/main/Implementatie/DRGL/src/DirectDoorvoerenAdmin.cpp" + And metric "" is selected + When the user opens the dropdown menu containing "" inside the filter bar + And the user clicks on the dropdown option "