diff --git a/docs/pages/release_notes.md b/docs/pages/release_notes.md
index 8abb75f161..80884b18b6 100644
--- a/docs/pages/release_notes.md
+++ b/docs/pages/release_notes.md
@@ -19,11 +19,33 @@ This is a {{ site.pmd.release_type }} release.
### New and noteworthy
+#### Gherkin support
+Thanks to the contribution from [Anne Brouwers](https://github.com/ASBrouwers), PMD now has CPD support
+for the [Gherkin](https://cucumber.io/docs/gherkin/) language. It is used to define test cases for the
+[Cucumber](https://cucumber.io/) testing tool for behavior-driven development.
+
+Being based on a proper Antlr grammar, CPD can:
+
+* ignore comments
+* honor [comment-based suppressions](pmd_userdocs_cpd.html#suppression)
+
### Fixed Issues
+* java-bestpractices
+ * [#3455](https://github.com/pmd/pmd/issues/3455): \[java] WhileLoopWithLiteralBoolean - false negative with complex expressions
+* java-design
+ * [#3729](https://github.com/pmd/pmd/issues/3729): \[java] TooManyMethods ignores "real" methods which are named like getters or setters
+ * [#3949](https://github.com/pmd/pmd/issues/3949): \[java] FinalFieldCouldBeStatic - false negative with unnecessary parenthesis
+* java-performance
+ * [#3625](https://github.com/pmd/pmd/issues/3625): \[java] AddEmptyString - false negative with empty var
### API Changes
### External Contributions
+* [#3984](https://github.com/pmd/pmd/pull/3984): \[java] Fix AddEmptyString false-negative issue - [@LiGaOg](https://github.com/LiGaOg)
+* [#3988](https://github.com/pmd/pmd/pull/3988): \[java] Modify WhileLoopWithLiteralBoolean to meet the missing case #3455 - [@VoidxHoshi](https://github.com/VoidxHoshi)
+* [#3992](https://github.com/pmd/pmd/pull/3992): \[java] FinalFieldCouldBeStatic - fix false negative with unnecessary parenthesis - [@dalizi007](https://github.com/dalizi007)
+* [#3994](https://github.com/pmd/pmd/pull/3994): \[java] TooManyMethods - improve getter/setter detection (#3729) - [@341816041](https://github.com/341816041)
+* [#4017](https://github.com/pmd/pmd/pull/4017): Add Gherkin support to CPD - [@ASBrouwers](https://github.com/ASBrouwers)
{% endtocmaker %}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/PlainTextLanguage.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/PlainTextLanguage.java
new file mode 100644
index 0000000000..6a2cc25a05
--- /dev/null
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/PlainTextLanguage.java
@@ -0,0 +1,121 @@
+/*
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.lang;
+
+import net.sourceforge.pmd.annotation.Experimental;
+import net.sourceforge.pmd.lang.ast.AstInfo;
+import net.sourceforge.pmd.lang.ast.Parser;
+import net.sourceforge.pmd.lang.ast.RootNode;
+import net.sourceforge.pmd.lang.ast.SourceCodePositioner;
+import net.sourceforge.pmd.lang.ast.impl.AbstractNode;
+
+/**
+ * A dummy language implementation whose parser produces a single node.
+ * This is provided for cases where a non-null language is required, but
+ * the parser is not useful. This is useful eg to mock rules when no other
+ * language is on the classpath. This language is not exposed by {@link LanguageRegistry}
+ * and can only be used explicitly with {@link #getInstance()}.
+ *
+ * @author Clément Fournier
+ * @since 6.48.0
+ */
+@Experimental
+public final class PlainTextLanguage extends BaseLanguageModule {
+
+    private static final Language INSTANCE = new PlainTextLanguage();
+
+    static final String TERSE_NAME = "text";
+
+    /**
+     * Registers a single version named "default", handled by {@link TextLvh}.
+     * Private: the only instance is the one returned by {@link #getInstance()}.
+     */
+    private PlainTextLanguage() {
+        super("Plain text", "Plain text", TERSE_NAME, "plain-text-file-goo-extension");
+        addVersion("default", new TextLvh(), true);
+    }
+
+    /**
+     * Returns the singleton instance of this language.
+     *
+     * @return the shared instance, never null
+     */
+    public static Language getInstance() {
+        return INSTANCE;
+    }
+
+    /**
+     * Version handler whose parser wraps the whole parser task in a single
+     * {@link PlainTextFile} node.
+     */
+    private static final class TextLvh implements LanguageVersionHandler {
+        @Override
+        public Parser getParser() {
+            return PlainTextFile::new;
+        }
+    }
+
+    /**
+     * The only node produced by the parser of {@link PlainTextLanguage}.
+     * It spans from line 1, column 1 to the last line and column reported by
+     * a {@link SourceCodePositioner} built over the source text.
+     */
+    public static final class PlainTextFile extends AbstractNode implements RootNode {
+        private final int beginLine;
+        private final int beginColumn;
+        private final int endLine;
+        private final int endColumn;
+
+        private final AstInfo<PlainTextFile> astInfo;
+
+        /**
+         * Builds the node for the given parser task.
+         *
+         * @param parserTask task providing the source text to measure
+         */
+        PlainTextFile(Parser.ParserTask parserTask) {
+            SourceCodePositioner positioner = new SourceCodePositioner(parserTask.getSourceText());
+            this.beginLine = 1;
+            this.beginColumn = 1;
+            this.endLine = positioner.getLastLine();
+            this.endColumn = positioner.getLastLineColumn();
+            this.astInfo = new AstInfo<>(parserTask, this);
+        }
+
+        @Override
+        public String getXPathNodeName() {
+            return "TextFile";
+        }
+
+        /**
+         * This node carries no image.
+         *
+         * @return always null
+         */
+        @Override
+        public String getImage() {
+            return null;
+        }
+
+        @Override
+        public int getBeginLine() {
+            return beginLine;
+        }
+
+        @Override
+        public int getBeginColumn() {
+            return beginColumn;
+        }
+
+        @Override
+        public int getEndLine() {
+            return endLine;
+        }
+
+        @Override
+        public int getEndColumn() {
+            return endColumn;
+        }
+
+        /**
+         * Not supported for this node.
+         *
+         * @throws UnsupportedOperationException always
+         */
+        @Override
+        public void remove() {
+            throw new UnsupportedOperationException();
+        }
+
+        /**
+         * Always fails, whatever the index.
+         * NOTE(review): presumably because this node never has children - confirm.
+         *
+         * @throws IndexOutOfBoundsException always
+         */
+        @Override
+        public void removeChildAtIndex(int childIndex) {
+            throw new IndexOutOfBoundsException();
+        }
+
+        @Override
+        public String toString() {
+            // Space before "lines" so the output reads "(12 lines)" rather than "(12lines)".
+            return "Plain text file (" + endLine + " lines)";
+        }
+
+        @Override
+        public AstInfo<? extends RootNode> getAstInfo() {
+            return astInfo;
+        }
+    }
+
+}
diff --git a/pmd-dist/pom.xml b/pmd-dist/pom.xml
index 39efc28454..2f2062dd91 100644
--- a/pmd-dist/pom.xml
+++ b/pmd-dist/pom.xml
@@ -154,6 +154,11 @@
pmd-fortran${project.version}
+
+ net.sourceforge.pmd
+ pmd-gherkin
+ ${project.version}
+ net.sourceforge.pmdpmd-go
diff --git a/pmd-dist/src/test/java/net/sourceforge/pmd/it/BinaryDistributionIT.java b/pmd-dist/src/test/java/net/sourceforge/pmd/it/BinaryDistributionIT.java
index 02650b99c7..a011878546 100644
--- a/pmd-dist/src/test/java/net/sourceforge/pmd/it/BinaryDistributionIT.java
+++ b/pmd-dist/src/test/java/net/sourceforge/pmd/it/BinaryDistributionIT.java
@@ -26,7 +26,7 @@ public class BinaryDistributionIT extends AbstractBinaryDistributionTest {
private static final String SUPPORTED_LANGUAGES_PMD;
static {
- SUPPORTED_LANGUAGES_CPD = "Supported languages: [apex, cpp, cs, dart, ecmascript, fortran, go, groovy, html, java, jsp, kotlin, lua, matlab, modelica, objectivec, perl, php, plsql, python, ruby, scala, swift, vf, xml]";
+ SUPPORTED_LANGUAGES_CPD = "Supported languages: [apex, cpp, cs, dart, ecmascript, fortran, gherkin, go, groovy, html, java, jsp, kotlin, lua, matlab, modelica, objectivec, perl, php, plsql, python, ruby, scala, swift, vf, xml]";
SUPPORTED_LANGUAGES_PMD = "apex, ecmascript, html, java, jsp, kotlin, modelica, plsql, pom, scala, swift, vf, vm, wsdl, xml, xsl";
}
diff --git a/pmd-gherkin/pom.xml b/pmd-gherkin/pom.xml
new file mode 100644
index 0000000000..0d9cb40112
--- /dev/null
+++ b/pmd-gherkin/pom.xml
@@ -0,0 +1,59 @@
+
+
+ 4.0.0
+ pmd-gherkin
+ PMD Gherkin
+
+
+ net.sourceforge.pmd
+ pmd
+ 7.0.0-SNAPSHOT
+ ../pom.xml
+
+
+
+
+
+ org.antlr
+ antlr4-maven-plugin
+
+
+
+ maven-resources-plugin
+
+ false
+
+ ${*}
+
+
+
+
+
+
+
+
+ net.sourceforge.pmd
+ pmd-core
+
+
+ org.antlr
+ antlr4-runtime
+
+
+
+ junit
+ junit
+ test
+
+
+ net.sourceforge.pmd
+ pmd-test
+ test
+
+
+ net.sourceforge.pmd
+ pmd-lang-test
+ test
+
+
+
diff --git a/pmd-gherkin/src/main/antlr4/net/sourceforge/pmd/lang/gherkin/ast/Gherkin.g4 b/pmd-gherkin/src/main/antlr4/net/sourceforge/pmd/lang/gherkin/ast/Gherkin.g4
new file mode 100644
index 0000000000..c5e1db52c0
--- /dev/null
+++ b/pmd-gherkin/src/main/antlr4/net/sourceforge/pmd/lang/gherkin/ast/Gherkin.g4
@@ -0,0 +1,118 @@
+// ANTLR4 grammar for Gherkin feature files.
+// Parser rules come first and describe the layout of a file in terms of tokens;
+// the lexer rules that define those tokens follow.
+grammar Gherkin;
+
+// PARSER
+
+// Entry rule: an optional leading comment, the feature header, any number of
+// description tokens, the instruction lines, optional trailing newlines, then EOF.
+main
+ // start comment needed because each comment should start on a new line except for the start comment
+ : STARTCOMMENT? feature description* instructionLine* NL* EOF
+ ;
+
+// Feature header: zero or more tag lines (each may be preceded by newlines),
+// optional newlines, and an optional FEATURE keyword.
+feature
+ : (NL* tagline)* NL* FEATURE?
+ ;
+
+// One or more line breaks followed by either an instruction or a data table.
+instructionLine
+ : NL+ (instruction | datatable)
+ ;
+
+// Alternatives, in order: a Rule header with description; a step-block keyword
+// with its description lines and steps; a tag line; a line of description text.
+instruction
+ : rulex description* // the name "rule" is not allowed by ANTLR (used for internal usage), so calling it rulex
+ | stepInstruction description* (NL+ stepDescription description*)* (NL+ step)*
+ | tagline
+ | instructionDescription description*
+ ;
+
+// Keywords that may be followed by steps (see the second alternative of `instruction`).
+stepInstruction
+ : background
+ | scenario
+ | scenarioOutline
+ ;
+
+// Thin wrappers around the corresponding keyword tokens.
+background: BACKGROUND ;
+rulex: RULEX ;
+scenario: SCENARIO ;
+scenarioOutline : SCENARIOOUTLINE ;
+
+// A step item followed by any number of description tokens.
+step : stepItem description*;
+
+// A step keyword, a data table, or an Examples header; the Examples header may
+// be preceded by tag lines and newlines.
+stepItem
+ : and
+ | anystep
+ | but
+ | datatable
+ | given
+ | then
+ | when
+ | (NL* tagline )* NL* examples
+ ;
+
+// One or more TAG tokens.
+tagline
+ : TAG+
+ ;
+
+// Thin wrappers around the step keyword tokens; a datatable is one or more DATATABLE tokens.
+and: AND ;
+anystep: ANYSTEP ;
+but: BUT ;
+datatable: DATATABLE+ ;
+given: GIVEN ;
+then: THEN ;
+when: WHEN ;
+examples: EXAMPLES ;
+
+// Descriptions
+// Each description rule matches a single item; keyword tokens are listed
+// explicitly where they are accepted as part of free text.
+instructionDescription: text | PARAMETER | AND | ANYSTEP | BUT | GIVEN | THEN | WHEN | SCENARIO ; // We have to deal with overlaps with keywords
+stepDescription: text | PARAMETER ; // We have to deal with overlaps with keywords
+description: text | PARAMETER | TAG | AND | ANYSTEP | BUT | DATATABLE | GIVEN | THEN | WHEN | SCENARIO | SCENARIOOUTLINE | STARTCOMMENT ; // We have to deal with overlaps with keywords
+
+// One or more catch-all TOKENs.
+text: TOKEN+ ;
+
+// LEXER
+
+// skipped
+
+// Byte-order marks are dropped entirely.
+BOMUTF8 : '\u00EF\u00BB\u00BF' -> skip ;
+
+BOMUTF16 : '\uFEFF' -> skip ;
+
+// Runs of spaces and tabs are routed to channel 1, off the default channel.
+WHITESPACE: [ \t]+ -> channel(1) ;
+
+// A comment on its own line: the line break, optional indentation, '#' and the
+// rest of the line, routed to channel 2. The line break is part of this token.
+COMMENT: '\r'?'\n' [ \t]* '#' ~[\r\n]* -> channel(2) ;
+
+// A '#' comment not preceded by a line break; it has no channel action and is
+// referenced by the `main` and `description` parser rules.
+STARTCOMMENT: '#' ~[\r\n]* ;
+
+// Doc strings delimited by triple double quotes or triple backticks. `.*?` is
+// non-greedy, so the token ends at the first closing delimiter.
+DOCSTRING1
+ : '"""' .*? '"""' ;
+
+DOCSTRING2
+ : '```' .*? '```' ;
+
+// Instructions
+// Keywords including their trailing colon; EXAMPLES and SCENARIO each accept two spellings.
+BACKGROUND: 'Background:' ;
+EXAMPLES: ('Examples:' | 'Scenarios:') ;
+FEATURE: 'Feature:';
+RULEX: 'Rule:' ;
+SCENARIO: ('Example:' | 'Scenario:') ;
+SCENARIOOUTLINE : 'Scenario ' ('Outline:' | 'Template:') ;
+
+// Steps
+AND: 'And' ;
+ANYSTEP: '*' ;
+BUT: 'But' ;
+DATATABLE: '|' DATATABLEID? ; // must be an ID because it can contain a space
+GIVEN: 'Given' ;
+THEN: 'Then' ;
+WHEN: 'When' ;
+
+// A tag is '@' followed by one or more ELEMENT characters.
+TAG: '@' ELEMENT+ ;
+// A parameter is '<' PARID '>', either bare or wrapped in a pair of double or single quotes.
+PARAMETER: '<' PARID '>' | '"' '<' PARID '>' '"' | '\'' '<' PARID '>' '\'';
+fragment PARID: [A-Za-z0-9] ([!-=?-~ ]* [!-=?-~])?; // start with an alpha numerical and then all printable characters and end with a non-space
+fragment ID: (IDELEMENT | ' ')* IDELEMENT (IDELEMENT | ' ')*; // ID should contain at least one non-whitespace character otherwise the trailing | with a trailing space will match
+fragment DATATABLEID: (DATATABLEELEMENT | ' ')* DATATABLEELEMENT (DATATABLEELEMENT | ' ')*; // ID should contain at least one non-whitespace character otherwise the trailing | with a trailing space will match
+// A data table cell character: an ELEMENT, an angle bracket, a quote, or an escaped pipe.
+fragment DATATABLEELEMENT: ELEMENT | '<' | '>' | '"' | '\'' | '\\|' ;
+fragment IDELEMENT: ELEMENT | '|' ;
+fragment ELEMENT: [!-&(-;=?-{}~\u00A0-\uFFFF] ;
+
+// Line break, with an optional carriage return.
+NL: '\r'? '\n' ;
+TOKEN: [!-{}-~\u00A0-\uFFFF]+ ; // match everything that isn't matched yet
+
+
diff --git a/pmd-gherkin/src/main/java/net/sourceforge/pmd/lang/gherkin/ast/package-info.java b/pmd-gherkin/src/main/java/net/sourceforge/pmd/lang/gherkin/ast/package-info.java
new file mode 100644
index 0000000000..109abde01a
--- /dev/null
+++ b/pmd-gherkin/src/main/java/net/sourceforge/pmd/lang/gherkin/ast/package-info.java
@@ -0,0 +1,8 @@
+/*
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+/**
+ * Contains the Antlr grammar for Gherkin.
+ */
+package net.sourceforge.pmd.lang.gherkin.ast;
diff --git a/pmd-gherkin/src/main/java/net/sourceforge/pmd/lang/gherkin/cpd/GherkinLanguage.java b/pmd-gherkin/src/main/java/net/sourceforge/pmd/lang/gherkin/cpd/GherkinLanguage.java
new file mode 100644
index 0000000000..d95282944b
--- /dev/null
+++ b/pmd-gherkin/src/main/java/net/sourceforge/pmd/lang/gherkin/cpd/GherkinLanguage.java
@@ -0,0 +1,20 @@
+/**
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.lang.gherkin.cpd;
+
+import net.sourceforge.pmd.cpd.AbstractLanguage;
+
+/**
+ * CPD language definition for Gherkin. All it does is hand its name, its
+ * terse name, a {@link GherkinTokenizer} and its file extension to the
+ * {@link AbstractLanguage} constructor.
+ */
+public class GherkinLanguage extends AbstractLanguage {
+
+    private static final String NAME = "Gherkin";
+    private static final String TERSE_NAME = "gherkin";
+    private static final String FILE_EXTENSION = ".feature";
+
+    /**
+     * Builds the Gherkin language with a fresh tokenizer instance.
+     */
+    public GherkinLanguage() {
+        super(NAME, TERSE_NAME, new GherkinTokenizer(), FILE_EXTENSION);
+    }
+}
diff --git a/pmd-gherkin/src/main/java/net/sourceforge/pmd/lang/gherkin/cpd/GherkinTokenizer.java b/pmd-gherkin/src/main/java/net/sourceforge/pmd/lang/gherkin/cpd/GherkinTokenizer.java
new file mode 100644
index 0000000000..485be4bf2d
--- /dev/null
+++ b/pmd-gherkin/src/main/java/net/sourceforge/pmd/lang/gherkin/cpd/GherkinTokenizer.java
@@ -0,0 +1,24 @@
+/**
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.lang.gherkin.cpd;
+
+import org.antlr.v4.runtime.CharStream;
+
+import net.sourceforge.pmd.cpd.SourceCode;
+import net.sourceforge.pmd.cpd.internal.AntlrTokenizer;
+import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrTokenManager;
+import net.sourceforge.pmd.lang.gherkin.ast.GherkinLexer;
+
+/**
+ * CPD tokenizer for Gherkin, backed by the ANTLR-generated {@link GherkinLexer}.
+ */
+public class GherkinTokenizer extends AntlrTokenizer {
+
+    /**
+     * Wraps a {@link GherkinLexer} over the given source in a token manager.
+     *
+     * @param sourceCode the source to tokenize; also supplies the file name
+     *                   passed to the token manager
+     * @return a token manager reading from a new lexer
+     */
+    @Override
+    protected AntlrTokenManager getLexerForSource(SourceCode sourceCode) {
+        final CharStream input = AntlrTokenizer.getCharStreamFromSourceCode(sourceCode);
+        final GherkinLexer lexer = new GherkinLexer(input);
+        return new AntlrTokenManager(lexer, sourceCode.getFileName());
+    }
+}
diff --git a/pmd-gherkin/src/main/java/net/sourceforge/pmd/lang/gherkin/cpd/package-info.java b/pmd-gherkin/src/main/java/net/sourceforge/pmd/lang/gherkin/cpd/package-info.java
new file mode 100644
index 0000000000..9a25b68105
--- /dev/null
+++ b/pmd-gherkin/src/main/java/net/sourceforge/pmd/lang/gherkin/cpd/package-info.java
@@ -0,0 +1,8 @@
+/*
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+/**
+ * Contains Gherkin tokenizer and language classes.
+ */
+package net.sourceforge.pmd.lang.gherkin.cpd;
diff --git a/pmd-gherkin/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language b/pmd-gherkin/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language
new file mode 100644
index 0000000000..fbe21e1710
--- /dev/null
+++ b/pmd-gherkin/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language
@@ -0,0 +1 @@
+net.sourceforge.pmd.lang.gherkin.cpd.GherkinLanguage
diff --git a/pmd-gherkin/src/test/java/net/sourceforge/pmd/cpd/GherkinTokenizerTest.java b/pmd-gherkin/src/test/java/net/sourceforge/pmd/cpd/GherkinTokenizerTest.java
new file mode 100644
index 0000000000..f8f064ea41
--- /dev/null
+++ b/pmd-gherkin/src/test/java/net/sourceforge/pmd/cpd/GherkinTokenizerTest.java
@@ -0,0 +1,39 @@
+/*
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.cpd;
+
+import java.util.Properties;
+
+import org.junit.Test;
+
+import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest;
+import net.sourceforge.pmd.lang.gherkin.cpd.GherkinTokenizer;
+
+/**
+ * Tests for {@link GherkinTokenizer}. Each test runs the inherited
+ * {@code doTest} on one named resource under {@link #getResourcePrefix()}.
+ */
+public class GherkinTokenizerTest extends CpdTextComparisonTest {
+
+    /**
+     * Passes the {@code .feature} extension to the base test.
+     */
+    public GherkinTokenizerTest() {
+        super(".feature");
+    }
+
+    @Override
+    public Tokenizer newTokenizer(Properties properties) {
+        // The properties are not consulted; the tokenizer is created as-is.
+        return new GherkinTokenizer();
+    }
+
+    @Override
+    protected String getResourcePrefix() {
+        return "../lang/gherkin/cpd/testdata";
+    }
+
+    @Test
+    public void testAnnotatedSource() {
+        doTest("annotatedSource");
+    }
+
+    @Test
+    public void testDocstring() {
+        doTest("docstring");
+    }
+}
diff --git a/pmd-gherkin/src/test/resources/net/sourceforge/pmd/lang/gherkin/cpd/testdata/annotatedSource.feature b/pmd-gherkin/src/test/resources/net/sourceforge/pmd/lang/gherkin/cpd/testdata/annotatedSource.feature
new file mode 100644
index 0000000000..45008f6c71
--- /dev/null
+++ b/pmd-gherkin/src/test/resources/net/sourceforge/pmd/lang/gherkin/cpd/testdata/annotatedSource.feature
@@ -0,0 +1,73 @@
+@AnnotatedSource
+Feature: Annotated Source
+ The annotated source displays violations in the source file. It opens in a new window.
+
+ Rules:
+ 1 Annotation metrics can be selected with a dropdown menu. Only enabled metrics must be shown.
+ 2 When the annotated source is opened while a specific metric is selected, only that metric should be selected.
+ 3 A user can scroll through the violations using two buttons (illustrated by up and down arrows). The arrows wrap around the document.
+ 4 The table collumn "Type" is only shown when multiple metrics are selected
+
+ Scenario: Select a metric type
+ Given the Annotated Source for file "HIE://11261-37/main/monop/execute.c"
+ When a user opens the dropdown menu containing "Metric:"
+ And the user clicks on the dropdown option "Violations/Coding Standard Violations"
+ Then the selected annotation in the source code should be on line 38
+ And the selected annotation in the table should be on line 38
+
+ Scenario: The user can use the arrows, or "a" and "z" keys, to scroll through the annotations
+ Given the Annotated Source for file "HIE://11261-37/main/monop/execute.c"
+ And metric "Coding Standard Violation Annotations" is selected
+ When the user clicks on the down arrow
+ And the user presses the "Z" key
+ And the user clicks on the up arrow
+ And the user clicks on the up arrow
+ And the user presses the "A" key
+ Then the selected annotation in the source code should be on line 254
+
+ @Rule2
+ Scenario Outline: If the user opens the annotated source from e.g. the dashboard for a metric,
+ only the related annotations should be shown.
+ Given the Dashboard
+ And filtering by Project "17607"
+ And grouping by "File"
+ And metric "" is selected
+ When a user opens the file "clalgorithm_settings.c" using the metric table
+ Then a new browser window with an "Annotated Source" should be opened
+ Then only the annotations "" should be selected
+
+ Examples:
+ | metric | annotations |
+ | TQI Coding Standards | Coding Standard Violation Annotations |
+ | Coding Standard Violations | Coding Standard Violation Annotations |
+ | TQI Compiler Warnings | Compiler Warning Annotations |
+ | Fan Out (%) | Fan Out Annotations |
+ | TQI Dead Code | Dead Code Annotations |
+ | TQI Code Duplication | Code Duplication Annotations |
+
+ Scenario: The user should be able to filter Coding Standard Violations by Level
+ Given the Annotated Source for file "HIE://11514/trunk/components/java/BuildUtil/src/com/tiobe/util/BuildProperties.java"
+ And the metric "Coding Standard Violations" is selected
+ When the user opens the dropdown menu "Level"
+ And the user clicks on the dropdown option "5"
+ Then there should be 1 violation
+ And the selected annotation in the source code should be on line 57
+ And the annotation should be of level 5
+
+ @PR27030
+ Scenario Outline: The user should be able to filter Coding Standard Violations by Level, Category, Rule, etc
+ Given the Annotated Source for file "HIE://12939/main/Implementatie/DRGL/src/DirectDoorvoerenAdmin.cpp"
+ And metric "" is selected
+ When the user opens the dropdown menu containing "" inside the filter bar
+ And the user clicks on the dropdown option "