diff --git a/.all-contributorsrc b/.all-contributorsrc index 99708783e8..908c5cc8cb 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -1053,7 +1053,8 @@ "avatar_url": "https://avatars.githubusercontent.com/u/86377278?v=4", "profile": "https://github.com/aaronhurst-google", "contributions": [ - "bug" + "bug", + "code" ] }, { @@ -5917,7 +5918,8 @@ "avatar_url": "https://avatars.githubusercontent.com/u/35368290?v=4", "profile": "https://github.com/tprouvot", "contributions": [ - "bug" + "bug", + "code" ] }, { @@ -6768,6 +6770,42 @@ "code", "financial" ] + }, + { + "login": "abyss638", + "name": "Simon Abykov", + "avatar_url": "https://avatars.githubusercontent.com/u/90252673?v=4", + "profile": "https://github.com/abyss638", + "contributions": [ + "code" + ] + }, + { + "login": "eklimo", + "name": "Edward Klimoshenko", + "avatar_url": "https://avatars.githubusercontent.com/u/39220927?v=4", + "profile": "https://github.com/eklimo", + "contributions": [ + "bug", + "code" + ] + }, + { + "login": "nvuillam", + "name": "Nicolas Vuillamy", + "avatar_url": "https://avatars.githubusercontent.com/u/17500430?v=4", + "profile": "https://github.com/nvuillam", + "contributions": [ + "doc" + ] + },{ + "login": "pacvz", + "name": "pacvz", + "avatar_url": "https://avatars.githubusercontent.com/u/35453365?v=4", + "profile": "https://github.com/pacvz", + "contributions": [ + "code" + ] } ], "contributorsPerLine": 7, diff --git a/.ci/build.sh b/.ci/build.sh index 828fcb9bbf..6c935f0a52 100755 --- a/.ci/build.sh +++ b/.ci/build.sh @@ -28,6 +28,15 @@ function build() { pmd_ci_utils_determine_build_env pmd/pmd echo + if ! 
pmd_ci_utils_is_fork_or_pull_request; then + if [ "${PMD_CI_BRANCH}" = "experimental-apex-parser" ]; then + pmd_ci_log_group_start "Build with mvnw" + ./mvnw clean install --show-version --errors --batch-mode --no-transfer-progress "${PMD_MAVEN_EXTRA_OPTS[@]}" + pmd_ci_log_group_end + exit 0 + fi + fi + if pmd_ci_utils_is_fork_or_pull_request; then pmd_ci_log_group_start "Build with mvnw" ./mvnw clean install --show-version --errors --batch-mode --no-transfer-progress "${PMD_MAVEN_EXTRA_OPTS[@]}" diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 01b91ce2d5..862bb5234b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -6,6 +6,7 @@ on: - main - master - pmd/7.0.x + - experimental-apex-parser tags: - '**' pull_request: diff --git a/SPONSORS.md b/SPONSORS.md new file mode 100644 index 0000000000..4bbe1c7b55 --- /dev/null +++ b/SPONSORS.md @@ -0,0 +1,10 @@ +# PMD's sponsors + +Many thanks to all our sponsors: + +* [Matt Hargett](https://github.com/matthargett) (@matthargett) + +If you also want to sponsor PMD, you have two options: + +* [Sponsor @pmd on GitHub Sponsors](https://github.com/sponsors/pmd) +* [PMD - Open Collective](https://opencollective.com/pmd) diff --git a/docs/_config.yml b/docs/_config.yml index f89896c168..841070517d 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -2,7 +2,7 @@ repository: pmd/pmd pmd: version: 7.0.0-SNAPSHOT - previous_version: 6.47.0 + previous_version: 6.49.0 date: ??-?????-2022 release_type: major diff --git a/docs/pages/next_major_development.md b/docs/pages/next_major_development.md index deccde46f6..4f054eff8d 100644 --- a/docs/pages/next_major_development.md +++ b/docs/pages/next_major_development.md @@ -246,6 +246,76 @@ the breaking API changes will be performed in 7.0.0. an API is tagged as `@Deprecated` or not in the latest minor release. 
During the development of 7.0.0, we may decide to remove some APIs that were not tagged as deprecated, though we'll try to avoid it." %} +#### 6.49.0 + +##### Deprecated API + +* In order to reduce the dependency on Apex Jorje classes, the following methods have been deprecated. + These methods all leaked internal Jorje enums. These enums have now been replaced by enums in + PMD's AST package. + * {% jdoc !!apex::lang.apex.ast.ASTAssignmentExpression#getOperator() %} + * {% jdoc !!apex::lang.apex.ast.ASTBinaryExpression#getOperator() %} + * {% jdoc !!apex::lang.apex.ast.ASTBooleanExpression#getOperator() %} + * {% jdoc !!apex::lang.apex.ast.ASTPostfixExpression#getOperator() %} + * {% jdoc !!apex::lang.apex.ast.ASTPrefixExpression#getOperator() %} + + All these classes now have a new `getOp()` method. Existing code should be refactored to use this method instead. + It returns the new enums, like {% jdoc apex::lang.apex.ast.AssignmentOperator %}, and avoids + the dependency on Jorje. + +#### 6.48.0 + +##### CPD CLI + +* CPD has a new CLI option `--debug`. This option has the same behavior as in PMD. It enables more verbose + logging output. + +##### Rule Test Framework + +* The module "pmd-test", which contains support classes to write rule tests, now **requires Java 8**. If you depend on + this module for testing your own custom rules, you'll need to make sure to use at least Java 8. +* The new module "pmd-test-schema" now contains the XSD schema and the code to parse the rule test XML files. The + schema has been extracted in order to easily share it with other tools like the Rule Designer or IDE plugins. +* Test schema changes: + * The attribute `isRegressionTest` of `test-code` is deprecated. The new + attribute `disabled` should be used instead for defining whether a rule test should be skipped or not. + * The attributes `reinitializeRule` and `useAuxClasspath` of `test-code` are deprecated and assumed true. + They will not be replaced.
+ * The new attribute `focused` of `test-code` allows disabling all tests except the focused one temporarily. +* More information about the rule test framework can be found in the documentation: + [Testing your rules](pmd_userdocs_extending_testing.html) + +##### Deprecated API + +* The experimental Java AST class {% jdoc java::lang.java.ast.ASTGuardedPattern %} has been deprecated and + will be removed. It was introduced for Java 17 and Java 18 Preview as part of pattern matching for switch, + but it is no longer supported with Java 19 Preview. +* The interface {% jdoc core::cpd.renderer.CPDRenderer %} is deprecated. For custom CPD renderers + the new interface {% jdoc core::cpd.renderer.CPDReportRenderer %} should be used. +* The class {% jdoc test::testframework.TestDescriptor %} is deprecated, replaced with {% jdoc test-schema::test.schema.RuleTestDescriptor %}. +* Many methods of {% jdoc test::testframework.RuleTst %} have been deprecated as internal API. + +##### Experimental APIs + +* To support the Java preview language features "Pattern Matching for Switch" and "Record Patterns", the following + AST nodes have been introduced as experimental: + * {% jdoc java::lang.java.ast.ASTSwitchGuard %} + * {% jdoc java::lang.java.ast.ASTRecordPattern %} + * {% jdoc java::lang.java.ast.ASTComponentPatternList %} + +##### Internal API + +Those APIs are not intended to be used by clients, and will be hidden or removed with PMD 7.0.0. +You can identify them with the `@InternalApi` annotation. You'll also get a deprecation warning. 
+ +* {%jdoc !!core::cpd.CPDConfiguration#setRenderer(net.sourceforge.pmd.cpd.Renderer) %} +* {%jdoc !!core::cpd.CPDConfiguration#setCPDRenderer(net.sourceforge.pmd.cpd.renderer.CPDRenderer) %} +* {%jdoc !!core::cpd.CPDConfiguration#getRenderer() %} +* {%jdoc !!core::cpd.CPDConfiguration#getCPDRenderer() %} +* {%jdoc !!core::cpd.CPDConfiguration#getRendererFromString(java.lang.String,java.lang.String) %} +* {%jdoc !!core::cpd.CPDConfiguration#getCPDRendererFromString(java.lang.String,java.lang.String) %} +* {%jdoc core::cpd.renderer.CPDRendererAdapter %} + #### 6.47.0 No changes. diff --git a/docs/pages/pmd/projectdocs/credits.md b/docs/pages/pmd/projectdocs/credits.md index 1af5d1cf1c..4017071bc9 100644 --- a/docs/pages/pmd/projectdocs/credits.md +++ b/docs/pages/pmd/projectdocs/credits.md @@ -231,737 +231,743 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d
ASTAssignmentExpression#getOperator
+ * ASTBinaryExpression#getOperator
+ * ASTBooleanExpression#getOperator
+ * ASTPostfixExpression#getOperator
+ * ASTPrefixExpression#getOperator
+
+ All these classes now have a new `getOp()` method. Existing code should be refactored to use this method instead.
+ It returns the new enums, like `AssignmentOperator`, and avoids
+ the dependency on Jorje.
+
+### External Contributions
+
+* [#4081](https://github.com/pmd/pmd/pull/4081): \[apex] Remove Jorje leaks outside `ast` package - [@eklimo](https://github.com/eklimo)
+* [#4083](https://github.com/pmd/pmd/pull/4083): \[java] UnnecessaryImport false positive for on-demand imports of nested classes (fix for #4082) - [@abyss638](https://github.com/abyss638)
+* [#4092](https://github.com/pmd/pmd/pull/4092): \[apex] Implement ApexQualifiableNode for ASTUserEnum - [@aaronhurst-google](https://github.com/aaronhurst-google)
+* [#4095](https://github.com/pmd/pmd/pull/4095): \[core] CPD: Added begin and end token to XML reports - [@pacvz](https://github.com/pacvz)
+* [#4097](https://github.com/pmd/pmd/pull/4097): \[apex] ApexUnitTestClassShouldHaveAssertsRule: Support new Assert class (Apex v56.0) - [@tprouvot](https://github.com/tprouvot)
+* [#4104](https://github.com/pmd/pmd/pull/4104): \[doc] Add MegaLinter in the list of integrations - [@nvuillam](https://github.com/nvuillam)
+
+### Stats
+* 49 commits
+* 10 closed tickets & PRs
+* Days since last release: 32
+
+## 30-July-2022 - 6.48.0
+
+The PMD team is pleased to announce PMD 6.48.0.
+
+This is a minor release.
+
+### Table Of Contents
+
+* [New and noteworthy](#new-and-noteworthy)
+ * [Java 19 Support](#java-19-support)
+ * [Gherkin support](#gherkin-support)
+* [Fixed Issues](#fixed-issues)
+* [API Changes](#api-changes)
+ * [CPD CLI](#cpd-cli)
+ * [Rule Test Framework](#rule-test-framework)
+ * [Deprecated API](#deprecated-api)
+ * [Experimental APIs](#experimental-apis)
+ * [Internal API](#internal-api)
+* [Financial Contributions](#financial-contributions)
+* [External Contributions](#external-contributions)
+* [Stats](#stats)
+
+### New and noteworthy
+
+#### Java 19 Support
+
+This release of PMD brings support for Java 19. There are no new standard language features.
+
+PMD supports [JEP 427: Pattern Matching for switch (Third Preview)](https://openjdk.org/jeps/427) and
+[JEP 405: Record Patterns (Preview)](https://openjdk.org/jeps/405) as preview language features.
+
+In order to analyze a project with PMD that uses these language features,
+you'll need to enable it via the environment variable `PMD_JAVA_OPTS` and select the new language
+version `19-preview`:
+
+ export PMD_JAVA_OPTS=--enable-preview
+ ./run.sh pmd -language java -version 19-preview ...
+
+Note: Support for Java 17 preview language features has been removed. The version "17-preview" is no longer available.
+
+#### Gherkin support
+Thanks to the contribution from [Anne Brouwers](https://github.com/ASBrouwers) PMD now has CPD support
+for the [Gherkin](https://cucumber.io/docs/gherkin/) language. It is used to define test cases for the
+[Cucumber](https://cucumber.io/) testing tool for behavior-driven development.
+
+Being based on a proper Antlr grammar, CPD can:
+
+* ignore comments
+* honor [comment-based suppressions](pmd_userdocs_cpd.html#suppression)
+
+### Fixed Issues
+* apex
+ * [#4056](https://github.com/pmd/pmd/pull/4056): \[apex] ApexSOQLInjection: Add support count query
+* core
+ * [#3796](https://github.com/pmd/pmd/issues/3796): \[core] CPD should also provide a `--debug` flag
+ * [#4021](https://github.com/pmd/pmd/pull/4021): \[core] CPD: Add total number of tokens to XML reports
+ * [#4031](https://github.com/pmd/pmd/issues/4031): \[core] If report is written to stdout, stdout should not be closed
+ * [#4051](https://github.com/pmd/pmd/issues/4051): \[doc] Additional rulesets are not listed in documentation
+ * [#4053](https://github.com/pmd/pmd/pull/4053): \[core] Allow building PMD under Java 18+
+* java
+ * [#4015](https://github.com/pmd/pmd/issues/4015): \[java] Support JDK 19
+* java-bestpractices
+ * [#3455](https://github.com/pmd/pmd/issues/3455): \[java] WhileLoopWithLiteralBoolean - false negative with complex expressions
+* java-design
+ * [#3729](https://github.com/pmd/pmd/issues/3729): \[java] TooManyMethods ignores "real" methods which are named like getters or setters
+ * [#3949](https://github.com/pmd/pmd/issues/3949): \[java] FinalFieldCouldBeStatic - false negative with unnecessary parenthesis
+* java-performance
+ * [#3625](https://github.com/pmd/pmd/issues/3625): \[java] AddEmptyString - false negative with empty var
+* lua
+ * [#4061](https://github.com/pmd/pmd/pull/4061): \[lua] Fix several related Lua parsing issues found when using CPD
+* test
+ * [#3302](https://github.com/pmd/pmd/pull/3302): \[test] Improve xml test schema
+ * [#3758](https://github.com/pmd/pmd/issues/3758): \[test] Move pmd-test to java 8
+ * [#3976](https://github.com/pmd/pmd/pull/3976): \[test] Extract xml schema module
+
+### API Changes
+
+#### CPD CLI
+
+* CPD has a new CLI option `--debug`. This option has the same behavior as in PMD. It enables more verbose
+ logging output.
+
+#### Rule Test Framework
+
+* The module "pmd-test", which contains support classes to write rule tests, now **requires Java 8**. If you depend on
+ this module for testing your own custom rules, you'll need to make sure to use at least Java 8.
+* The new module "pmd-test-schema" now contains the XSD schema and the code to parse the rule test XML files. The
+ schema has been extracted in order to easily share it with other tools like the Rule Designer or IDE plugins.
+* Test schema changes:
+ * The attribute `isRegressionTest` of `test-code` is deprecated. The new
+ attribute `disabled` should be used instead for defining whether a rule test should be skipped or not.
+ * The attributes `reinitializeRule` and `useAuxClasspath` of `test-code` are deprecated and assumed true.
+ They will not be replaced.
+ * The new attribute `focused` of `test-code` allows disabling all tests except the focused one temporarily.
+* More information about the rule test framework can be found in the documentation:
+ [Testing your rules](pmd_userdocs_extending_testing.html)
+
+#### Deprecated API
+
+* The experimental Java AST class `ASTGuardedPattern` has been deprecated and
+ will be removed. It was introduced for Java 17 and Java 18 Preview as part of pattern matching for switch,
+ but it is no longer supported with Java 19 Preview.
+* The interface `CPDRenderer` is deprecated. For custom CPD renderers
+ the new interface `CPDReportRenderer` should be used.
+* The class `TestDescriptor` is deprecated, replaced with `RuleTestDescriptor`.
+* Many methods of `RuleTst` have been deprecated as internal API.
+
+#### Experimental APIs
+
+* To support the Java preview language features "Pattern Matching for Switch" and "Record Patterns", the following
+ AST nodes have been introduced as experimental:
+ * ASTSwitchGuard
+ * ASTRecordPattern
+ * ASTComponentPatternList
+
+#### Internal API
+
+Those APIs are not intended to be used by clients, and will be hidden or removed with PMD 7.0.0.
+You can identify them with the `@InternalApi` annotation. You'll also get a deprecation warning.
+
+* CPDConfiguration#setRenderer
+* CPDConfiguration#setCPDRenderer
+* CPDConfiguration#getRenderer
+* CPDConfiguration#getCPDRenderer
+* CPDConfiguration#getRendererFromString
+* CPDConfiguration#getCPDRendererFromString
+* CPDRendererAdapter
+
+### Financial Contributions
+
+Many thanks to our sponsors:
+
+* [Matt Hargett](https://github.com/matthargett) (@matthargett)
+
+### External Contributions
+* [#3984](https://github.com/pmd/pmd/pull/3984): \[java] Fix AddEmptyString false-negative issue - [@LiGaOg](https://github.com/LiGaOg)
+* [#3988](https://github.com/pmd/pmd/pull/3988): \[java] Modify WhileLoopWithLiteralBoolean to meet the missing case #3455 - [@VoidxHoshi](https://github.com/VoidxHoshi)
+* [#3992](https://github.com/pmd/pmd/pull/3992): \[java] FinalFieldCouldBeStatic - fix false negative with unnecessary parenthesis - [@dalizi007](https://github.com/dalizi007)
+* [#3994](https://github.com/pmd/pmd/pull/3994): \[java] TooManyMethods - improve getter/setter detection (#3729) - [@341816041](https://github.com/341816041)
+* [#4017](https://github.com/pmd/pmd/pull/4017): Add Gherkin support to CPD - [@ASBrouwers](https://github.com/ASBrouwers)
+* [#4021](https://github.com/pmd/pmd/pull/4021): \[core] CPD: Add total number of tokens to XML reports - [@maikelsteneker](https://github.com/maikelsteneker)
+* [#4056](https://github.com/pmd/pmd/pull/4056): \[apex] ApexSOQLInjection: Add support count query - [@gwilymatgearset](https://github.com/gwilymatgearset)
+* [#4061](https://github.com/pmd/pmd/pull/4061): \[lua] Fix several related Lua parsing issues found when using CPD - [@matthargett](https://github.com/matthargett)
+
+### Stats
+* 102 commits
+* 26 closed tickets & PRs
+* Days since last release: 35
+
## 25-June-2022 - 6.47.0
The PMD team is pleased to announce PMD 6.47.0.
diff --git a/javacc-wrapper.xml b/javacc-wrapper.xml
index 404c8c2fd8..c7f1d7eead 100644
--- a/javacc-wrapper.xml
+++ b/javacc-wrapper.xml
@@ -59,8 +59,8 @@
+
+ Example usage: *
- * Language javaLanguage = LanguageRegistry.{@link LanguageRegistry#getLanguage(String) getLanguage}("Java"); + * Language javaLanguage = LanguageRegistry.PMD.{@link LanguageRegistry#getLanguageById(String) getLanguageById}("java"); * LanguageVersion java11 = javaLanguage.{@link Language#getVersion(String) getVersion}("11"); * LanguageVersionHandler handler = java11.getLanguageVersionHandler(); * Parser parser = handler.getParser(handler.getDefaultParserOptions()); diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/LanguageVersionDiscoverer.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/LanguageVersionDiscoverer.java index 4e22b03a4f..927e6572d3 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/LanguageVersionDiscoverer.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/LanguageVersionDiscoverer.java @@ -9,9 +9,12 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; +import org.checkerframework.checker.nullness.qual.Nullable; +import net.sourceforge.pmd.annotation.DeprecatedUntil700; import net.sourceforge.pmd.internal.util.AssertionUtil; /** @@ -20,13 +23,11 @@ import net.sourceforge.pmd.internal.util.AssertionUtil; * here. */ public class LanguageVersionDiscoverer { - private MaplanguageToLanguageVersion = new HashMap<>(); + private final LanguageRegistry languageRegistry; + private final Map languageToLanguageVersion = new HashMap<>(); private LanguageVersion forcedVersion; - public LanguageVersionDiscoverer() { - this(null); - } /** * Build a new instance. @@ -35,10 +36,18 @@ public class LanguageVersionDiscoverer { * The methods of this class still work as usual and do not * care about the forced language version. 
*/ - public LanguageVersionDiscoverer(LanguageVersion forcedVersion) { + public LanguageVersionDiscoverer(LanguageRegistry registry, LanguageVersion forcedVersion) { + this.languageRegistry = registry; this.forcedVersion = forcedVersion; } + /** + * Build a new instance with no forced version. + */ + public LanguageVersionDiscoverer(LanguageRegistry registry) { + this(registry, null); + } + /** * Set the given LanguageVersion as the current default for it's Language. * @@ -96,7 +105,7 @@ public class LanguageVersionDiscoverer { * null
if there are no supported Languages for the * file. */ - public LanguageVersion getDefaultLanguageVersionForFile(String fileName) { + public @Nullable LanguageVersion getDefaultLanguageVersionForFile(String fileName) { Listlanguages = getLanguagesForFile(fileName); LanguageVersion languageVersion = null; if (!languages.isEmpty()) { @@ -119,7 +128,11 @@ public class LanguageVersionDiscoverer { * @param sourceFile * The file. * @return The Languages for the source file, may be empty. + * + * @deprecated PMD 7 avoids using {@link File}. */ + @Deprecated + @DeprecatedUntil700 public List getLanguagesForFile(File sourceFile) { return getLanguagesForFile(sourceFile.getName()); } @@ -133,7 +146,9 @@ public class LanguageVersionDiscoverer { */ public List getLanguagesForFile(String fileName) { String extension = getExtension(fileName); - return LanguageRegistry.findByExtension(extension); + return languageRegistry.getLanguages().stream() + .filter(it -> it.hasExtension(extension)) + .collect(Collectors.toList()); } // Get the extensions from a file diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/CharStream.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/CharStream.java deleted file mode 100644 index 5cf6044ae7..0000000000 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/CharStream.java +++ /dev/null @@ -1,120 +0,0 @@ -/* - * BSD-style license; for more info see http://pmd.sourceforge.net/license.html - */ - -package net.sourceforge.pmd.lang.ast; - - -import java.io.IOException; - -import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument; - -/** - * PMD flavour of character streams used by JavaCC parsers. - * - * TODO for when all JavaCC languages are aligned: - * * rename methods to match decent naming conventions - * * move to impl.javacc package - */ -public interface CharStream { - - /** - * Returns the next character from the input. After a {@link #backup(int)}, - * some of the already read chars must be spit out again. 
- * - * @return The next character - * - * @throws IOException If the underlying char stream throws - */ - char readChar() throws IOException; - - - /** - * Calls {@link #readChar()} and returns its value, marking its position - * as the beginning of the next token. All characters must remain in - * the buffer between two successive calls to this method to implement - * backup correctly. - */ - char BeginToken() throws IOException; // SUPPRESS CHECKSTYLE we'll rename it later - - - /** - * Returns a string made up of characters from the token mark up to - * to the current buffer position. - */ - String GetImage(); // SUPPRESS CHECKSTYLE we'll rename it later - - - /** - * Returns an array of characters that make up the suffix of length 'len' for - * the current token. This is used to build up the matched string - * for use in actions in the case of MORE. A simple and inefficient - * implementation of this is as follows : - * - * {@code - * String t = tokenImage(); - * return t.substring(t.length() - len, t.length()).toCharArray(); - * }- * - * @param len Length of the returned array - * - * @return The suffix - * - * @throws IndexOutOfBoundsException If len is greater than the length of the - * current token - */ - char[] GetSuffix(int len); // SUPPRESS CHECKSTYLE we'll rename it later - - - /** - * Pushes a given number of already read chars into the buffer. - * Subsequent calls to {@link #readChar()} will read those characters - * before proceeding to read the underlying char stream. - * - *A lexer calls this method if it has already read some characters, - * but cannot use them to match a (longer) token. So, they will - * be used again as the prefix of the next token. - * - * @throws AssertionError If the requested amount is greater than the - * number of read chars - */ - void backup(int amount); - - @Deprecated - int getBeginColumn(); - - @Deprecated - int getBeginLine(); - - - /** Returns the column number of the last character for the current token. 
*/ - int getEndColumn(); - - - /** Returns the line number of the last character for current token. */ - int getEndLine(); - - // These methods are added by PMD - - - /** - * Returns the token document for the tokens being built. Having it - * here is the most convenient place for the time being. - */ - default JavaccTokenDocument getTokenDocument() { - return null; // for VelocityCharStream - } - - - /** Returns the start offset of the current token (in the original source), inclusive. */ - default int getStartOffset() { - return -1; - } - - - /** Returns the end offset of the current token (in the original source), exclusive. */ - default int getEndOffset() { - return -1; - } - -} diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/FileAnalysisException.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/FileAnalysisException.java index 1619de12b8..0c7f9de52c 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/FileAnalysisException.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/FileAnalysisException.java @@ -6,6 +6,7 @@ package net.sourceforge.pmd.lang.ast; import java.util.Objects; +import org.apache.commons.lang3.StringUtils; import org.checkerframework.checker.nullness.qual.NonNull; import net.sourceforge.pmd.lang.document.TextFile; @@ -39,7 +40,7 @@ public class FileAnalysisException extends RuntimeException { super(message, cause); } - FileAnalysisException setFileName(String filename) { + public FileAnalysisException setFileName(String filename) { this.filename = Objects.requireNonNull(filename); return this; } @@ -55,6 +56,22 @@ public class FileAnalysisException extends RuntimeException { return filename; } + @Override + public String getMessage() { + return errorKind() + StringUtils.uncapitalize(positionToString()) + ": " + super.getMessage(); + } + + protected String errorKind() { + return "Error"; + } + + protected String positionToString() { + if (hasFileName()) { + return " in file '" + getFileName() + 
"'"; + } + return ""; + } + /** * Wraps the cause into an analysis exception. If it is itself an analysis diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/GenericToken.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/GenericToken.java index f7256447bb..0912714a99 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/GenericToken.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/GenericToken.java @@ -6,8 +6,11 @@ package net.sourceforge.pmd.lang.ast; import java.util.Iterator; +import org.apache.commons.lang3.StringUtils; + import net.sourceforge.pmd.annotation.Experimental; import net.sourceforge.pmd.internal.util.IteratorUtil; +import net.sourceforge.pmd.lang.document.Chars; import net.sourceforge.pmd.lang.document.TextRegion; import net.sourceforge.pmd.reporting.Reportable; @@ -37,18 +40,37 @@ public interface GenericToken
> extends Comparable , T getPreviousComment(); /** - * Returns the token's text. + * Returns the token's text as a string. */ default String getImage() { return getImageCs().toString(); } + /** - * Returns the image as a {@link CharSequence}. + * Returns the text of the token as a char sequence. + * This should be preferred when you can use eg {@link StringUtils} + * to do some processing, without having to create a string. */ CharSequence getImageCs(); + /** + * Returns true if the image of this token equals + * the given charsequence. This does not create a + * string. + * + * @param charSeq A character sequence + */ + default boolean imageEquals(CharSequence charSeq) { + CharSequence imageCs = getImageCs(); + if (imageCs instanceof Chars) { + return ((Chars) imageCs).contentEquals(charSeq); + } + return StringUtils.equals(imageCs, charSeq); + } + + /** Returns a text region with the coordinates of this token. */ TextRegion getRegion(); @@ -58,6 +80,7 @@ public interface GenericToken > extends Comparable , */ boolean isEof(); + /** * Returns true if this token is implicit, ie was inserted artificially * and has a zero-length image. 
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/Node.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/Node.java index c90e342a97..c9c2e14816 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/Node.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/Node.java @@ -132,7 +132,7 @@ public interface Node extends Reportable { // Those are kept here because they're handled specially as XPath - // attributes + // attributes, for now @Override default int getBeginLine() { diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/ParseException.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/ParseException.java index ece028bb13..8b27689406 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/ParseException.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/ParseException.java @@ -40,11 +40,6 @@ public class ParseException extends FileAnalysisException { this.currentToken = null; } - public ParseException(String message, Throwable cause) { - super(message, cause); - this.currentToken = null; - } - public ParseException(String message, JavaccToken token) { super(message); this.currentToken = token; @@ -59,6 +54,11 @@ public class ParseException extends FileAnalysisException { currentToken = currentTokenVal; } + @Override + protected String errorKind() { + return "Parse exception"; + } + /** * It uses "currentToken" and "expectedTokenSequences" to generate a parse * error message and returns it. 
If this object has been created diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/Parser.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/Parser.java index ca252c0200..0aa8419fb4 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/Parser.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/Parser.java @@ -46,21 +46,28 @@ public interface Parser { private final SemanticErrorReporter reporter; private final ClassLoader auxclasspathClassLoader; - private final PropertySource propertySource; + private final ParserTaskProperties propertySource; public ParserTask(TextDocument textDoc, SemanticErrorReporter reporter, ClassLoader auxclasspathClassLoader) { - this.textDoc = Objects.requireNonNull(textDoc, "Text document was null"); - this.reporter = Objects.requireNonNull(reporter, "reporter was null"); - this.auxclasspathClassLoader = Objects.requireNonNull(auxclasspathClassLoader, "auxclasspathClassLoader was null"); - - this.propertySource = new ParserTaskProperties(); - propertySource.definePropertyDescriptor(COMMENT_MARKER); + this(textDoc, reporter, new ParserTaskProperties(), auxclasspathClassLoader); } public ParserTask(TextDocument textDoc, SemanticErrorReporter reporter) { this(textDoc, reporter, Parser.class.getClassLoader()); } + private ParserTask(TextDocument textDoc, + SemanticErrorReporter reporter, + ParserTaskProperties source, + ClassLoader auxclasspathClassLoader) { + this.textDoc = Objects.requireNonNull(textDoc, "Text document was null"); + this.reporter = Objects.requireNonNull(reporter, "reporter was null"); + this.auxclasspathClassLoader = Objects.requireNonNull(auxclasspathClassLoader, "auxclasspathClassLoader was null"); + + this.propertySource = new ParserTaskProperties(source); + } + + public static final PropertyDescriptor COMMENT_MARKER = PropertyFactory.stringProperty("suppressionCommentMarker") .desc("deprecated! 
NOPMD") @@ -117,9 +124,33 @@ public interface Parser { return getProperties().getProperty(COMMENT_MARKER); } + /** + * Replace the text document with another. + */ + public ParserTask withTextDocument(TextDocument doc) { + return new ParserTask(doc, this.reporter, this.propertySource, this.auxclasspathClassLoader); + } + private static final class ParserTaskProperties extends AbstractPropertySource { + ParserTaskProperties() { + definePropertyDescriptor(COMMENT_MARKER); + } + + ParserTaskProperties(ParserTaskProperties toCopy) { + for (PropertyDescriptor> prop : toCopy.getPropertyDescriptors()) { + definePropertyDescriptor(prop); + } + toCopy.getOverriddenPropertyDescriptors().forEach( + prop -> copyProperty(prop, toCopy, this) + ); + } + + static void copyProperty(PropertyDescriptor prop, PropertySource source, PropertySource target) { + target.setProperty(prop, source.getProperty(prop)); + } + @Override protected String getPropertySourceType() { return "ParserOptions"; diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/TextAvailableNode.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/TextAvailableNode.java index 708b40db4b..8f400cbfb6 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/TextAvailableNode.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/TextAvailableNode.java @@ -4,6 +4,8 @@ package net.sourceforge.pmd.lang.ast; +import net.sourceforge.pmd.lang.document.Chars; +import net.sourceforge.pmd.lang.document.TextDocument; import net.sourceforge.pmd.lang.document.TextRegion; import net.sourceforge.pmd.lang.rule.xpath.NoAttribute; @@ -17,23 +19,39 @@ public interface TextAvailableNode extends Node { /** - * Returns the exact region of text delimiting - * the node in the underlying text document. Note - * that {@link #getReportLocation()} does not need - * to match this region. {@link #getReportLocation()} - * can be scoped down to a specific token, eg the - * class identifier. 
+ * Returns the exact region of text delimiting the node in the underlying + * text document. Note that {@link #getReportLocation()} does not need + * to match this region. {@link #getReportLocation()} can be scoped down + * to a specific token, eg the class identifier. This region uses + * the translated coordinate system, ie the coordinate system of + * {@link #getTextDocument()}. */ @Override TextRegion getTextRegion(); /** - * Returns the original source code underlying this node. In - * particular, for a {@link RootNode}, returns the whole text - * of the file. + * Returns the original source code underlying this node, before + * any escapes have been translated. In particular, for a {@link RootNode}, + * returns the whole text of the file. + * + * @see TextDocument#sliceOriginalText(TextRegion) */ @NoAttribute - CharSequence getText(); + default Chars getOriginalText() { + return getTextDocument().sliceOriginalText(getTextRegion()); + } + + /** + * Returns the source code underlying this node, after any escapes + * have been translated. In particular, for a {@link RootNode}, returns + * the whole text of the file. + * + * @see TextDocument#sliceTranslatedText(TextRegion) + */ + @NoAttribute + default Chars getText() { + return getTextDocument().sliceTranslatedText(getTextRegion()); + } } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/TokenMgrError.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/TokenMgrError.java index 3625251bbe..06dd3b01a1 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/TokenMgrError.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/TokenMgrError.java @@ -53,12 +53,14 @@ public final class TokenMgrError extends FileAnalysisException { return column; } - + @Override + protected String positionToString() { + return super.positionToString() + " at line " + line + ", column " + column; + } @Override - public String getMessage() { - String leader = hasFileName() ? 
"Lexical error in file " + getFileName() : "Lexical error"; - return leader + " at line " + line + ", column " + column + ". Encountered: " + super.getMessage(); + protected String errorKind() { + return "Lexical error"; } /** diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/AbstractJjtreeNode.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/AbstractJjtreeNode.java index 5c7073e4bb..bd11611c28 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/AbstractJjtreeNode.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/AbstractJjtreeNode.java @@ -7,7 +7,6 @@ package net.sourceforge.pmd.lang.ast.impl.javacc; import net.sourceforge.pmd.annotation.Experimental; import net.sourceforge.pmd.lang.ast.Node; import net.sourceforge.pmd.lang.ast.impl.AbstractNode; -import net.sourceforge.pmd.lang.document.Chars; import net.sourceforge.pmd.lang.document.FileLocation; import net.sourceforge.pmd.lang.document.TextRegion; import net.sourceforge.pmd.util.StringUtil; @@ -48,11 +47,6 @@ public abstract class AbstractJjtreeNode, N e this.image = image; } - @Override - public final Chars getText() { - return getTextDocument().sliceText(getTextRegion()); - } - @Override public final TextRegion getTextRegion() { return TextRegion.fromBothOffsets(getFirstToken().getStartOffset(), diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/AbstractTokenManager.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/AbstractTokenManager.java similarity index 83% rename from pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/AbstractTokenManager.java rename to pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/AbstractTokenManager.java index 8bb6bf0469..9635ccc94e 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/AbstractTokenManager.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/AbstractTokenManager.java @@ -2,19 +2,16 @@ 
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html */ -package net.sourceforge.pmd.lang.ast; +package net.sourceforge.pmd.lang.ast.impl.javacc; import java.util.HashMap; import java.util.Map; import net.sourceforge.pmd.PMD; import net.sourceforge.pmd.lang.TokenManager; -import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken; /** * A base class for the token managers generated by JavaCC. - * - * TODO move to impl.javacc package */ public abstract class AbstractTokenManager implements TokenManager { diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/BackslashEscapeTranslator.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/BackslashEscapeTranslator.java new file mode 100644 index 0000000000..4b9c0b258f --- /dev/null +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/BackslashEscapeTranslator.java @@ -0,0 +1,71 @@ +/* + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.lang.ast.impl.javacc; + +import static java.lang.Integer.min; + +import net.sourceforge.pmd.lang.document.Chars; +import net.sourceforge.pmd.lang.document.TextDocument; + +/** + * A base class for readers that handle escapes starting with a backslash. + */ +public abstract class BackslashEscapeTranslator extends EscapeTranslator { + + private static final char BACKSLASH = '\\'; + + /** + * An offset until which we read backslashes and decided they were not + * an escape. The read procedure may cut off in the middle of the escape, + * and turn an even num of backslashes into an odd one, so until we crossed + * this offset, backslashes are not treated specially. 
+ */ + private int savedNotEscapeSpecialEnd = Integer.MAX_VALUE; + + + public BackslashEscapeTranslator(TextDocument builder) { + super(builder); + } + + @Override + protected int gobbleMaxWithoutEscape(final int maxOff) throws MalformedSourceException { + int off = this.bufpos; + boolean seenBackslash = true; + int notEscapeEnd = this.savedNotEscapeSpecialEnd; + while (off < maxOff) { + seenBackslash = input.charAt(off) == BACKSLASH && notEscapeEnd >= off; + if (seenBackslash) { + break; + } + off++; + } + + if (!seenBackslash || off == maxOff) { + this.bufpos = off; + return off; + } + + return handleBackslash(maxOff, off); + } + + protected abstract int handleBackslash(int maxOff, int firstBackslashOff) throws MalformedSourceException; + + @Override + protected int recordEscape(int startOffsetInclusive, int endOffsetExclusive, Chars translation) { + this.savedNotEscapeSpecialEnd = Integer.MAX_VALUE; + return super.recordEscape(startOffsetInclusive, endOffsetExclusive, translation); + } + + protected int abortEscape(int off, int maxOff) { + // not an escape sequence + int min = min(maxOff, off); + // save the number of backslashes that are part of the escape, + // might have been cut in half by the maxReadahead + this.savedNotEscapeSpecialEnd = min < off ? 
off : Integer.MAX_VALUE; + this.bufpos = min; + return min; + } + +} diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStream.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStream.java new file mode 100644 index 0000000000..d598bff263 --- /dev/null +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStream.java @@ -0,0 +1,174 @@ +/* + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.lang.ast.impl.javacc; + + +import java.io.EOFException; + +import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument.TokenDocumentBehavior; +import net.sourceforge.pmd.lang.document.Chars; +import net.sourceforge.pmd.lang.document.FileLocation; +import net.sourceforge.pmd.lang.document.TextDocument; +import net.sourceforge.pmd.lang.document.TextRegion; + +/** + * PMD flavour of character streams used by JavaCC parsers. + */ +public final class CharStream { + + private final JavaccTokenDocument tokenDoc; + private final TextDocument textDoc; + private final Chars chars; + private final boolean useMarkSuffix; + private int curOffset; + private int markOffset; + + private CharStream(JavaccTokenDocument tokenDoc) { + this.tokenDoc = tokenDoc; + this.textDoc = tokenDoc.getTextDocument(); + this.chars = textDoc.getText(); + this.useMarkSuffix = tokenDoc.useMarkSuffix(); + } + + /** + * Create a new char stream for the given document. This may create + * a new {@link TextDocument} view over the original, which reflects + * its character escapes. + */ + public static CharStream create(TextDocument doc, TokenDocumentBehavior behavior) throws MalformedSourceException { + TextDocument translated = behavior.translate(doc); + return new CharStream(new JavaccTokenDocument(translated, behavior)); + } + + /** + * Returns the next character from the input. After a {@link #backup(int)}, + * some of the already read chars must be spit out again. 
+ * + * @return The next character + * + * @throws EOFException Upon EOF + */ + public char readChar() throws EOFException { + if (curOffset == chars.length()) { + throw new EOFException(); + } + return chars.charAt(curOffset++); + } + + + /** + * Calls {@link #readChar()} and returns its value, marking its position + * as the beginning of the next token. All characters must remain in + * the buffer between two successive calls to this method to implement + * backup correctly. + */ + public char markTokenStart() throws EOFException { + markOffset = curOffset; + return readChar(); + } + + + /** + * Returns a string made up of characters from the token mark up to + * to the current buffer position. + */ + public String getTokenImage() { + return getTokenImageCs().toString(); + } + + /** + * Returns a string made up of characters from the token mark up to + * to the current buffer position. + */ + public Chars getTokenImageCs() { + assert markOffset >= 0; + return chars.slice(markOffset, markLen()); + } + + private int markLen() { + return curOffset - markOffset; + } + + + /** + * Appends the suffix of length 'len' of the current token to the given + * string builder. This is used to build up the matched string + * for use in actions in the case of MORE. + * + * @param len Length of the returned array + * + * @throws IndexOutOfBoundsException If len is greater than the length of the current token + */ + public void appendSuffix(StringBuilder sb, int len) { + if (useMarkSuffix) { + assert len <= markLen() : "Suffix is greater than the mark length? " + len + " > " + markLen(); + chars.appendChars(sb, curOffset - len, len); + } // otherwise dead code, kept because Javacc's argument expressions do side effects + } + + + /** + * Pushes a given number of already read chars into the buffer. + * Subsequent calls to {@link #readChar()} will read those characters + * before proceeding to read the underlying char stream. 
+ * + * A lexer calls this method if it has already read some characters, + * but cannot use them to match a (longer) token. So, they will + * be used again as the prefix of the next token. + * + * @throws AssertionError If the requested amount is greater than the + * length of the mark + */ + public void backup(int amount) { + if (amount > markLen()) { + throw new IllegalArgumentException(); + } + curOffset -= amount; + } + + /** + * Returns the column number of the last character for the current token. + * This is only used for parse exceptions and is very inefficient. + */ + public int getEndColumn() { + return endLocation().getEndColumn(); + } + + + /** + * Returns the line number of the last character for current token. + * This is only used for parse exceptions and is very inefficient. + */ + public int getEndLine() { + return endLocation().getEndLine(); + } + + + private FileLocation endLocation() { + return textDoc.toLocation(TextRegion.caretAt(getEndOffset())); + } + + + /** Returns the start offset of the current token (in the translated source), inclusive. */ + public int getStartOffset() { + return markOffset; + } + + + /** Returns the end offset of the current token (in the translated source), exclusive. */ + public int getEndOffset() { + return curOffset; + } + + + /** + * Returns the token document for the tokens being built. Having it + * here is the most convenient place for the time being. 
+ */ + public JavaccTokenDocument getTokenDocument() { + return tokenDoc; + } + +} diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStreamFactory.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStreamFactory.java deleted file mode 100644 index 333cb1cd9b..0000000000 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStreamFactory.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * BSD-style license; for more info see http://pmd.sourceforge.net/license.html - */ - -package net.sourceforge.pmd.lang.ast.impl.javacc; - -import java.util.function.Function; - -import net.sourceforge.pmd.lang.ast.CharStream; -import net.sourceforge.pmd.lang.document.TextDocument; - -public final class CharStreamFactory { - - private CharStreamFactory() { - // util class - } - - /** - * A char stream that doesn't perform any escape translation. - */ - public static CharStream simpleCharStream(TextDocument input) { - return simpleCharStream(input, JavaccTokenDocument::new); - } - - /** - * A char stream that doesn't perform any escape translation. - */ - public static CharStream simpleCharStream(TextDocument input, - Function super TextDocument, ? extends JavaccTokenDocument> documentMaker) { - JavaccTokenDocument document = documentMaker.apply(input); - return new SimpleCharStream(document); - } - - /** - * A char stream that translates java unicode sequences. - */ - public static CharStream javaCharStream(TextDocument input) { - return javaCharStream(input, JavaccTokenDocument::new); - } - - /** - * A char stream that translates java unicode sequences. - */ - public static CharStream javaCharStream(TextDocument input, Function super TextDocument, ? 
extends JavaccTokenDocument> documentMaker) { - JavaccTokenDocument document = documentMaker.apply(input); - return new JavaCharStream(document); - } - -} diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/EscapeTranslator.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/EscapeTranslator.java new file mode 100644 index 0000000000..30431d995d --- /dev/null +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/EscapeTranslator.java @@ -0,0 +1,155 @@ +/* + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.lang.ast.impl.javacc; + +import static java.lang.Integer.min; + +import net.sourceforge.pmd.internal.util.AssertionUtil; +import net.sourceforge.pmd.lang.document.Chars; +import net.sourceforge.pmd.lang.document.FileLocation; +import net.sourceforge.pmd.lang.document.FragmentedDocBuilder; +import net.sourceforge.pmd.lang.document.TextDocument; + +/** + * An object that can translate an input document into an output document, + * typically by replacing escape sequences with the character they represent. + * + *
This is an abstract class because the default implementation does not + * perform any escape processing. Subclasses refine this behavior. + */ +@SuppressWarnings("PMD.AssignmentInOperand") +public abstract class EscapeTranslator { + // Note that this can easily be turned into a java.io.Reader with + // efficient block IO, optimized for the common case where there are + // few or no escapes. This is part of the history of this file, but + // was removed for simplicity. + + /** + * Source characters. When there is an escape, eg \ u00a0, the + * first backslash is replaced with the translated value of the + * escape. The bufpos is updated so that we read the next char + * after the escape. + */ + protected Chars input; + /** Position of the next char to read in the input. */ + protected int bufpos; + /** Keep track of adjustments to make to the offsets, caused by unicode escapes. */ + final FragmentedDocBuilder builder; + + private Chars curEscape; + private int offInEscape; + + /** + * Create a translator that will read from the given document. + * + * @param original Original document + * + * @throws NullPointerException If the parameter is null + */ + public EscapeTranslator(TextDocument original) { + AssertionUtil.requireParamNotNull("builder", original); + this.input = original.getText(); + this.bufpos = 0; + this.builder = new FragmentedDocBuilder(original); + } + + + /** + * Translate all the input in the buffer. This consumes this object. + * + * @return The translated text document. 
If there is no escape, returns the original text + * + * @throws IllegalStateException If this method is called more than once on the same object + * @throws MalformedSourceException If there are invalid escapes in the source + */ + public TextDocument translateDocument() throws MalformedSourceException { + ensureOpen(); + try { + return translateImpl(); + } finally { + close(); + } + } + + private TextDocument translateImpl() { + if (this.bufpos == input.length()) { + return builder.build(); + } + + final int len = input.length(); // remove Integer.MAX_VALUE + + int readChars = 0; + while (readChars < len && (this.bufpos < input.length() || curEscape != null)) { + if (curEscape != null) { + int toRead = min(len - readChars, curEscape.length() - offInEscape); + + readChars += toRead; + offInEscape += toRead; + + if (curEscape.length() == offInEscape) { + curEscape = null; + continue; + } else { + break; // len cut us off, we'll retry next time + } + } + + int bpos = this.bufpos; + int nextJump = gobbleMaxWithoutEscape(min(input.length(), bpos + len - readChars)); + int newlyReadChars = nextJump - bpos; + + assert newlyReadChars >= 0 && (readChars + newlyReadChars) <= len; + + if (newlyReadChars == 0 && nextJump == input.length()) { + // eof + break; + } + readChars += newlyReadChars; + } + return builder.build(); + } + + /** + * Returns the max offset, EXclusive, up to which we can cut the input + * array from the bufpos to dump it into the output array. 
+ * + * @param maxOff Max offset up to which to read ahead + */ + protected int gobbleMaxWithoutEscape(int maxOff) throws MalformedSourceException { + this.bufpos = maxOff; + return maxOff; + } + + protected int recordEscape(final int startOffsetInclusive, int endOffsetExclusive, Chars translation) { + assert endOffsetExclusive > startOffsetInclusive && startOffsetInclusive >= 0; + this.builder.recordDelta(startOffsetInclusive, endOffsetExclusive, translation); + this.bufpos = endOffsetExclusive; + this.curEscape = translation; + this.offInEscape = 0; + return startOffsetInclusive; + } + + /** + * Closing a translator does not close the underlying document, it just + * clears the intermediary state. + */ + private void close() { + this.bufpos = -1; + this.input = null; + } + + + /** Check to make sure that the stream has not been closed */ + protected final void ensureOpen() { + if (input == null) { + throw new IllegalStateException("Closed"); + } + } + + protected FileLocation locationAt(int indexInInput) { + return builder.toLocation(indexInInput); + } + +} diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaCharStream.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaCharStream.java deleted file mode 100644 index 0169d8a16a..0000000000 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaCharStream.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * BSD-style license; for more info see http://pmd.sourceforge.net/license.html - */ - -package net.sourceforge.pmd.lang.ast.impl.javacc; - -import java.io.EOFException; -import java.io.IOException; - -import net.sourceforge.pmd.lang.document.Chars; - -/** - * This stream buffers the whole file in memory before parsing, - * and track start/end offsets of tokens. This allows building {@link JavaccToken}. - * The buffer is assumed to be composed of only ASCII characters, - * and the stream unescapes Unicode escapes. 
The {@link #getTokenDocument() token document} - * stores the original file with escapes and all. - */ -public class JavaCharStream extends JavaCharStreamBase { - - // full text with nothing escaped and all - private final Chars fullText; - private final JavaccTokenDocument document; - - private int[] startOffsets; - - public JavaCharStream(JavaccTokenDocument document) { - super(document.getTextDocument().newReader()); - this.fullText = document.getFullText(); - this.document = document; - this.startOffsets = new int[bufsize]; - maxNextCharInd = fullText.length(); - - nextCharBuf = null; - } - - @Override - protected void ExpandBuff(boolean wrapAround) { - int[] newStartOffsets = new int[bufsize + 2048]; - - if (wrapAround) { - System.arraycopy(startOffsets, tokenBegin, newStartOffsets, 0, bufsize - tokenBegin); - System.arraycopy(startOffsets, 0, newStartOffsets, bufsize - tokenBegin, bufpos); - startOffsets = newStartOffsets; - } else { - System.arraycopy(startOffsets, tokenBegin, newStartOffsets, 0, bufsize - tokenBegin); - startOffsets = newStartOffsets; - } - - super.ExpandBuff(wrapAround); - } - - @Override - protected void UpdateLineColumn(char c) { - startOffsets[bufpos] = nextCharInd; - super.UpdateLineColumn(c); - } - - @Override - public int getStartOffset() { - return startOffsets[tokenBegin]; - } - - @Override - public int getEndOffset() { - if (isAtEof()) { - return fullText.length(); - } else { - return startOffsets[bufpos] + 1; // + 1 for exclusive - } - } - - @Override - public JavaccTokenDocument getTokenDocument() { - return document; - } - - @Override - public String GetImage() { - if (bufpos >= tokenBegin) { - return new String(buffer, tokenBegin, bufpos - tokenBegin + 1); - } else { - return new String(buffer, tokenBegin, bufsize - tokenBegin) - + new String(buffer, 0, bufpos + 1); - } - } - - @Override - protected char ReadByte() throws IOException { - ++nextCharInd; - - if (isAtEof()) { - if (bufpos != 0) { - --bufpos; - if (bufpos < 0) { - 
bufpos += bufsize; - } - } else { - bufline[bufpos] = line; - bufcolumn[bufpos] = column; - startOffsets[bufpos] = fullText.length(); - } - throw new EOFException(); - } - - return fullText.charAt(nextCharInd); - } - - private boolean isAtEof() { - return nextCharInd >= fullText.length(); - } - - - @Override - protected void FillBuff() { - throw new IllegalStateException("Buffer shouldn't be refilled"); - } - -} diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaEscapeTranslator.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaEscapeTranslator.java new file mode 100644 index 0000000000..f230b15daa --- /dev/null +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaEscapeTranslator.java @@ -0,0 +1,94 @@ +/* + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.lang.ast.impl.javacc; + +import net.sourceforge.pmd.lang.document.Chars; +import net.sourceforge.pmd.lang.document.TextDocument; + +/** + * An implementation of {@link EscapeTranslator} that translates Java + * unicode escapes. + */ +@SuppressWarnings("PMD.AssignmentInOperand") +public final class JavaEscapeTranslator extends BackslashEscapeTranslator { + + public JavaEscapeTranslator(TextDocument input) { + super(input); + } + + @Override + protected int handleBackslash(final int maxOff, final int firstBackslashOff) throws MalformedSourceException { + int off = firstBackslashOff; + while (off < input.length() && input.charAt(off) == '\\') { + off++; + } + + int bslashCount = off - firstBackslashOff; + // is there an escape at offset firstBslashOff? 
+ if ((bslashCount & 1) == 1 // odd number of backslashes + && off < input.length() && input.charAt(off) == 'u') { // at least one 'u' + // this is enough to expect an escape or throw an exception + while (off < input.length() && input.charAt(off) == 'u') { + // consume all the 'u's + off++; + } + Chars value = escapeValue(firstBackslashOff, off - 1); + int endOffset = off + 4; // + 4 hex digits + return recordEscape(firstBackslashOff, endOffset, value); + } else { + return abortEscape(off, maxOff); + } + } + + private Chars escapeValue(int posOfFirstBackSlash, final int offOfTheU) throws MalformedSourceException { + int off = offOfTheU; + try { + char c = (char) + ( hexVal(input.charAt(++off)) << 12 // SUPPRESS CHECKSTYLE paren pad + | hexVal(input.charAt(++off)) << 8 + | hexVal(input.charAt(++off)) << 4 + | hexVal(input.charAt(++off)) + ); + + return Chars.wrap(Character.toString(c)); + } catch (NumberFormatException | IndexOutOfBoundsException e) { + // cut off u and 4 digits + String escape = input.substring(offOfTheU, Math.min(input.length(), offOfTheU + 5)); + throw new MalformedSourceException("Invalid unicode escape \\" + escape, e, locationAt(posOfFirstBackSlash)); + } + } + + private static int hexVal(char c) { + switch (c) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return c - '0'; + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + return c - ('A' - 10); + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + return c - ('a' - 10); + default: + throw new NumberFormatException("Character '" + c + "' is not a valid hexadecimal digit"); + } + } +} diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaccToken.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaccToken.java index 28707814fc..957bb8ad14 100644 --- 
a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaccToken.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaccToken.java @@ -4,8 +4,8 @@ package net.sourceforge.pmd.lang.ast.impl.javacc; -import net.sourceforge.pmd.lang.ast.CharStream; import net.sourceforge.pmd.lang.ast.GenericToken; +import net.sourceforge.pmd.lang.document.Chars; import net.sourceforge.pmd.lang.document.FileLocation; import net.sourceforge.pmd.lang.document.TextRegion; @@ -76,6 +76,19 @@ public class JavaccToken implements GenericToken
{ public JavaccToken specialToken; + // common constructor, with a CharSequence parameter + JavaccToken(int kind, CharSequence image, int startInclusive, int endExclusive, JavaccTokenDocument document) { + assert document != null : "Null document"; + assert image instanceof String || image instanceof Chars : "Null image"; + assert TextRegion.isValidRegion(startInclusive, endExclusive, document.getTextDocument()); + + this.kind = kind; + this.image = image; + this.startOffset = startInclusive; + this.endOffset = endExclusive; + this.document = document; + } + /** * Builds a new token of the specified kind. * @@ -85,19 +98,15 @@ public class JavaccToken implements GenericToken { * @param endExclusive End of the token in the text file (before translating escapes) * @param document Document owning the token */ - public JavaccToken(int kind, - CharSequence image, - int startInclusive, - int endExclusive, - JavaccTokenDocument document) { - assert document != null : "Null document"; - assert TextRegion.isValidRegion(startInclusive, endExclusive, document.getTextDocument()); + public JavaccToken(int kind, Chars image, int startInclusive, int endExclusive, JavaccTokenDocument document) { + this(kind, (CharSequence) image, startInclusive, endExclusive, document); + } - this.kind = kind; - this.image = image; - this.startOffset = startInclusive; - this.endOffset = endExclusive; - this.document = document; + /** + * Constructor with a {@link String} image (see {@link #JavaccToken(int, Chars, int, int, JavaccTokenDocument) the other ctor}). 
+ */ + public JavaccToken(int kind, String image, int startInclusive, int endExclusive, JavaccTokenDocument document) { + this(kind, (CharSequence) image, startInclusive, endExclusive, document); } /** @@ -128,12 +137,18 @@ public class JavaccToken implements GenericToken { } @Override - public CharSequence getImageCs() { - return image; + public Chars getImageCs() { + // wrap it: it's zero cost (images are either Chars or String) and Chars has a nice API + return Chars.wrap(image); } @Override - public TextRegion getRegion() { + public String getImage() { + return image.toString(); + } + + @Override + public final TextRegion getRegion() { return TextRegion.fromBothOffsets(startOffset, endOffset); } @@ -171,24 +186,13 @@ public class JavaccToken implements GenericToken { public JavaccToken replaceImage(CharStream charStream) { return new JavaccToken( this.kind, - charStream.GetImage(), + charStream.getTokenImageCs(), this.startOffset, charStream.getEndOffset(), this.document ); } - public JavaccToken withImage(String image) { - return new JavaccToken( - this.kind, - image, - this.startOffset, - this.endOffset, - this.document - ); - } - - /** * Returns a new token with the given kind, and all other parameters diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaccTokenDocument.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaccTokenDocument.java index 1e9d1f6a8f..6b13d723d4 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaccTokenDocument.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaccTokenDocument.java @@ -4,23 +4,137 @@ package net.sourceforge.pmd.lang.ast.impl.javacc; +import java.util.Collections; +import java.util.List; + import org.checkerframework.checker.nullness.qual.NonNull; import org.checkerframework.checker.nullness.qual.Nullable; -import net.sourceforge.pmd.lang.ast.CharStream; +import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer; 
import net.sourceforge.pmd.lang.ast.impl.TokenDocument; import net.sourceforge.pmd.lang.document.TextDocument; /** * Token document for Javacc implementations. This is a helper object - * for generated token managers. + * for generated token managers. Note: the extension point is a custom + * implementation of {@link TokenDocumentBehavior}, see {@link JjtreeParserAdapter#tokenBehavior()}, + * {@link JavaCCTokenizer#tokenBehavior()} */ -public class JavaccTokenDocument extends TokenDocument { +public final class JavaccTokenDocument extends TokenDocument { + + private final TokenDocumentBehavior behavior; private JavaccToken first; - public JavaccTokenDocument(TextDocument textDocument) { + public JavaccTokenDocument(TextDocument textDocument, TokenDocumentBehavior behavior) { super(textDocument); + this.behavior = behavior; + } + + /** + * Overridable configuration of a token document. + */ + public static class TokenDocumentBehavior { + + public static final TokenDocumentBehavior DEFAULT = new TokenDocumentBehavior(Collections.emptyList()); + private final List tokenNames; + + public TokenDocumentBehavior(List tokenNames) { + this.tokenNames = tokenNames; + } + + /** + * Returns true if the lexer should accumulate the image of MORE + * tokens into the StringBuilder jjimage. This is useless in our + * current implementations, because the image of tokens can be cut + * out using text coordinates, so doesn't need to be put into a separate string. + * The default returns false, which makes {@link CharStream#appendSuffix(StringBuilder, int)} a noop. + */ + public boolean useMarkSuffix() { + return false; + } + + /** + * Translate the escapes of the source document. The default implementation + * does not perform any escaping. 
+ * + * @param text Source doc + * + * @see EscapeTranslator + * + * TODO move that to LanguageVersionHandler once #3919 (Merge CPD and PMD language) is implemented + */ + public TextDocument translate(TextDocument text) throws MalformedSourceException { + return text; + } + + + /** + * Returns a string that describes the token kind. + * + * @param kind Kind of token + * + * @return A descriptive string + */ + public final @NonNull String describeKind(int kind) { + if (kind == JavaccToken.IMPLICIT_TOKEN) { + return " "; + } + String impl = describeKindImpl(kind); + if (impl != null) { + return impl; + } + return " "; + } + + /** + * Describe the given kind. If this returns a non-null value, then + * that's what {@link #describeKind(int)} will use. Otherwise a default + * implementation is used. + * + * An implementation typically uses the JavaCC-generated array + * named {@code
Constants.tokenImage}. Remember to + * check the bounds of the array. + * + * @param kind Kind of token + * + * @return A descriptive string, or null to use default + */ + protected @Nullable String describeKindImpl(int kind) { + if (kind >= 0 && kind < tokenNames.size()) { + return tokenNames.get(kind); + } + return null; + } + + + /** + * Creates a new token with the given kind. This is called back to + * by JavaCC-generated token managers (jjFillToken). Note that a + * created token is not guaranteed to end up in the final token chain. + * + * @param kind Kind of the token + * @param cs Char stream of the file. This can be used to get text + * coordinates and the image + * @param image Shared instance of the image token. If this is non-null, + * then no call to {@link CharStream#getTokenImage()} should be + * issued. + * + * @return A new token + */ + public JavaccToken createToken(JavaccTokenDocument self, int kind, CharStream cs, @Nullable String image) { + return new JavaccToken( + kind, + image == null ? cs.getTokenImageCs() : image, + cs.getStartOffset(), + cs.getEndOffset(), + self + ); + } + } + + boolean useMarkSuffix() { + return behavior.useMarkSuffix(); } /** @@ -52,62 +166,17 @@ public class JavaccTokenDocument extends TokenDocument { } /** - * Returns a string that describes the token kind. - * - * @param kind Kind of token - * - * @return A descriptive string + * @see TokenDocumentBehavior#describeKind(int) */ - public final @NonNull String describeKind(int kind) { - if (kind == JavaccToken.IMPLICIT_TOKEN) { - return " "; - } - String impl = describeKindImpl(kind); - if (impl != null) { - return impl; - } - return " "; + public @NonNull String describeKind(int kind) { + return behavior.describeKind(kind); } /** - * Describe the given kind. If this returns a non-null value, then - * that's what {@link #describeKind(int)} will use. Otherwise a default - * implementation is used. 
- * - * An implementation typically uses the JavaCC-generated array - * named {@code
Constants.tokenImage}. Remember to - * check the bounds of the array. - * - * @param kind Kind of token - * - * @return A descriptive string, or null to use default - */ - protected @Nullable String describeKindImpl(int kind) { - return null; - } - - - /** - * Creates a new token with the given kind. This is called back to - * by JavaCC-generated token managers (jjFillToken). Note that a - * created token is not guaranteed to end up in the final token chain. - * - * @param kind Kind of the token - * @param cs Char stream of the file. This can be used to get text - * coordinates and the image - * @param image Shared instance of the image token. If this is non-null, - * then no call to {@link CharStream#GetImage()} should be - * issued. - * - * @return A new token + * @see TokenDocumentBehavior#createToken(JavaccTokenDocument, int, CharStream, String) */ public JavaccToken createToken(int kind, CharStream cs, @Nullable String image) { - return new JavaccToken( - kind, - image == null ? cs.GetImage() : image, - cs.getStartOffset(), - cs.getEndOffset(), - this - ); + return behavior.createToken(this, kind, cs, image); + } } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JjtreeNode.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JjtreeNode.java index 68e8d63028..0af5cfe313 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JjtreeNode.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JjtreeNode.java @@ -6,8 +6,6 @@ package net.sourceforge.pmd.lang.ast.impl.javacc; import net.sourceforge.pmd.lang.ast.TextAvailableNode; import net.sourceforge.pmd.lang.ast.impl.GenericNode; -import net.sourceforge.pmd.lang.document.Chars; -import net.sourceforge.pmd.lang.document.TextRegion; import net.sourceforge.pmd.reporting.Reportable; /** @@ -19,12 +17,6 @@ import net.sourceforge.pmd.reporting.Reportable; */ public interface JjtreeNode > extends GenericNode , TextAvailableNode, 
Reportable { - @Override - Chars getText(); - - @Override - TextRegion getTextRegion(); - // todo token accessors should most likely be protected in PMD 7. diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JjtreeParserAdapter.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JjtreeParserAdapter.java index a25fc65089..60d701bba9 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JjtreeParserAdapter.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JjtreeParserAdapter.java @@ -4,12 +4,10 @@ package net.sourceforge.pmd.lang.ast.impl.javacc; -import net.sourceforge.pmd.lang.ast.CharStream; +import net.sourceforge.pmd.lang.ast.FileAnalysisException; import net.sourceforge.pmd.lang.ast.ParseException; import net.sourceforge.pmd.lang.ast.Parser; import net.sourceforge.pmd.lang.ast.RootNode; -import net.sourceforge.pmd.lang.ast.TokenMgrError; -import net.sourceforge.pmd.lang.document.TextDocument; /** * Base implementation of the {@link Parser} interface for JavaCC language @@ -24,20 +22,19 @@ public abstract class JjtreeParserAdapter implements Parser // inheritance only } - protected abstract JavaccTokenDocument newDocumentImpl(TextDocument textDocument); - - protected CharStream newCharStream(JavaccTokenDocument tokenDocument) { - return new SimpleCharStream(tokenDocument); - } + protected abstract JavaccTokenDocument.TokenDocumentBehavior tokenBehavior(); @Override - public R parse(ParserTask task) throws ParseException { - JavaccTokenDocument doc = newDocumentImpl(task.getTextDocument()); - CharStream charStream = newCharStream(doc); - + public final R parse(ParserTask task) throws ParseException { try { + // First read the source file and interpret escapes + CharStream charStream = CharStream.create(task.getTextDocument(), tokenBehavior()); + // We replace the text document, so that it reflects escapes properly + // Escapes are processed by CharStream#create + task = 
task.withTextDocument(charStream.getTokenDocument().getTextDocument()); + // Finally, do the parsing return parseImpl(charStream, task); - } catch (TokenMgrError tme) { + } catch (FileAnalysisException tme) { throw tme.setFileName(task.getFileDisplayName()); } } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/MalformedSourceException.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/MalformedSourceException.java new file mode 100644 index 0000000000..bc3471861e --- /dev/null +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/MalformedSourceException.java @@ -0,0 +1,35 @@ +/* + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.lang.ast.impl.javacc; + +import java.util.Objects; + +import net.sourceforge.pmd.lang.ast.FileAnalysisException; +import net.sourceforge.pmd.lang.document.FileLocation; + +/** + * A {@link FileAnalysisException} thrown when the source format is invalid, + * for example if some unicode escapes cannot be translated. 
+ */ +public class MalformedSourceException extends FileAnalysisException { + + private final FileLocation location; + + public MalformedSourceException(String message, Throwable cause, FileLocation fileLocation) { + super(message, cause); + this.location = Objects.requireNonNull(fileLocation); + setFileName(fileLocation.getFileName()); + } + + @Override + protected String positionToString() { + return super.positionToString() + " at " + location.startPosToString(); + } + + @Override + protected String errorKind() { + return "Source format error"; + } +} diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/SimpleCharStream.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/SimpleCharStream.java deleted file mode 100644 index d453b34797..0000000000 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/SimpleCharStream.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * BSD-style license; for more info see http://pmd.sourceforge.net/license.html - */ - -package net.sourceforge.pmd.lang.ast.impl.javacc; - -/** - * A char stream that does not perform unicode escaping. 
- */ -public class SimpleCharStream extends JavaCharStream { - - public SimpleCharStream(JavaccTokenDocument document) { - super(document); - } - - @Override - protected boolean doEscape() { - return false; - } -} diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/BaseMappedDocument.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/BaseMappedDocument.java new file mode 100644 index 0000000000..d6e8fc9e8e --- /dev/null +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/BaseMappedDocument.java @@ -0,0 +1,113 @@ +/* + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.lang.document; + +import java.io.IOException; + +import org.checkerframework.checker.nullness.qual.NonNull; + +/** + * Base class for documents that apply a transform to their output offsets. + * This includes translated documents, and slices (subdocument views). + */ +abstract class BaseMappedDocument implements TextDocument { + + protected final TextDocument base; + + BaseMappedDocument(TextDocument base) { + this.base = base; + } + + @Override + public long getCheckSum() { + return base.getCheckSum(); + } + + @Override + public String getPathId() { + return base.getPathId(); + } + + @Override + public String getDisplayName() { + return base.getDisplayName(); + } + + @Override + public Chars sliceOriginalText(TextRegion region) { + return base.sliceOriginalText(inputRegion(region)); + } + + @Override + public FileLocation toLocation(TextRegion region) { + return base.toLocation(inputRegion(region)); + } + + @Override + public TextRegion createLineRange(int startLineInclusive, int endLineInclusive) { + // see the doc, lines do not need to be translated + return base.createLineRange(startLineInclusive, endLineInclusive); + } + + @Override + public TextPos2d lineColumnAtOffset(int offset, boolean inclusive) { + return base.lineColumnAtOffset(inputOffset(offset, inclusive)); + } + + /** + * 
Translate a region given in the coordinate system of this + * document, to the coordinate system of the base document. + * This works as if creating a new region with both start and end + * offsets translated through {@link #inputOffset(int, boolean)}. The + * returned region may have a different length. + * + * @param outputRegion Output region + * + * @return Input region + */ + protected @NonNull TextRegion inputRegion(TextRegion outputRegion) { + return TextRegion.fromBothOffsets(inputOffset(outputRegion.getStartOffset(), true), + inputOffset(outputRegion.getEndOffset(), false)); + } + + /** + * Returns the input offset for the given output offset. This maps + * back an offset in the coordinate system of this document, to the + * coordinate system of the base document. This includes the + * length of any unicode escapes. + * + * + * input: "a\u00a0b" (original document) + * translated: "a b" (this document) + * + * translateOffset(0) = 0 + * translateOffset(1) = 1 + * translateOffset(2) = 7 // includes the length of the escape + *+ * + * @param outOffset Output offset + * @param inclusive Whether the offset is to be interpreted as the index of a character (true), + * or the position after a character (false) + * + * @return Input offset + */ + protected final int inputOffset(int outOffset, boolean inclusive) { + if (outOffset < 0 || outOffset > getLength()) { + throw new IndexOutOfBoundsException(); + } + return localOffsetTransform(outOffset, inclusive); + } + + /** + * Output offset to input offset. 
+ */ + protected abstract int localOffsetTransform(int outOffset, boolean inclusive); + + + @Override + public void close() throws IOException { + base.close(); + } +} diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/Chars.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/Chars.java index 1e32d8ea44..a50fb5dd58 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/Chars.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/Chars.java @@ -581,6 +581,16 @@ public final class Chars implements CharSequence { return StreamSupport.stream(lines().spliterator(), false); } + /** + * Returns a new stringbuilder containing the whole contents of this + * char sequence. + */ + public StringBuilder toStringBuilder() { + StringBuilder sb = new StringBuilder(length()); + appendChars(sb); + return sb; + } + /** * Returns a new reader for the whole contents of this char sequence. diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/FileLocation.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/FileLocation.java index 31b0482b5a..6cdf18785c 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/FileLocation.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/FileLocation.java @@ -138,7 +138,7 @@ public final class FileLocation { } /** - * Creates a new location from the given parameters. + * Creates a new location for a range of text. * * @throws IllegalArgumentException If the file name is null * @throws IllegalArgumentException If any of the line/col parameters are strictly less than 1 @@ -155,6 +155,21 @@ public final class FileLocation { end.getColumn()); } + /** + * Returns a new location that starts and ends at the same position. 
+ * + * @param fileName File name + * @param line Line number + * @param column Column number + * + * @return A new location + * + * @throws IllegalArgumentException See {@link #range(String, int, int, int, int)} + */ + public static FileLocation caret(String fileName, int line, int column) { + return new FileLocation(fileName, line, column, line, column); + } + @Override public String toString() { diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/FragmentedDocBuilder.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/FragmentedDocBuilder.java new file mode 100644 index 0000000000..52d98c05a0 --- /dev/null +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/FragmentedDocBuilder.java @@ -0,0 +1,77 @@ +/* + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.lang.document; + + +import net.sourceforge.pmd.lang.document.FragmentedTextDocument.Fragment; + +public final class FragmentedDocBuilder { + + private final Chars mainBuf; + private final TextDocument original; + + private Fragment lastFragment; + private Fragment firstFragment; + + private int curOffInInput; + + public FragmentedDocBuilder(TextDocument original) { + this.mainBuf = original.getText(); + this.original = original; + } + + public FileLocation toLocation(int indexInInput) { + return original.toLocation(TextRegion.caretAt(indexInInput)); + } + + /** + * Add a new fragment. + * + * @param startInInput Start (inclusive) of the overwritten text in the source + * @param endInInput End (exclusive) ... + * @param translation Characters with which the range startInInput..endInInput are overwritten. + * This may be empty. 
+ */ + public void recordDelta(int startInInput, int endInInput, Chars translation) { + assert curOffInInput <= startInInput : "Already moved past " + curOffInInput + ", cannot add delta at " + startInInput; + assert startInInput <= endInInput : "Offsets must be ordered"; + assert translation != null : "Translation cannot be null"; + + int inLength = endInInput - startInInput; + if (firstFragment == null) { + assert lastFragment == null; + firstFragment = new Fragment(null, startInInput, mainBuf.slice(0, startInInput)); + lastFragment = new Fragment(firstFragment, inLength, translation); + curOffInInput = endInInput; + return; + } + + Fragment last = lastFragment; + int prevLen = startInInput - curOffInInput; + if (prevLen != 0) { + last = new Fragment(last, prevLen, mainBuf.slice(curOffInInput, prevLen)); + } + last = new Fragment(last, inLength, translation); + this.lastFragment = last; + this.curOffInInput = endInInput; + } + + public TextDocument build() { + if (firstFragment == null) { + // No deltas in whole document, there's a single fragment + // This is the case for > 97% of Java files (source: OpenJDK) + return original; + } else { + if (curOffInInput < mainBuf.length()) { + // there's some text left between the last fragment and the end of the doc + int remLen = mainBuf.length() - curOffInInput; + Chars remainder = mainBuf.slice(curOffInInput, remLen); + lastFragment = new Fragment(lastFragment, remLen, remainder); + } + return new FragmentedTextDocument(original, firstFragment, lastFragment); + } + } + +} diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/FragmentedTextDocument.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/FragmentedTextDocument.java new file mode 100644 index 0000000000..d92404eeb4 --- /dev/null +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/FragmentedTextDocument.java @@ -0,0 +1,161 @@ +/* + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + 
+package net.sourceforge.pmd.lang.document; + +import org.checkerframework.checker.nullness.qual.Nullable; + +import net.sourceforge.pmd.lang.LanguageVersion; + +/** + * A text document built as a set of deltas over another document. + */ +final class FragmentedTextDocument extends BaseMappedDocument implements TextDocument { + + private final Chars text; + + private Fragment lastAccessedFragment; + + FragmentedTextDocument(TextDocument base, Fragment firstFragment, Fragment lastFragment) { + super(base); + assert firstFragment != lastFragment; // NOPMD + this.text = toChars(firstFragment, lastFragment); + this.lastAccessedFragment = firstFragment; + } + + private static Chars toChars(Fragment firstFragment, Fragment lastFragment) { + StringBuilder sb = new StringBuilder(lastFragment.outEnd()); + Fragment f = firstFragment; + while (f != null) { + f.getChars().appendChars(sb); + f = f.next; + } + return Chars.wrap(sb); + } + + @Override + public Chars getText() { + return text; + } + + + @Override + public LanguageVersion getLanguageVersion() { + return base.getLanguageVersion(); + } + + @Override + protected int localOffsetTransform(int outOffset, boolean inclusive) { + // caching the last accessed fragment instead of doing + // a linear search is critical for performance. + Fragment f = this.lastAccessedFragment; + if (f == null) { + return outOffset; + } + + // Whether the fragment contains the offset we're looking for. + // Will be true most of the time. 
+ boolean containsOffset = + f.outStart() <= outOffset && outOffset < f.outEnd(); + + if (!containsOffset) { + // Slow path, we must search for the fragment + // This optimisation is important, otherwise we have + // to search for very long times in some files + + if (f.outEnd() < outOffset) { // search forward + while (f.next != null && f.outEnd() < outOffset) { + f = f.next; + } + } else { // search backwards + while (f.prev != null && outOffset <= f.outStart()) { + f = f.prev; + } + } + lastAccessedFragment = f; + } + + if (inclusive && f.outEnd() == outOffset && f.next != null) { + // Inclusive means, the offset must correspond to a character in the source document. + // Here we have to skip forward to the fragment that contains the character, because + // it's not this one. + do { + f = f.next; + } while (f.next != null && f.outLen() == 0); + } + return f.outToIn(outOffset); + } + + + /** + * A delta from the original text to the translated text. This maps + * a region of the original document to some new characters. 
+ */ + static final class Fragment { + + private final Chars chars; + + final @Nullable Fragment prev; + @Nullable Fragment next; + + private final int inStart; + private final int inLength; + private final int outStart; + + Fragment(@Nullable Fragment prev, int inLength, Chars chars) { + this.chars = chars; + this.prev = prev; + this.inLength = inLength; + if (prev != null) { + prev.next = this; + this.outStart = prev.outEnd(); + this.inStart = prev.inEnd(); + } else { + this.outStart = 0; + this.inStart = 0; + } + } + + public Chars getChars() { + return chars; + } + + int outStart() { + return outStart; + } + + int outLen() { + return chars.length(); + } + + int outEnd() { + return outStart() + outLen(); + } + + int inStart() { + return inStart; + } + + int inLen() { + return inLength; + } + + int inEnd() { + return inStart() + inLen(); + } + + int outToIn(int outOffset) { + return inStart() + outOffset - outStart(); + } + + int inToOut(int inOffset) { + return inOffset - inStart() + outStart(); + } + + @Override + public String toString() { + return "Fragment[" + inStart() + ".." + inEnd() + " -> " + outStart() + ".." 
+ outEnd() + "]" + chars; + } + } +} diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/RootTextDocument.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/RootTextDocument.java index 873fba75c5..be09e85419 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/RootTextDocument.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/RootTextDocument.java @@ -120,12 +120,13 @@ final class RootTextDocument extends BaseCloseable implements TextDocument { } @Override - public Chars sliceText(TextRegion region) { + public Chars sliceOriginalText(TextRegion region) { return getText().subSequence(region.getStartOffset(), region.getEndOffset()); } private static final String NOT_IN_RANGE = "Region [start=%d, end=%d[ is not in range of this document (length %d)"; private static final String INVALID_LINE_RANGE = "Line range %d..%d is not in range of this document (%d lines) (line numbers are 1-based)"; + private static final String INVALID_OFFSET = "Offset %d is not in range of this document (length %d) (offsets are 0-based)"; static IndexOutOfBoundsException invalidLineRange(int start, int end, int numLines) { return new IndexOutOfBoundsException(String.format(INVALID_LINE_RANGE, start, end, numLines)); @@ -134,4 +135,8 @@ final class RootTextDocument extends BaseCloseable implements TextDocument { static IndexOutOfBoundsException regionOutOfBounds(int start, int end, int maxLen) { return new IndexOutOfBoundsException(String.format(NOT_IN_RANGE, start, end, maxLen)); } + + static IndexOutOfBoundsException invalidOffset(int offset, int maxLen) { + return new IndexOutOfBoundsException(String.format(INVALID_OFFSET, offset, maxLen)); + } } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/TextDocument.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/TextDocument.java index 6bfc817b09..153f43e360 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/TextDocument.java +++ 
b/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/TextDocument.java @@ -73,6 +73,8 @@ import net.sourceforge.pmd.util.datasource.DataSource; public interface TextDocument extends Closeable { // todo logical sub-documents, to support embedded languages // ideally, just slice the text, and share the positioner + // a problem with document slices becomes reference counting for the close routine + // todo text edition (there are some reverted commits in the branch // with part of this, including a lot of tests) @@ -105,9 +107,33 @@ public interface TextDocument extends Closeable { Chars getText(); /** - * Returns a region of the {@linkplain #getText() text} as a character sequence. + * Returns a slice of the original text. Note that this is not the + * same as {@code getText().subSequence}, as if this document has + * translated escapes, the returned char slice will contain the + * untranslated escapes, whereas {@link #getText()} would return + * the translated characters. + * + * @param region A region, in the coordinate system of this document + * + * @return The slice of the original text that corresponds to the region + * + * @throws IndexOutOfBoundsException If the region is not a valid range */ - Chars sliceText(TextRegion region); + Chars sliceOriginalText(TextRegion region); + + /** + * Returns a slice of the translated text. This is always equal to + * {@code getText().slice(region)}, as the text is the translated text. + * + * @param region A region, in the coordinate system of this document + * + * @return The slice of the translated text that corresponds to the region + * + * @throws IndexOutOfBoundsException If the region is not a valid range + */ + default Chars sliceTranslatedText(TextRegion region) { + return getText().slice(region); + } /** @@ -117,6 +143,7 @@ public interface TextDocument extends Closeable { */ long getCheckSum(); + /** * Returns a reader over the text of this document. 
*/ @@ -124,7 +151,6 @@ public interface TextDocument extends Closeable { return getText().newReader(); } - /** * Returns the length in characters of the {@linkplain #getText() text}. */ @@ -133,7 +159,8 @@ public interface TextDocument extends Closeable { } /** - * Returns a text region that corresponds to the entire document. + * Returns a text region that corresponds to the entire document, + * in the coordinate system of this document. */ default TextRegion getEntireRegion() { return TextRegion.fromOffsetLength(0, getLength()); @@ -143,11 +170,16 @@ public interface TextDocument extends Closeable { * Returns a region that spans the text of all the given lines. * This is intended to provide a replacement for {@link SourceCode#getSlice(int, int)}. * + *Note that, as line numbers may only be obtained from {@link #toLocation(TextRegion)}, + * and hence are line numbers of the original source, both parameters + * must be line numbers of the source text and not the translated text + * that this represents. + * * @param startLineInclusive Inclusive start line number (1-based) * @param endLineInclusive Inclusive end line number (1-based) * * @throws IndexOutOfBoundsException If the arguments do not identify - * a valid region in this document + * a valid region in the source document */ TextRegion createLineRange(int startLineInclusive, int endLineInclusive); @@ -181,6 +213,8 @@ public interface TextDocument extends Closeable { /** * Returns the line and column at the given offset. + * Both the input offset and the output range are in the coordinates + * of this document. * * @param offset A source offset (0-based), can range in {@code [0, length]}. 
* @param inclusive If the offset falls right after a line terminator, diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/TextFileBuilder.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/TextFileBuilder.java index f240b51b81..e23f3264b5 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/TextFileBuilder.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/TextFileBuilder.java @@ -17,6 +17,7 @@ import net.sourceforge.pmd.lang.LanguageVersion; * A builder for a new text file. * See static methods on {@link TextFile}. */ +@SuppressWarnings("PMD.MissingStaticMethodInNonInstantiatableClass") public abstract class TextFileBuilder { protected final LanguageVersion languageVersion; diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/processor/PmdRunnable.java b/pmd-core/src/main/java/net/sourceforge/pmd/processor/PmdRunnable.java index 4a79f9e516..3ae36944bd 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/processor/PmdRunnable.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/processor/PmdRunnable.java @@ -137,6 +137,7 @@ abstract class PmdRunnable implements Runnable { handler.declareParserTaskProperties(task.getProperties()); task.getProperties().setProperty(ParserTask.COMMENT_MARKER, configuration.getSuppressMarker()); + assert task.getCommentMarker().equals(configuration.getSuppressMarker()); Parser parser = handler.getParser(); diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/rules/RuleFactory.java b/pmd-core/src/main/java/net/sourceforge/pmd/rules/RuleFactory.java index a67cdfe4b8..1a08e64952 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/rules/RuleFactory.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/rules/RuleFactory.java @@ -62,12 +62,15 @@ import com.github.oowekyala.ooxml.messages.XmlException; public class RuleFactory { private final ResourceLoader resourceLoader; + private final LanguageRegistry languageRegistry; /** * @param resourceLoader The resource 
loader to load the rule from jar */ - public RuleFactory(final ResourceLoader resourceLoader) { + public RuleFactory(ResourceLoader resourceLoader, + LanguageRegistry languageRegistry) { this.resourceLoader = resourceLoader; + this.languageRegistry = languageRegistry; } /** @@ -250,7 +253,7 @@ public class RuleFactory { private void setLanguage(Element ruleElement, PmdXmlReporter err, Rule rule) { String langId = SchemaConstants.LANGUAGE.getNonBlankAttribute(ruleElement, err); - Language lang = LanguageRegistry.findLanguageByTerseName(langId); + Language lang = languageRegistry.getLanguageById(langId); if (lang == null) { Attr node = SchemaConstants.LANGUAGE.getAttributeNode(ruleElement); throw err.at(node) @@ -260,7 +263,7 @@ public class RuleFactory { } private @NonNull String supportedLanguages() { - return LanguageRegistry.getLanguages().stream().map(Language::getTerseName).map(StringUtil::inSingleQuotes).collect(Collectors.joining(", ")); + return languageRegistry.commaSeparatedList(l -> StringUtil.inSingleQuotes(l.getId())); } /** diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/util/CollectionUtil.java b/pmd-core/src/main/java/net/sourceforge/pmd/util/CollectionUtil.java index b708538400..af98854f0f 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/util/CollectionUtil.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/util/CollectionUtil.java @@ -8,6 +8,7 @@ import static java.util.Arrays.asList; import static java.util.Collections.emptyIterator; import static java.util.Collections.emptyList; import static java.util.Collections.emptyMap; +import static java.util.Collections.emptySet; import static java.util.Collections.singletonList; import java.util.ArrayList; @@ -511,12 +512,12 @@ public final class CollectionUtil { /** * A collector that returns a mutable set. This contrasts with * {@link Collectors#toSet()}, which makes no guarantee about the - * mutability of the set. + * mutability of the set. The set preserves insertion order. 
* * @param <T>
Type of accumulated values */ public static <T> Collector<T, ?, Set<T>> toMutableSet() { - return Collectors.toCollection(HashSet::new); + return Collectors.toCollection(LinkedHashSet::new); } /** @@ -531,6 +532,18 @@ public final class CollectionUtil { return Collectors.collectingAndThen(toMutableList(), Collections::unmodifiableList); } + /** + * A collector that returns an unmodifiable set. This contrasts with + * {@link Collectors#toSet()}, which makes no guarantee about the + * mutability of the set. {@code Collectors::toUnmodifiableSet} was + * only added in JDK 9. The set preserves insertion order. + * + * @param <T> Type of accumulated values + */ + public static <T> Collector<T, ?, Set<T>> toUnmodifiableSet() { + return Collectors.collectingAndThen(toMutableSet(), Collections::unmodifiableSet); + } + /** * A collectors that accumulates into a persistent set. * @@ -634,6 +647,13 @@ public final class CollectionUtil { return Collections.unmodifiableList(new ArrayList<>(list)); } + public static <T> Set<T> defensiveUnmodifiableCopyToSet(Collection<? extends T> list) { + if (list.isEmpty()) { + return emptySet(); + } + return Collections.unmodifiableSet(new LinkedHashSet<>(list)); + } + /** * Like {@link String#join(CharSequence, Iterable)}, except it appends * on a preexisting {@link StringBuilder}. The result value is that StringBuilder. 
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/util/StringUtil.java b/pmd-core/src/main/java/net/sourceforge/pmd/util/StringUtil.java index 825c891757..ba989da286 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/util/StringUtil.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/util/StringUtil.java @@ -13,6 +13,7 @@ import java.util.regex.Pattern; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; +import org.checkerframework.checker.nullness.qual.NonNull; import net.sourceforge.pmd.annotation.InternalApi; import net.sourceforge.pmd.internal.util.AssertionUtil; @@ -550,6 +551,9 @@ public final class StringUtil { return str.replaceAll("'", "''"); } + public static @NonNull String inDoubleQuotes(String expected) { + return "\"" + expected + "\""; + } public enum CaseConvention { diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/util/treeexport/TreeExportCli.java b/pmd-core/src/main/java/net/sourceforge/pmd/util/treeexport/TreeExportCli.java index e8381171a1..9cd086f186 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/util/treeexport/TreeExportCli.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/util/treeexport/TreeExportCli.java @@ -16,6 +16,7 @@ import java.util.List; import java.util.Map; import org.apache.commons.lang3.StringEscapeUtils; +import org.checkerframework.checker.nullness.qual.Nullable; import net.sourceforge.pmd.annotation.Experimental; import net.sourceforge.pmd.internal.Slf4jSimpleConfiguration; @@ -43,8 +44,8 @@ public class TreeExportCli { @Parameter(names = { "--format", "-f" }, description = "The output format.") private String format = "xml"; - @Parameter(names = { "--language", "-l" }, description = "Specify the language to use.") - private String language = LanguageRegistry.getDefaultLanguage().getTerseName(); + @Parameter(names = { "--language", "-l" }, description = "Specify the language to use.", required = true) + private @Nullable String language = null; @Parameter(names = 
{ "--encoding", "-e" }, description = "Encoding of the source file.") private String encoding = StandardCharsets.UTF_8.name(); @DynamicParameter(names = "-P", description = "Properties for the renderer.") @@ -130,7 +131,7 @@ public class TreeExportCli { sb.append(System.lineSeparator()); sb.append("Available languages: "); - for (Language l : LanguageRegistry.getLanguages()) { + for (Language l : LanguageRegistry.PMD) { sb.append(l.getTerseName()).append(' '); } sb.append(System.lineSeparator()); @@ -176,14 +177,24 @@ public class TreeExportCli { } private void run(TreeRenderer renderer) throws IOException { + run(LanguageRegistry.PMD, renderer); + } + + private void run(LanguageRegistry registry, TreeRenderer renderer) throws IOException { printWarning(); - LanguageVersion langVersion = LanguageRegistry.findLanguageByTerseName(language).getDefaultVersion(); + Language lang = registry.getLanguageById(language); + if (lang == null) { + throw bail("Unknown language '" + language + "', one of [" + + registry.commaSeparatedList(Language::getId) + + "] was expected"); + } + + LanguageVersion langVersion = lang.getDefaultVersion(); LanguageVersionHandler languageHandler = langVersion.getLanguageVersionHandler(); Parser parser = languageHandler.getParser(); - @SuppressWarnings("PMD.CloseResource") - TextFile textFile; + @SuppressWarnings("PMD.CloseResource") TextFile textFile; if (file == null && !readStdin) { throw bail("One of --file or --read-stdin must be mentioned"); } else if (readStdin) { diff --git a/pmd-core/src/main/resources/rulesets/releases/35.xml b/pmd-core/src/main/resources/rulesets/releases/35.xml index debffe92c2..70263d0435 100644 --- a/pmd-core/src/main/resources/rulesets/releases/35.xml +++ b/pmd-core/src/main/resources/rulesets/releases/35.xml @@ -25,7 +25,9 @@ This ruleset contains links to rules that are new in PMD v3.5 +