[cpd] Fix CPD crashes about unicode escapes (#4983)

Merge pull request #4983 from oowekyala:cpd-fix-crashes
2024-05-02 11:13:33 +02:00
parent 99aef24509 5bb6ece01a
commit 42ffb31ca0
7 changed files with 157 additions and 1 deletions
--- a/docs/pages/release_notes.md
+++ b/docs/pages/release_notes.md
@@ -18,6 +18,7 @@ This is a {{ site.pmd.release_type }} release.

 * core
  * [#4978](https://github.com/pmd/pmd/issues/4978): \[core] Referenced Rulesets do not emit details on validation errors
+  * [#4983](https://github.com/pmd/pmd/pull/4983): \[cpd] Fix CPD crashes about unicode escapes
 * java
  * [#4973](https://github.com/pmd/pmd/pull/4973): \[java] Stop parsing Java for CPD
 * java-bestpractices
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CpdAnalysis.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CpdAnalysis.java
@@ -171,7 +171,7 @@ public final class CpdAnalysis implements AutoCloseable {
                    int newTokens = doTokenize(textDocument, tokenizers.get(textFile.getLanguageVersion().getLanguage()), tokens);
                    numberOfTokensPerFile.put(textDocument.getFileId(), newTokens);
                    listener.addedFile(1);
-                } catch (LexException | IOException e) {
+                } catch (IOException | FileAnalysisException e) {
                    if (e instanceof FileAnalysisException) { // NOPMD
                        ((FileAnalysisException) e).setFileId(textFile.getFileId());
                    }
--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/FileAnalysisException.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/FileAnalysisException.java
@@ -10,12 +10,14 @@ import org.apache.commons.lang3.StringUtils;
 import org.checkerframework.checker.nullness.qual.NonNull;
 import org.checkerframework.checker.nullness.qual.Nullable;

+import net.sourceforge.pmd.lang.ast.impl.javacc.MalformedSourceException;
 import net.sourceforge.pmd.lang.document.FileId;
 import net.sourceforge.pmd.lang.document.FileLocation;

 /**
 * An exception that occurs while processing a file. Subtypes include
 * <ul>
+ * <li>{@link MalformedSourceException}: error in the source format, e.g. invalid character escapes (when this is detected before lexing)
 * <li>{@link LexException}: lexical syntax errors
 * <li>{@link ParseException}: syntax errors
 * <li>{@link SemanticException}: exceptions occurring after the parsing
--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/BaseMappedDocument.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/BaseMappedDocument.java
@@ -68,6 +68,9 @@ abstract class BaseMappedDocument implements TextDocument {
     * @return Input region
     */
    protected @NonNull TextRegion inputRegion(TextRegion outputRegion) {
+        if (outputRegion.isEmpty()) { // empty region: translate a single offset and return a caret instead of mapping both ends
+            return TextRegion.caretAt(inputOffset(outputRegion.getStartOffset(), true));
+        }
        return TextRegion.fromBothOffsets(inputOffset(outputRegion.getStartOffset(), true),
                                          inputOffset(outputRegion.getEndOffset(), false));
    }
--- a/pmd-core/src/test/java/net/sourceforge/pmd/cpd/CpdAnalysisTest.java
+++ b/pmd-core/src/test/java/net/sourceforge/pmd/cpd/CpdAnalysisTest.java
@@ -8,6 +8,11 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.junit.jupiter.api.Assertions.fail;
+import static org.mockito.Mockito.any;
+import static org.mockito.Mockito.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;

 import java.io.File;
 import java.io.IOException;
@@ -25,8 +30,11 @@ import org.junit.jupiter.api.condition.OS;
 import org.junit.jupiter.api.io.TempDir;

 import net.sourceforge.pmd.lang.DummyLanguageModule;
+import net.sourceforge.pmd.lang.ast.LexException;
+import net.sourceforge.pmd.lang.ast.impl.javacc.MalformedSourceException;
 import net.sourceforge.pmd.lang.document.FileId;
 import net.sourceforge.pmd.lang.document.TextFile;
+import net.sourceforge.pmd.util.log.PmdReporter;

 /**
 * Unit test for {@link CpdAnalysis}
@@ -187,6 +195,39 @@ class CpdAnalysisTest {
        }
    }

+    @Test
+    void testNoSkipLexicalErrors() throws IOException {
+        PmdReporter reporter = mock(PmdReporter.class);
+        config.setReporter(reporter);
+        // Skipping disabled: each failing file is expected to be reported as a tokenizing error.
+        config.setSkipLexicalErrors(false);
+        try (CpdAnalysis cpd = CpdAnalysis.create(config)) {
+            assertTrue(cpd.files().addSourceFile(FileId.fromPathLikeString("foo.dummy"), DummyLanguageModule.CPD_THROW_LEX_EXCEPTION));
+            assertTrue(cpd.files().addSourceFile(FileId.fromPathLikeString("foo2.dummy"), DummyLanguageModule.CPD_THROW_MALFORMED_SOURCE_EXCEPTION));
+            cpd.performAnalysis();
+        }
+        verify(reporter).errorEx(eq("Error while tokenizing"), any(LexException.class));
+        verify(reporter).errorEx(eq("Error while tokenizing"), any(MalformedSourceException.class));
+        verify(reporter).errorEx(eq("Exception while running CPD"), any(IllegalStateException.class)); // one more report for the run itself
+        verifyNoMoreInteractions(reporter); // nothing else may have been reported
+    }
+
+    @Test
+    void testSkipLexicalErrors() throws IOException {
+        PmdReporter reporter = mock(PmdReporter.class);
+        config.setReporter(reporter);
+        // Skipping enabled: both failures are expected to be reported as "Skipping file", and nothing else.
+        config.setSkipLexicalErrors(true);
+        try (CpdAnalysis cpd = CpdAnalysis.create(config)) {
+            assertTrue(cpd.files().addSourceFile(FileId.fromPathLikeString("foo.dummy"), DummyLanguageModule.CPD_THROW_LEX_EXCEPTION));
+            assertTrue(cpd.files().addSourceFile(FileId.fromPathLikeString("foo2.dummy"), DummyLanguageModule.CPD_THROW_MALFORMED_SOURCE_EXCEPTION));
+            cpd.performAnalysis();
+        }
+        verify(reporter).errorEx(eq("Skipping file"), any(LexException.class));
+        verify(reporter).errorEx(eq("Skipping file"), any(MalformedSourceException.class)); // handled like the lex error above
+        verifyNoMoreInteractions(reporter); // in particular, no "Exception while running CPD" report
+    }
+
    @Test
    void duplicatedFilesShouldBeSkipped() throws IOException {
        String filename = "file1.dummy";
--- a/pmd-core/src/test/java/net/sourceforge/pmd/lang/DummyLanguageModule.java
+++ b/pmd-core/src/test/java/net/sourceforge/pmd/lang/DummyLanguageModule.java
@@ -6,15 +6,20 @@ package net.sourceforge.pmd.lang;

 import java.util.Objects;

+import net.sourceforge.pmd.cpd.AnyCpdLexer;
 import net.sourceforge.pmd.cpd.CpdCapableLanguage;
 import net.sourceforge.pmd.cpd.CpdLanguageProperties;
+import net.sourceforge.pmd.cpd.CpdLexer;
 import net.sourceforge.pmd.lang.ast.DummyNode;
 import net.sourceforge.pmd.lang.ast.DummyNode.DummyRootNode;
 import net.sourceforge.pmd.lang.ast.ParseException;
 import net.sourceforge.pmd.lang.ast.Parser;
 import net.sourceforge.pmd.lang.ast.Parser.ParserTask;
+import net.sourceforge.pmd.lang.ast.impl.javacc.MalformedSourceException;
 import net.sourceforge.pmd.lang.document.Chars;
+import net.sourceforge.pmd.lang.document.FileLocation;
 import net.sourceforge.pmd.lang.document.TextDocument;
+import net.sourceforge.pmd.lang.document.TextPos2d;
 import net.sourceforge.pmd.lang.document.TextRegion;
 import net.sourceforge.pmd.lang.impl.SimpleLanguageModuleBase;
 import net.sourceforge.pmd.reporting.RuleViolation;
@@ -29,6 +34,10 @@ public class DummyLanguageModule extends SimpleLanguageModuleBase implements Cpd
    public static final String TERSE_NAME = "dummy";
    private static final String PARSER_THROWS = "parserThrows";

+    public static final String CPD_THROW_MALFORMED_SOURCE_EXCEPTION = ":throw_malformed_source_exception:"; // marker text: the dummy CPD lexer throws MalformedSourceException
+    public static final String CPD_THROW_LEX_EXCEPTION = ":throw_lex_source_exception:"; // marker text: the dummy CPD lexer throws the exception built by makeLexException
+    public static final String CPD_THROW_OTHER_EXCEPTION = ":throw_other_exception:"; // marker text: the dummy CPD lexer throws IllegalArgumentException
+
    public DummyLanguageModule() {
        super(LanguageMetadata.withId(TERSE_NAME).name(NAME).extensions("dummy", "txt")
                              .addVersion("1.0")
@@ -55,6 +64,29 @@ public class DummyLanguageModule extends SimpleLanguageModuleBase implements Cpd
        return bundle;
    }

+    @Override
+    public CpdLexer createCpdLexer(LanguagePropertyBundle bundle) {
+        CpdLexer base = new AnyCpdLexer(); // delegate used when the text contains none of the markers
+        return (doc, tokens) -> {
+            Chars text = doc.getText();
+            int offset = text.indexOf(CPD_THROW_LEX_EXCEPTION, 0); // markers are checked in order: lex, malformed source, other
+            if (offset != -1) {
+                TextPos2d lc = doc.lineColumnAtOffset(offset);
+                throw tokens.makeLexException(lc.getLine(), lc.getColumn(), "test exception", null); // positioned at the marker
+            }
+            offset = text.indexOf(CPD_THROW_MALFORMED_SOURCE_EXCEPTION, 0);
+            if (offset != -1) {
+                FileLocation lc = doc.toLocation(TextRegion.caretAt(offset)); // zero-length location at the marker
+                throw new MalformedSourceException("test exception", null, lc);
+            }
+            offset = text.indexOf(CPD_THROW_OTHER_EXCEPTION, 0);
+            if (offset != -1) {
+                throw new IllegalArgumentException("test exception"); // plain runtime exception, carries no location
+            }
+            base.tokenize(doc, tokens); // no marker found: tokenize normally
+        };
+    }
+
    public LanguageVersion getVersionWhereParserThrows() {
        return getVersion(PARSER_THROWS);
    }
--- a/pmd-core/src/test/java/net/sourceforge/pmd/lang/document/FragmentedTextDocumentTest.java
+++ b/pmd-core/src/test/java/net/sourceforge/pmd/lang/document/FragmentedTextDocumentTest.java
@@ -0,0 +1,77 @@
+/*
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.lang.document;
+
+import static net.sourceforge.pmd.lang.document.TextPos2d.pos2d;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.IOException;
+
+import org.junit.jupiter.api.Test;
+
+import net.sourceforge.pmd.lang.DummyLanguageModule;
+import net.sourceforge.pmd.lang.LanguageVersion;
+
+class FragmentedTextDocumentTest { // position mapping of documents built by FragmentedDocBuilder
+    // Language version passed to the in-memory documents created by the tests.
+    LanguageVersion dummyVersion = DummyLanguageModule.getInstance().getDefaultVersion();
+
+    @Test
+    void testSimple() throws IOException {
+        // One delta on "abc": per the assertion below, 'b' is replaced by "abx", giving "aabxc".
+        try (TextDocument base = TextDocument.readOnlyString("abc", dummyVersion)) {
+            FragmentedDocBuilder builder = new FragmentedDocBuilder(base);
+            builder.recordDelta(1, 2, Chars.wrap("abx"));
+            try (TextDocument doc = builder.build()) {
+                assertEquals("aabxc", doc.getText().toString());
+                // Offsets index the built text; expected positions are columns of the base text "abc".
+                assertEquals(pos2d(1, 1), doc.lineColumnAtOffset(0));
+                assertEquals(pos2d(1, 2), doc.lineColumnAtOffset(1, true));
+                assertEquals(pos2d(1, 3), doc.lineColumnAtOffset(2, true));
+                assertEquals(pos2d(1, 3), doc.lineColumnAtOffset(2, false));
+                assertEquals(pos2d(1, 4), doc.lineColumnAtOffset(3, true));
+                assertEquals(pos2d(1, 4), doc.lineColumnAtOffset(3, false));
+                assertEquals(pos2d(1, 4), doc.lineColumnAtOffset(5)); // end of the built text -> end of the base text
+            }
+
+        }
+
+
+    }
+
+    @Test
+    void testToLocationWithCaret() throws IOException {
+        // Same document as in testSimple; here an empty (caret) region is converted to a location.
+        try (TextDocument base = TextDocument.readOnlyString("abc", dummyVersion)) {
+            FragmentedDocBuilder builder = new FragmentedDocBuilder(base);
+            builder.recordDelta(1, 2, Chars.wrap("abx"));
+            try (TextDocument doc = builder.build()) {
+                assertEquals("aabxc", doc.getText().toString());
+                // Offset 4 is just before 'c' in "aabxc"; the expected position is column 3, where 'c' sits in "abc".
+                TextRegion region = TextRegion.caretAt(4);
+                assertEquals(pos2d(1, 3), doc.toLocation(region).getStartPos());
+            }
+
+        }
+    }
+
+    @Test
+    void testToLocationWithCaretBetweenEscapes() throws IOException {
+        // Two adjacent deltas: per the assertion below, "BB" becomes "X" and "CC" becomes "Y".
+        try (TextDocument base = TextDocument.readOnlyString("aBBCCd", dummyVersion)) {
+            FragmentedDocBuilder builder = new FragmentedDocBuilder(base);
+            builder.recordDelta(1, 3, Chars.wrap("X"));
+            builder.recordDelta(3, 5, Chars.wrap("Y"));
+            try (TextDocument doc = builder.build()) {
+                assertEquals("aXYd", doc.getText().toString());
+                // Offset 2 lies between 'X' and 'Y'; the expected position is column 4, between "BB" and "CC" in the base text.
+                TextRegion region = TextRegion.caretAt(2);
+                assertEquals(pos2d(1, 4), doc.toLocation(region).getStartPos());
+            }
+
+        }
+    }
+
+}