[cpd] Fix CPD crashes about unicode escapes (#4983)

Merge pull request #4983 from oowekyala:cpd-fix-crashes
This commit is contained in:
Andreas Dangel
2024-05-02 11:13:33 +02:00
7 changed files with 157 additions and 1 deletions

View File

@@ -18,6 +18,7 @@ This is a {{ site.pmd.release_type }} release.
* core
* [#4978](https://github.com/pmd/pmd/issues/4978): \[core] Referenced Rulesets do not emit details on validation errors
* [#4983](https://github.com/pmd/pmd/pull/4983): \[cpd] Fix CPD crashes about unicode escapes
* java
* [#4973](https://github.com/pmd/pmd/pull/4973): \[java] Stop parsing Java for CPD
* java-bestpractices

View File

@@ -171,7 +171,7 @@ public final class CpdAnalysis implements AutoCloseable {
int newTokens = doTokenize(textDocument, tokenizers.get(textFile.getLanguageVersion().getLanguage()), tokens);
numberOfTokensPerFile.put(textDocument.getFileId(), newTokens);
listener.addedFile(1);
} catch (LexException | IOException e) {
} catch (IOException | FileAnalysisException e) {
if (e instanceof FileAnalysisException) { // NOPMD
((FileAnalysisException) e).setFileId(textFile.getFileId());
}

View File

@@ -10,12 +10,14 @@ import org.apache.commons.lang3.StringUtils;
import org.checkerframework.checker.nullness.qual.NonNull;
import org.checkerframework.checker.nullness.qual.Nullable;
import net.sourceforge.pmd.lang.ast.impl.javacc.MalformedSourceException;
import net.sourceforge.pmd.lang.document.FileId;
import net.sourceforge.pmd.lang.document.FileLocation;
/**
* An exception that occurs while processing a file. Subtypes include
* <ul>
* <li>{@link MalformedSourceException}: error in source format, e.g. invalid character escapes (in case that happens before lexing)
* <li>{@link LexException}: lexical syntax errors
* <li>{@link ParseException}: syntax errors
* <li>{@link SemanticException}: exceptions occurring after the parsing

View File

@@ -68,6 +68,9 @@ abstract class BaseMappedDocument implements TextDocument {
* @return Input region
*/
protected @NonNull TextRegion inputRegion(TextRegion outputRegion) {
    // The start offset is translated the same way whether or not the region is empty.
    final int startInInput = inputOffset(outputRegion.getStartOffset(), true);
    if (outputRegion.isEmpty()) {
        // An empty region maps to a caret at the translated start;
        // its end offset is never translated separately.
        return TextRegion.caretAt(startInInput);
    }
    final int endInInput = inputOffset(outputRegion.getEndOffset(), false);
    return TextRegion.fromBothOffsets(startInInput, endInInput);
}

View File

@@ -8,6 +8,11 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.verifyNoMoreInteractions;
import java.io.File;
import java.io.IOException;
@@ -25,8 +30,11 @@ import org.junit.jupiter.api.condition.OS;
import org.junit.jupiter.api.io.TempDir;
import net.sourceforge.pmd.lang.DummyLanguageModule;
import net.sourceforge.pmd.lang.ast.LexException;
import net.sourceforge.pmd.lang.ast.impl.javacc.MalformedSourceException;
import net.sourceforge.pmd.lang.document.FileId;
import net.sourceforge.pmd.lang.document.TextFile;
import net.sourceforge.pmd.util.log.PmdReporter;
/**
* Unit test for {@link CpdAnalysis}
@@ -187,6 +195,39 @@ class CpdAnalysisTest {
}
}
@Test
void testNoSkipLexicalErrors() throws IOException {
    // One input makes the dummy lexer throw a LexException, the other a MalformedSourceException.
    final FileId lexErrorFile = FileId.fromPathLikeString("foo.dummy");
    final FileId malformedFile = FileId.fromPathLikeString("foo2.dummy");

    final PmdReporter mockReporter = mock(PmdReporter.class);
    config.setReporter(mockReporter);
    config.setSkipLexicalErrors(false);

    try (CpdAnalysis analysis = CpdAnalysis.create(config)) {
        final boolean lexFileAdded =
            analysis.files().addSourceFile(lexErrorFile, DummyLanguageModule.CPD_THROW_LEX_EXCEPTION);
        assertTrue(lexFileAdded);

        final boolean malformedFileAdded =
            analysis.files().addSourceFile(malformedFile, DummyLanguageModule.CPD_THROW_MALFORMED_SOURCE_EXCEPTION);
        assertTrue(malformedFileAdded);

        analysis.performAnalysis();
    }

    // Both failures are reported as tokenizing errors, followed by one overall failure report.
    verify(mockReporter).errorEx(eq("Error while tokenizing"), any(LexException.class));
    verify(mockReporter).errorEx(eq("Error while tokenizing"), any(MalformedSourceException.class));
    verify(mockReporter).errorEx(eq("Exception while running CPD"), any(IllegalStateException.class));
    verifyNoMoreInteractions(mockReporter);
}
@Test
void testSkipLexicalErrors() throws IOException {
    // Same two failing inputs as the non-skipping test, but with skipping switched on.
    final FileId lexErrorFile = FileId.fromPathLikeString("foo.dummy");
    final FileId malformedFile = FileId.fromPathLikeString("foo2.dummy");

    final PmdReporter mockReporter = mock(PmdReporter.class);
    config.setReporter(mockReporter);
    config.setSkipLexicalErrors(true);

    try (CpdAnalysis analysis = CpdAnalysis.create(config)) {
        final boolean lexFileAdded =
            analysis.files().addSourceFile(lexErrorFile, DummyLanguageModule.CPD_THROW_LEX_EXCEPTION);
        assertTrue(lexFileAdded);

        final boolean malformedFileAdded =
            analysis.files().addSourceFile(malformedFile, DummyLanguageModule.CPD_THROW_MALFORMED_SOURCE_EXCEPTION);
        assertTrue(malformedFileAdded);

        analysis.performAnalysis();
    }

    // Each file is reported as skipped, and nothing else reaches the reporter.
    verify(mockReporter).errorEx(eq("Skipping file"), any(LexException.class));
    verify(mockReporter).errorEx(eq("Skipping file"), any(MalformedSourceException.class));
    verifyNoMoreInteractions(mockReporter);
}
@Test
void duplicatedFilesShouldBeSkipped() throws IOException {
String filename = "file1.dummy";

View File

@@ -6,15 +6,20 @@ package net.sourceforge.pmd.lang;
import java.util.Objects;
import net.sourceforge.pmd.cpd.AnyCpdLexer;
import net.sourceforge.pmd.cpd.CpdCapableLanguage;
import net.sourceforge.pmd.cpd.CpdLanguageProperties;
import net.sourceforge.pmd.cpd.CpdLexer;
import net.sourceforge.pmd.lang.ast.DummyNode;
import net.sourceforge.pmd.lang.ast.DummyNode.DummyRootNode;
import net.sourceforge.pmd.lang.ast.ParseException;
import net.sourceforge.pmd.lang.ast.Parser;
import net.sourceforge.pmd.lang.ast.Parser.ParserTask;
import net.sourceforge.pmd.lang.ast.impl.javacc.MalformedSourceException;
import net.sourceforge.pmd.lang.document.Chars;
import net.sourceforge.pmd.lang.document.FileLocation;
import net.sourceforge.pmd.lang.document.TextDocument;
import net.sourceforge.pmd.lang.document.TextPos2d;
import net.sourceforge.pmd.lang.document.TextRegion;
import net.sourceforge.pmd.lang.impl.SimpleLanguageModuleBase;
import net.sourceforge.pmd.reporting.RuleViolation;
@@ -29,6 +34,10 @@ public class DummyLanguageModule extends SimpleLanguageModuleBase implements Cpd
public static final String TERSE_NAME = "dummy";
private static final String PARSER_THROWS = "parserThrows";
/**
 * Marker text: when a document contains it, the lexer returned by
 * {@code createCpdLexer} throws a {@code MalformedSourceException}
 * located at the marker's offset.
 */
public static final String CPD_THROW_MALFORMED_SOURCE_EXCEPTION = ":throw_malformed_source_exception:";
/**
 * Marker text: when a document contains it, the lexer returned by
 * {@code createCpdLexer} throws the exception built by
 * {@code makeLexException} at the marker's line and column.
 * This marker is checked before the other two.
 */
public static final String CPD_THROW_LEX_EXCEPTION = ":throw_lex_source_exception:";
/**
 * Marker text: when a document contains it (and neither of the other two
 * markers), the lexer returned by {@code createCpdLexer} throws an
 * {@code IllegalArgumentException}.
 */
public static final String CPD_THROW_OTHER_EXCEPTION = ":throw_other_exception:";
public DummyLanguageModule() {
super(LanguageMetadata.withId(TERSE_NAME).name(NAME).extensions("dummy", "txt")
.addVersion("1.0")
@@ -55,6 +64,29 @@ public class DummyLanguageModule extends SimpleLanguageModuleBase implements Cpd
return bundle;
}
/**
 * Creates a CPD lexer for tests. It scans the document text for the
 * {@code CPD_THROW_*} markers, in the order lex / malformed source / other,
 * and throws the matching exception for the first marker found. Documents
 * without any marker are tokenized by a plain {@link AnyCpdLexer}.
 */
@Override
public CpdLexer createCpdLexer(LanguagePropertyBundle bundle) {
    final CpdLexer delegate = new AnyCpdLexer();
    return (document, tokenSink) -> {
        final Chars content = document.getText();

        // 1. lexical error, reported at the marker's line/column
        final int lexMarkerAt = content.indexOf(CPD_THROW_LEX_EXCEPTION, 0);
        if (lexMarkerAt != -1) {
            final TextPos2d position = document.lineColumnAtOffset(lexMarkerAt);
            throw tokenSink.makeLexException(position.getLine(), position.getColumn(), "test exception", null);
        }

        // 2. malformed source, located with an empty region at the marker
        final int malformedMarkerAt = content.indexOf(CPD_THROW_MALFORMED_SOURCE_EXCEPTION, 0);
        if (malformedMarkerAt != -1) {
            final TextRegion caret = TextRegion.caretAt(malformedMarkerAt);
            final FileLocation location = document.toLocation(caret);
            throw new MalformedSourceException("test exception", null, location);
        }

        // 3. any other failure
        final int otherMarkerAt = content.indexOf(CPD_THROW_OTHER_EXCEPTION, 0);
        if (otherMarkerAt != -1) {
            throw new IllegalArgumentException("test exception");
        }

        // no marker: regular tokenization
        delegate.tokenize(document, tokenSink);
    };
}
/**
 * Looks up the language version registered under the {@code PARSER_THROWS} name.
 *
 * @return the result of {@code getVersion(PARSER_THROWS)}
 */
public LanguageVersion getVersionWhereParserThrows() {
    final LanguageVersion throwingVersion = getVersion(PARSER_THROWS);
    return throwingVersion;
}

View File

@@ -0,0 +1,77 @@
/*
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.lang.document;
import static net.sourceforge.pmd.lang.document.TextPos2d.pos2d;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
import org.junit.jupiter.api.Test;
import net.sourceforge.pmd.lang.DummyLanguageModule;
import net.sourceforge.pmd.lang.LanguageVersion;
/**
 * Checks how documents built by {@link FragmentedDocBuilder} translate
 * offsets and regions of the rewritten text back to positions in the
 * original text.
 */
class FragmentedTextDocumentTest {

    LanguageVersion dummyVersion = DummyLanguageModule.getInstance().getDefaultVersion();

    /** A single replacement: "abc" becomes "aabxc". */
    @Test
    void testSimple() throws IOException {
        try (TextDocument original = TextDocument.readOnlyString("abc", dummyVersion)) {
            FragmentedDocBuilder deltas = new FragmentedDocBuilder(original);
            deltas.recordDelta(1, 2, Chars.wrap("abx"));

            try (TextDocument fragmented = deltas.build()) {
                assertEquals("aabxc", fragmented.getText().toString());

                // before the replacement
                assertEquals(pos2d(1, 1), fragmented.lineColumnAtOffset(0));
                assertEquals(pos2d(1, 2), fragmented.lineColumnAtOffset(1, true));

                // inside the replacement, with both values of the boolean flag
                assertEquals(pos2d(1, 3), fragmented.lineColumnAtOffset(2, true));
                assertEquals(pos2d(1, 3), fragmented.lineColumnAtOffset(2, false));
                assertEquals(pos2d(1, 4), fragmented.lineColumnAtOffset(3, true));
                assertEquals(pos2d(1, 4), fragmented.lineColumnAtOffset(3, false));

                // end of the rewritten text
                assertEquals(pos2d(1, 4), fragmented.lineColumnAtOffset(5));
            }
        }
    }

    /** An empty region (caret) at offset 4 of the rewritten text "aabxc". */
    @Test
    void testToLocationWithCaret() throws IOException {
        try (TextDocument original = TextDocument.readOnlyString("abc", dummyVersion)) {
            FragmentedDocBuilder deltas = new FragmentedDocBuilder(original);
            deltas.recordDelta(1, 2, Chars.wrap("abx"));

            try (TextDocument fragmented = deltas.build()) {
                assertEquals("aabxc", fragmented.getText().toString());
                assertEquals(pos2d(1, 3), fragmented.toLocation(TextRegion.caretAt(4)).getStartPos());
            }
        }
    }

    /** A caret sitting exactly between two adjacent replacements ("aBBCCd" becomes "aXYd"). */
    @Test
    void testToLocationWithCaretBetweenEscapes() throws IOException {
        try (TextDocument original = TextDocument.readOnlyString("aBBCCd", dummyVersion)) {
            FragmentedDocBuilder deltas = new FragmentedDocBuilder(original);
            deltas.recordDelta(1, 3, Chars.wrap("X"));
            deltas.recordDelta(3, 5, Chars.wrap("Y"));

            try (TextDocument fragmented = deltas.build()) {
                assertEquals("aXYd", fragmented.getText().toString());
                assertEquals(pos2d(1, 4), fragmented.toLocation(TextRegion.caretAt(2)).getStartPos());
            }
        }
    }
}