[apex] Must use case-insensitive input stream to avoid choking on Unicode escape sequences

This commit is contained in:
Willem Hajenius
2024-10-21 23:45:20 +02:00
parent 97fe389dcc
commit 61b1c372cd
2 changed files with 38 additions and 1 deletions

View File

@ -22,6 +22,7 @@ import net.sourceforge.pmd.lang.document.TextDocument;
import net.sourceforge.pmd.lang.document.TextRegion;
import io.github.apexdevtools.apexparser.ApexLexer;
import io.github.apexdevtools.apexparser.CaseInsensitiveInputStream;
@InternalApi
final class ApexCommentBuilder {
@ -103,7 +104,8 @@ final class ApexCommentBuilder {
}
private static CommentInformation extractInformationFromComments(TextDocument sourceCode, String suppressMarker) {
ApexLexer lexer = new ApexLexer(CharStreams.fromString(sourceCode.getText().toString()));
String source = sourceCode.getText().toString();
ApexLexer lexer = new ApexLexer(new CaseInsensitiveInputStream(CharStreams.fromString(source)));
List<Token> allCommentTokens = new ArrayList<>();
Map<Integer, String> suppressMap = new HashMap<>();

View File

@ -8,14 +8,18 @@ package net.sourceforge.pmd.lang.apex.ast;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;
import org.antlr.v4.runtime.Token;
import org.junit.jupiter.api.Test;
import io.github.apexdevtools.apexparser.ApexLexer;
import io.github.apexdevtools.apexparser.ApexParser;
import io.github.apexdevtools.apexparser.CaseInsensitiveInputStream;
/**
* This is an exploration test for {@link ApexLexer}.
@ -49,4 +53,35 @@ class ApexLexerTest {
ApexParser.CompilationUnitContext compilationUnit = parser.compilationUnit();
assertNotNull(compilationUnit);
}
/**
 * Lexes an Apex string literal containing a Unicode escape sequence twice:
 * once from a plain character stream and once from the same stream wrapped
 * in a {@link CaseInsensitiveInputStream}.
 *
 * <p>The plain stream is expected to report two lexing errors, the wrapped
 * stream none.
 */
@Test
void testLexerUnicodeEscapes() {
    final String literal = "'Fran\\u00E7ois'";

    // Without the case-insensitive wrapper the lexer reports errors on the escape.
    final int plainStreamErrors = getLexingErrors(CharStreams.fromString(literal));
    assertEquals(2, plainStreamErrors);

    // With the wrapper the very same input lexes cleanly.
    final CharStream wrapped = new CaseInsensitiveInputStream(CharStreams.fromString(literal));
    final int wrappedStreamErrors = getLexingErrors(wrapped);
    assertEquals(0, wrappedStreamErrors);
}
/**
 * Runs an {@link ApexLexer} over the given stream and counts the errors it reports.
 *
 * @param stream the character stream to lex
 * @return the number of errors delivered to the counting listener
 */
private int getLexingErrors(CharStream stream) {
    final ErrorListener counter = new ErrorListener();
    final ApexLexer apexLexer = new ApexLexer(stream);

    // Drop the pre-registered listeners first: this avoids distracting
    // "token recognition error" output on stderr, leaving only our counter.
    apexLexer.removeErrorListeners();
    apexLexer.addErrorListener(counter);

    // Fill the token stream so the lexer runs before the count is read.
    new CommonTokenStream(apexLexer).fill();

    return counter.getErrorCount();
}
/**
 * Error listener that does nothing except count how many times
 * {@link #syntaxError} has been called.
 */
static class ErrorListener extends BaseErrorListener {
    /** Number of errors seen so far; starts at zero. */
    private int errorCount;

    /**
     * Records one more error. All arguments are ignored.
     */
    @Override
    public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line,
                            int charPositionInLine, String msg, RecognitionException e) {
        this.errorCount += 1;
    }

    /**
     * @return how many errors have been recorded by this listener
     */
    public int getErrorCount() {
        return this.errorCount;
    }
}
}