Normalize image of PLSQL tokens to uppercase, reuse strings

This commit is contained in:
Clément Fournier
2024-04-08 20:43:34 +02:00
parent 44f29c3983
commit 72408ca679
2 changed files with 45 additions and 4 deletions

View File

@ -4,16 +4,53 @@
package net.sourceforge.pmd.lang.plsql.ast;
import org.checkerframework.checker.nullness.qual.Nullable;
import net.sourceforge.pmd.benchmark.TimeTracker;
import net.sourceforge.pmd.lang.ast.ParseException;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument.TokenDocumentBehavior;
import net.sourceforge.pmd.lang.ast.impl.javacc.JjtreeParserAdapter;
import net.sourceforge.pmd.lang.plsql.symboltable.SymbolFacade;
public class PLSQLParser extends JjtreeParserAdapter<ASTInput> {
private static final TokenDocumentBehavior TOKEN_BEHAVIOR = new TokenDocumentBehavior(PLSQLTokenKinds.TOKEN_NAMES);
// Cache of constant images for string-literal token kinds, indexed by token kind.
// This is to reuse the image strings for PLSQL keywords.
// JavaCC unfortunately does not store a constant image for those
// keywords because the grammar is case-insensitive.
// This optimization has the effect that the image of keyword tokens
// is always upper-case, regardless of the actual case used in the code.
// The original casing can be found by looking at the TextDocument for the file.
// NOTE: this array is indexed by token kind, so its size must be greater
// than the largest token kind of the grammar. The initializer below stops
// at the end of the array and does not cache kinds beyond it.
private static final String[] STRING_LITERAL_IMAGES_EXTRA = new String[512];

static {
    // The description has to be fetched anew for every kind: reading it only
    // once (for kind 0) and then merely advancing the index would test the
    // same description on every iteration and never fill the cache per kind.
    for (int kind = 0; kind < STRING_LITERAL_IMAGES_EXTRA.length; kind++) {
        String description = PLSQLTokenKinds.describe(kind);
        if (description == null) {
            // A null description is treated as "no more token kinds".
            // NOTE(review): presumably describe() returns null past the last kind - confirm.
            break;
        }
        // Only descriptions of the form "\"KEYWORD\"" denote string literal images.
        // The length check keeps a lone quote character from producing an invalid substring range.
        if (description.length() >= 2 && description.startsWith("\"") && description.endsWith("\"")) {
            // a string literal image, remove the quotes
            STRING_LITERAL_IMAGES_EXTRA[kind] = description.substring(1, description.length() - 1);
        }
    }
}
// Token document behavior for PLSQL. When a token is created without an image,
// a shared constant image is looked up by token kind, so that equal keyword
// tokens reuse one String instance.
private static final TokenDocumentBehavior TOKEN_BEHAVIOR = new TokenDocumentBehavior(PLSQLTokenKinds.TOKEN_NAMES) {
    @Override
    public JavaccToken createToken(JavaccTokenDocument self, int kind, CharStream cs, @Nullable String image) {
        // Only consult the cache when no image was supplied, and only for kinds
        // that fall inside the fixed-size cache array: an out-of-range kind must
        // fall through with a null image instead of throwing
        // ArrayIndexOutOfBoundsException.
        if (image == null && kind >= 0 && kind < STRING_LITERAL_IMAGES_EXTRA.length) {
            // fetch another constant image if possible (may still be null).
            image = STRING_LITERAL_IMAGES_EXTRA[kind];
        }
        return super.createToken(self, kind, cs, image);
    }
};
@Override
protected TokenDocumentBehavior tokenBehavior() {

View File

@ -51,9 +51,13 @@ public class PLSQLCpdLexer extends JavaccCpdLexer {
} else if (plsqlToken.kind != PLSQLTokenKinds.CHARACTER_LITERAL
&& plsqlToken.kind != PLSQLTokenKinds.STRING_LITERAL
&& plsqlToken.kind != PLSQLTokenKinds.QUOTED_LITERAL) {
// PLSQL is case-insensitive, but of course the contents of
// string literals and the like are case-sensitive
image = image.toLowerCase(Locale.ROOT);
// PLSQL is case-insensitive, but the contents of
// string literals and the like are case-sensitive.
// Note: tokens are normalized to uppercase to make CPD case-insensitive.
// We use uppercase and not lowercase because that way, PLSQL keywords
// will not be changed (they are already uppercase, see PLSQLParser),
// therefore creating fewer strings in memory.
image = image.toUpperCase(Locale.ROOT);
}
return image;
}