From 72408ca679d4fba5175b6a5aa919777c19ecaba9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Fournier?=
Date: Mon, 8 Apr 2024 20:43:34 +0200
Subject: [PATCH] Normalize image of PLSQL tokens to uppercase, reuse strings

---
 .../pmd/lang/plsql/ast/PLSQLParser.java       | 39 ++++++++++++++++++-
 .../pmd/lang/plsql/cpd/PLSQLCpdLexer.java     | 10 +++--
 2 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/ast/PLSQLParser.java b/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/ast/PLSQLParser.java
index ac50bdc6a3..e61e6f2350 100644
--- a/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/ast/PLSQLParser.java
+++ b/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/ast/PLSQLParser.java
@@ -4,16 +4,53 @@
 
 package net.sourceforge.pmd.lang.plsql.ast;
 
+import org.checkerframework.checker.nullness.qual.Nullable;
+
 import net.sourceforge.pmd.benchmark.TimeTracker;
 import net.sourceforge.pmd.lang.ast.ParseException;
 import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
+import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
+import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument;
 import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument.TokenDocumentBehavior;
 import net.sourceforge.pmd.lang.ast.impl.javacc.JjtreeParserAdapter;
 import net.sourceforge.pmd.lang.plsql.symboltable.SymbolFacade;
 
 public class PLSQLParser extends JjtreeParserAdapter {
 
-    private static final TokenDocumentBehavior TOKEN_BEHAVIOR = new TokenDocumentBehavior(PLSQLTokenKinds.TOKEN_NAMES);
+    // Stores images of constant string literals.
+    // This is to reuse the image strings for PLSQL keywords.
+    // JavaCC unfortunately does not store a constant image for those
+    // keywords because the grammar is case-insensitive.
+    // This optimization has the effect that the image of keyword tokens
+    // is always upper-case, regardless of the actual case used in the code.
+    // The original casing can be found by looking at the TextDocument for the file.
+
+    // NOTE: the size of this array must be greater than the number of token kinds in the grammar.
+    private static final String[] STRING_LITERAL_IMAGES_EXTRA = new String[512];
+
+    static {
+        for (int kind = 0; kind < STRING_LITERAL_IMAGES_EXTRA.length; kind++) {
+            String image = PLSQLTokenKinds.describe(kind); // looked up afresh for every kind
+            if (image == null) {
+                break; // presumably describe() answers null once past the last token kind
+            }
+            if (image.startsWith("\"") && image.endsWith("\"")) {
+                // a string literal image, store it without the quotes
+                STRING_LITERAL_IMAGES_EXTRA[kind] = image.substring(1, image.length() - 1);
+            }
+        }
+    }
+
+    private static final TokenDocumentBehavior TOKEN_BEHAVIOR = new TokenDocumentBehavior(PLSQLTokenKinds.TOKEN_NAMES) {
+        @Override
+        public JavaccToken createToken(JavaccTokenDocument self, int kind, CharStream cs, @Nullable String image) {
+            if (image == null && kind < STRING_LITERAL_IMAGES_EXTRA.length) {
+                // fetch another constant image if possible (stays null for kinds without one).
+                image = STRING_LITERAL_IMAGES_EXTRA[kind];
+            }
+            return super.createToken(self, kind, cs, image);
+        }
+    };
 
     @Override
     protected TokenDocumentBehavior tokenBehavior() {
diff --git a/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/cpd/PLSQLCpdLexer.java b/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/cpd/PLSQLCpdLexer.java
index cdb371aba7..0a64f5f613 100644
--- a/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/cpd/PLSQLCpdLexer.java
+++ b/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/cpd/PLSQLCpdLexer.java
@@ -51,9 +51,13 @@ public class PLSQLCpdLexer extends JavaccCpdLexer {
         } else if (plsqlToken.kind != PLSQLTokenKinds.CHARACTER_LITERAL
                 && plsqlToken.kind != PLSQLTokenKinds.STRING_LITERAL
                 && plsqlToken.kind != PLSQLTokenKinds.QUOTED_LITERAL) {
-            // PLSQL is case-insensitive, but of course the contents of
-            // string literals and the like are case-sensitive
-            image = image.toLowerCase(Locale.ROOT);
+            // PLSQL is case-insensitive, but the contents of
+            // string literals and the like are case-sensitive.
+            // Note: tokens are normalized to uppercase to make CPD case-insensitive.
+            // We use uppercase and not lowercase because that way, PLSQL keywords
+            // will not be changed (they are already uppercase, see PLSQLParser),
+            // therefore creating fewer strings in memory.
+            image = image.toUpperCase(Locale.ROOT);
         }
         return image;
     }