Normalize image of PLSQL tokens to uppercase, reuse strings

2024-04-08 20:43:34 +02:00
parent 44f29c3983
commit 72408ca679
2 changed files with 45 additions and 4 deletions
--- a/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/ast/PLSQLParser.java
+++ b/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/ast/PLSQLParser.java
@ -4,16 +4,53 @@

 package net.sourceforge.pmd.lang.plsql.ast;

+import org.checkerframework.checker.nullness.qual.Nullable;
+
 import net.sourceforge.pmd.benchmark.TimeTracker;
 import net.sourceforge.pmd.lang.ast.ParseException;
 import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
+import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
+import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument;
 import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument.TokenDocumentBehavior;
 import net.sourceforge.pmd.lang.ast.impl.javacc.JjtreeParserAdapter;
 import net.sourceforge.pmd.lang.plsql.symboltable.SymbolFacade;

 public class PLSQLParser extends JjtreeParserAdapter<ASTInput> {

-    private static final TokenDocumentBehavior TOKEN_BEHAVIOR = new TokenDocumentBehavior(PLSQLTokenKinds.TOKEN_NAMES);
+    // Stores images of constant string literals.
+    // This is to reuse the image strings for PLSQL keywords.
+    // JavaCC unfortunately does not store a constant image for those
+    // keywords because the grammar is case-insensitive.
+    // This optimization has the effect that the image of keyword tokens
+    // is always upper-case, regardless of the actual case used in the code.
+    // The original casing can be found by looking at the TextDocument for the file.
+
+    // NOTE: the size of this array must be greater than the largest token kind, since it is indexed by kind.
+    private static final String[] STRING_LITERAL_IMAGES_EXTRA = new String[512];
+
+    static {
+        // Look up the description of every kind in turn; stop at the first kind without one.
+        for (int kind = 0; kind < STRING_LITERAL_IMAGES_EXTRA.length; kind++) {
+            String image = PLSQLTokenKinds.describe(kind);
+            if (image == null) {
+                break;
+            } else if (image.length() >= 2 && image.startsWith("\"") && image.endsWith("\"")) {
+                // a string literal image, remove the quotes
+                STRING_LITERAL_IMAGES_EXTRA[kind] = image.substring(1, image.length() - 1);
+            }
+        }
+    }
+
+    private static final TokenDocumentBehavior TOKEN_BEHAVIOR = new TokenDocumentBehavior(PLSQLTokenKinds.TOKEN_NAMES) {
+        @Override
+        public JavaccToken createToken(JavaccTokenDocument self, int kind, CharStream cs, @Nullable String image) {
+            if (image == null && kind >= 0 && kind < STRING_LITERAL_IMAGES_EXTRA.length) {
+                // No image supplied: reuse the shared constant (may be null); kinds outside the table stay null.
+                image = STRING_LITERAL_IMAGES_EXTRA[kind];
+            }
+            return super.createToken(self, kind, cs, image);
+        }
+    };

    @Override
    protected TokenDocumentBehavior tokenBehavior() {
--- a/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/cpd/PLSQLCpdLexer.java
+++ b/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/cpd/PLSQLCpdLexer.java
@ -51,9 +51,13 @@ public class PLSQLCpdLexer extends JavaccCpdLexer {
        } else if (plsqlToken.kind != PLSQLTokenKinds.CHARACTER_LITERAL
            && plsqlToken.kind != PLSQLTokenKinds.STRING_LITERAL
            && plsqlToken.kind != PLSQLTokenKinds.QUOTED_LITERAL) {
-            // PLSQL is case-insensitive, but of course the contents of
-            // string literals and the like are case-sensitive
-            image = image.toLowerCase(Locale.ROOT);
+            // PLSQL is case-insensitive, but the contents of
+            // string literals and the like are case-sensitive.
+            // Note: tokens are normalized to uppercase to make CPD case-insensitive.
+            // We use uppercase and not lowercase because that way, PLSQL keywords
+            // will not be changed (they are already uppercase, see PLSQLParser),
+            // therefore creating fewer strings in memory.
+            image = image.toUpperCase(Locale.ROOT);
        }
        return image;
    }