Normalize token images also in PMD parser

This commit is contained in:
Clément Fournier
2024-04-21 19:11:04 +02:00
parent 95721effd4
commit 838df27a19
2 changed files with 26 additions and 23 deletions

View File

@ -4,6 +4,8 @@
package net.sourceforge.pmd.lang.plsql.ast;
import java.util.Locale;
import org.checkerframework.checker.nullness.qual.Nullable;
import net.sourceforge.pmd.benchmark.TimeTracker;
@ -13,6 +15,7 @@ import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument.TokenDocumentBehavior;
import net.sourceforge.pmd.lang.ast.impl.javacc.JjtreeParserAdapter;
import net.sourceforge.pmd.lang.document.Chars;
import net.sourceforge.pmd.lang.plsql.symboltable.SymbolFacade;
public class PLSQLParser extends JjtreeParserAdapter<ASTInput> {
@ -46,6 +49,29 @@ public class PLSQLParser extends JjtreeParserAdapter<ASTInput> {
// fetch another constant image if possible.
image = STRING_LITERAL_IMAGES_EXTRA[kind];
}
if (image == null) {
Chars imageCs = cs.getTokenImageCs();
if (kind == PLSQLTokenKinds.IDENTIFIER && imageCs.charAt(0) == '"') {
// remove quotes to make identical to bare ID
image = imageCs.substring(1, imageCs.length() - 1);
} else {
image = imageCs.toString();
}
// PLSQL is case-insensitive, but the contents of
// string literals and the like are case-sensitive.
// Note: tokens are normalized to uppercase to make CPD case-insensitive.
// We use uppercase and not lowercase because that way, PLSQL keywords
// will be returned unchanged (they are already uppercase, see PLSQLParser),
// therefore creating fewer strings in memory.
if (kind != PLSQLTokenKinds.CHARACTER_LITERAL
&& kind != PLSQLTokenKinds.STRING_LITERAL
&& kind != PLSQLTokenKinds.QUOTED_LITERAL) {
image = image.toUpperCase(Locale.ROOT);
}
}
return super.createToken(self, kind, cs, image);
}
};

View File

@ -4,15 +4,12 @@
package net.sourceforge.pmd.lang.plsql.cpd;
import java.util.Locale;
import net.sourceforge.pmd.cpd.CpdLanguageProperties;
import net.sourceforge.pmd.cpd.impl.JavaccCpdLexer;
import net.sourceforge.pmd.lang.LanguagePropertyBundle;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
import net.sourceforge.pmd.lang.document.Chars;
import net.sourceforge.pmd.lang.document.TextDocument;
import net.sourceforge.pmd.lang.plsql.ast.PLSQLTokenKinds;
@ -49,26 +46,6 @@ public class PLSQLCpdLexer extends JavaccCpdLexer {
|| plsqlToken.kind == PLSQLTokenKinds.QUOTED_LITERAL)) {
// the token kind is preserved
image = PLSQLTokenKinds.describe(plsqlToken.kind);
} else if (plsqlToken.kind != PLSQLTokenKinds.CHARACTER_LITERAL
&& plsqlToken.kind != PLSQLTokenKinds.STRING_LITERAL
&& plsqlToken.kind != PLSQLTokenKinds.QUOTED_LITERAL) {
Chars imageCs = plsqlToken.getImageCs();
if (plsqlToken.kind == PLSQLTokenKinds.IDENTIFIER && imageCs.charAt(0) == '"') {
// remove quotes to make identical to bare ID
image = imageCs.substring(1, imageCs.length() - 1);
} else {
image = plsqlToken.getImage();
}
// PLSQL is case-insensitive, but the contents of
// string literals and the like are case-sensitive.
// Note: tokens are normalized to uppercase to make CPD case-insensitive.
// We use uppercase and not lowercase because that way, PLSQL keywords
// will be returned unchanged (they are already uppercase, see PLSQLParser),
// therefore creating fewer strings in memory.
image = image.toUpperCase(Locale.ROOT);
} else {
image = plsqlToken.getImage();
}