Also add this ability for Antlr lexers, adapt TSQL

Clément Fournier
2024-04-08 21:31:22 +02:00
parent 0cb2e37ce9
commit ab80b2443d
7 changed files with 2132 additions and 2056 deletions

net/sourceforge/pmd/cpd/impl/AntlrCpdLexer.java

@@ -12,6 +12,7 @@ import org.antlr.v4.runtime.Lexer;
 
 import net.sourceforge.pmd.cpd.CpdLexer;
 import net.sourceforge.pmd.lang.TokenManager;
+import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrLexerBehavior;
 import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrToken;
 import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrTokenManager;
 import net.sourceforge.pmd.lang.document.TextDocument;
@@ -23,7 +24,15 @@ public abstract class AntlrCpdLexer extends CpdLexerBase<AntlrToken> {
     @Override
     protected final TokenManager<AntlrToken> makeLexerImpl(TextDocument doc) throws IOException {
         CharStream charStream = CharStreams.fromReader(doc.newReader(), doc.getFileId().getAbsolutePath());
-        return new AntlrTokenManager(getLexerForSource(charStream), doc);
+        return new AntlrTokenManager(getLexerForSource(charStream), doc, getLexerBehavior());
+    }
+
+    /**
+     * Override this method to customize some aspects of the
+     * lexer.
+     */
+    protected AntlrLexerBehavior getLexerBehavior() {
+        return new AntlrLexerBehavior();
     }
 
     protected abstract Lexer getLexerForSource(CharStream charStream);

net/sourceforge/pmd/lang/ast/impl/antlr4/AntlrLexerBehavior.java (new file)

@@ -0,0 +1,32 @@
+/**
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.lang.ast.impl.antlr4;
+
+import org.antlr.v4.runtime.Token;
+
+import net.sourceforge.pmd.cpd.CpdLanguageProperties;
+
+/**
+ * Strategy to customize some aspects of the mapping
+ * from Antlr tokens to PMD/CPD tokens.
+ */
+public class AntlrLexerBehavior {
+
+    /**
+     * Return the image that the token should have, possibly applying a transformation.
+     * The default just returns {@link Token#getText()}.
+     * Transformations here are usually normalizations, for instance, mapping
+     * the image of all keywords to uppercase/lowercase to implement case-insensitivity,
+     * or replacing the image of literals by a placeholder to implement
+     * {@link CpdLanguageProperties#CPD_ANONYMIZE_LITERALS}.
+     *
+     * @param token A token from the Antlr Lexer
+     *
+     * @return The image
+     */
+    protected String getTokenImage(Token token) {
+        return token.getText();
+    }
+}
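For illustration only (this class is not part of the commit): a minimal sketch of how a language module could use the new strategy to replace literal images with a placeholder, in the spirit of the CPD_ANONYMIZE_LITERALS property mentioned in the javadoc above. MyLangCpdLexer, MyLangLexer and its STRING_LITERAL token type are hypothetical names.

// Hypothetical example, not in this commit: MyLangLexer is a placeholder for
// any Antlr-generated lexer, STRING_LITERAL a placeholder token type.
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.Token;

import net.sourceforge.pmd.cpd.impl.AntlrCpdLexer;
import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrLexerBehavior;

public class MyLangCpdLexer extends AntlrCpdLexer {

    @Override
    protected Lexer getLexerForSource(CharStream charStream) {
        return new MyLangLexer(charStream);
    }

    @Override
    protected AntlrLexerBehavior getLexerBehavior() {
        return new AntlrLexerBehavior() {
            @Override
            protected String getTokenImage(Token token) {
                if (token.getType() == MyLangLexer.STRING_LITERAL) {
                    // every literal gets the same image, so CPD treats them as equal
                    return "\"<literal>\"";
                }
                return super.getTokenImage(token);
            }
        };
    }
}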

net/sourceforge/pmd/lang/ast/impl/antlr4/AntlrToken.java

@@ -17,9 +17,13 @@ import net.sourceforge.pmd.lang.document.TextRegion;
  */
 public class AntlrToken implements GenericToken<AntlrToken> {
 
-    private final Token token;
     private final AntlrToken previousComment;
     private final TextDocument textDoc;
+    private final String image;
+    private final int endOffset;
+    private final int startOffset;
+    private final int channel;
+    private final int kind;
 
     AntlrToken next;
 
@@ -30,10 +34,14 @@ public class AntlrToken implements GenericToken<AntlrToken> {
      * @param previousComment The previous comment
      * @param textDoc The text document
      */
-    public AntlrToken(final Token token, final AntlrToken previousComment, TextDocument textDoc) {
-        this.token = token;
+    AntlrToken(final Token token, final AntlrToken previousComment, TextDocument textDoc, AntlrLexerBehavior behavior) {
         this.previousComment = previousComment;
         this.textDoc = textDoc;
+        this.image = behavior.getTokenImage(token);
+        this.startOffset = token.getStartIndex();
+        this.endOffset = token.getStopIndex();
+        this.channel = token.getChannel();
+        this.kind = token.getType();
     }
 
     @Override
@@ -48,13 +56,13 @@ public class AntlrToken implements GenericToken<AntlrToken> {
 
     @Override
     public CharSequence getImageCs() {
-        return token.getText();
+        return image;
     }
 
     /** Returns a text region with the coordinates of this token. */
     @Override
     public TextRegion getRegion() {
-        return TextRegion.fromBothOffsets(token.getStartIndex(), token.getStopIndex() + 1);
+        return TextRegion.fromBothOffsets(startOffset, endOffset);
     }
 
     @Override
@@ -74,7 +82,7 @@ public class AntlrToken implements GenericToken<AntlrToken> {
 
     @Override
     public int getKind() {
-        return token.getType();
+        return kind;
    }
 
     public boolean isHidden() {
@@ -82,6 +90,6 @@ public class AntlrToken implements GenericToken<AntlrToken> {
     }
 
     public boolean isDefault() {
-        return token.getChannel() == Lexer.DEFAULT_TOKEN_CHANNEL;
+        return channel == Lexer.DEFAULT_TOKEN_CHANNEL;
     }
 }

net/sourceforge/pmd/lang/ast/impl/antlr4/AntlrTokenManager.java

@@ -20,12 +20,20 @@ public class AntlrTokenManager implements TokenManager<AntlrToken> {
 
     private final Lexer lexer;
     private final TextDocument textDoc;
+    private final AntlrLexerBehavior behavior;
     private AntlrToken previousToken;
 
     public AntlrTokenManager(final Lexer lexer, final TextDocument textDocument) {
+        this(lexer, textDocument, new AntlrLexerBehavior());
+    }
+
+    public AntlrTokenManager(final Lexer lexer,
+                             final TextDocument textDocument,
+                             final AntlrLexerBehavior behavior) {
         this.lexer = lexer;
         this.textDoc = textDocument;
+        this.behavior = behavior;
         resetListeners();
     }
 
@@ -40,7 +48,7 @@ public class AntlrTokenManager implements TokenManager<AntlrToken> {
 
     private AntlrToken getNextTokenFromAnyChannel() {
        final AntlrToken previousComment = previousToken != null && previousToken.isHidden() ? previousToken : null;
-        final AntlrToken currentToken = new AntlrToken(lexer.nextToken(), previousComment, textDoc);
+        final AntlrToken currentToken = new AntlrToken(lexer.nextToken(), previousComment, textDoc, this.behavior);
         if (previousToken != null) {
             previousToken.next = currentToken;
         }
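A rough usage sketch of the new three-argument constructor (assuming a generated Antlr lexer named MyLangLexer and an already-open TextDocument; both are placeholders, not part of the commit). The old two-argument constructor keeps its previous semantics by delegating with a default AntlrLexerBehavior.

    // Sketch only; MyLangLexer stands in for any Antlr-generated lexer.
    static TokenManager<AntlrToken> makeTokenManager(TextDocument doc) throws IOException {
        CharStream cs = CharStreams.fromReader(doc.newReader(), doc.getFileId().getAbsolutePath());
        // callers then pull tokens with getNextToken(); images come back already normalized
        return new AntlrTokenManager(new MyLangLexer(cs), doc, new AntlrLexerBehavior());
    }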

net/sourceforge/pmd/lang/tsql/cpd/TSqlCpdLexer.java

@@ -4,10 +4,14 @@
 
 package net.sourceforge.pmd.lang.tsql.cpd;
 
+import java.util.Locale;
+
 import org.antlr.v4.runtime.CharStream;
 import org.antlr.v4.runtime.Lexer;
+import org.antlr.v4.runtime.Token;
 
 import net.sourceforge.pmd.cpd.impl.AntlrCpdLexer;
+import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrLexerBehavior;
 import net.sourceforge.pmd.lang.tsql.ast.TSqlLexer;
 
 /**
@@ -19,4 +23,19 @@ public class TSqlCpdLexer extends AntlrCpdLexer {
     protected Lexer getLexerForSource(CharStream charStream) {
         return new TSqlLexer(new CaseChangingCharStream(charStream, true));
     }
+
+    @Override
+    protected AntlrLexerBehavior getLexerBehavior() {
+        return new AntlrLexerBehavior() {
+            @Override
+            protected String getTokenImage(Token token) {
+                if (token.getType() == TSqlLexer.STRING) {
+                    // string literals stay case-sensitive
+                    return super.getTokenImage(token);
+                }
+                // normalize case-insensitive tokens (keywords, identifiers) to uppercase
+                return token.getText().toUpperCase(Locale.ROOT);
+            }
+        };
+    }
 }
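The token-dump diff below shows the net effect on CPD tokens: keyword and identifier images are uppercased ([create] becomes [CREATE], [@v] becomes [@V]), while string literals would keep their original case. The end columns also all drop by one, which is consistent with the AntlrToken#getRegion change above, where the Antlr stop index is no longer incremented when building the region.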

TSQL CPD test data (expected token dump)

@@ -1,28 +1,28 @@
     [Image] or [Truncated image[ Bcol Ecol
 L1
-    [create] 1 7
-    [procedure] 8 17
-    [p] 18 19
-    [(] 20 21
-    [@v] 21 23
-    [int] 24 27
-    [)] 27 28
-    [as] 29 31
-    [begin] 32 37
+    [CREATE] 1 6
+    [PROCEDURE] 8 16
+    [P] 18 18
+    [(] 20 20
+    [@V] 21 22
+    [INT] 24 26
+    [)] 27 27
+    [AS] 29 30
+    [BEGIN] 32 36
 L2
-    [declare] 2 9
-    [@f] 10 12
-    [int] 13 16
+    [DECLARE] 2 8
+    [@F] 10 11
+    [INT] 13 15
 L3
-    [set] 2 5
-    [@f] 6 8
-    [=] 9 10
-    [@v] 11 13
-    [+] 14 15
-    [2] 16 17
+    [SET] 2 4
+    [@F] 6 7
+    [=] 9 9
+    [@V] 11 12
+    [+] 14 14
+    [2] 16 16
 L4
-    [select] 2 8
-    [@f] 9 11
+    [SELECT] 2 7
+    [@F] 9 10
 L5
-    [end] 1 4
+    [END] 1 3
 EOF