Also add this ability for Antlr lexers, adapt TSQL
This commit is contained in:
@ -12,6 +12,7 @@ import org.antlr.v4.runtime.Lexer;
|
|||||||
|
|
||||||
import net.sourceforge.pmd.cpd.CpdLexer;
|
import net.sourceforge.pmd.cpd.CpdLexer;
|
||||||
import net.sourceforge.pmd.lang.TokenManager;
|
import net.sourceforge.pmd.lang.TokenManager;
|
||||||
|
import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrLexerBehavior;
|
||||||
import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrToken;
|
import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrToken;
|
||||||
import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrTokenManager;
|
import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrTokenManager;
|
||||||
import net.sourceforge.pmd.lang.document.TextDocument;
|
import net.sourceforge.pmd.lang.document.TextDocument;
|
||||||
@ -23,7 +24,15 @@ public abstract class AntlrCpdLexer extends CpdLexerBase<AntlrToken> {
|
|||||||
@Override
|
@Override
|
||||||
protected final TokenManager<AntlrToken> makeLexerImpl(TextDocument doc) throws IOException {
|
protected final TokenManager<AntlrToken> makeLexerImpl(TextDocument doc) throws IOException {
|
||||||
CharStream charStream = CharStreams.fromReader(doc.newReader(), doc.getFileId().getAbsolutePath());
|
CharStream charStream = CharStreams.fromReader(doc.newReader(), doc.getFileId().getAbsolutePath());
|
||||||
return new AntlrTokenManager(getLexerForSource(charStream), doc);
|
return new AntlrTokenManager(getLexerForSource(charStream), doc, getLexerBehavior());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Override this method to customize some aspects of the
|
||||||
|
* lexer.
|
||||||
|
*/
|
||||||
|
protected AntlrLexerBehavior getLexerBehavior() {
|
||||||
|
return new AntlrLexerBehavior();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract Lexer getLexerForSource(CharStream charStream);
|
protected abstract Lexer getLexerForSource(CharStream charStream);
|
||||||
|
@ -0,0 +1,32 @@
|
|||||||
|
/**
|
||||||
|
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
|
||||||
|
*/
|
||||||
|
|
||||||
|
package net.sourceforge.pmd.lang.ast.impl.antlr4;
|
||||||
|
|
||||||
|
import org.antlr.v4.runtime.Token;
|
||||||
|
|
||||||
|
import net.sourceforge.pmd.cpd.CpdLanguageProperties;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Strategy to customize some aspects of the mapping
|
||||||
|
* from Antlr tokens to PMD/CPD tokens.
|
||||||
|
*/
|
||||||
|
public class AntlrLexerBehavior {
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the image that the token should have, possibly applying a transformation.
|
||||||
|
* The default just returns {@link Token#getText()}.
|
||||||
|
* Transformations here are usually normalizations, for instance, mapping
|
||||||
|
* the image of all keywords to uppercase/lowercase to implement case-insensitivity,
|
||||||
|
* or replacing the image of literals by a placeholder to implement {@link CpdLanguageProperties#CPD_ANONYMIZE_LITERALS}.
|
||||||
|
*
|
||||||
|
* @param token A token from the Antlr Lexer
|
||||||
|
*
|
||||||
|
* @return The image
|
||||||
|
*/
|
||||||
|
protected String getTokenImage(Token token) {
|
||||||
|
return token.getText();
|
||||||
|
}
|
||||||
|
}
|
@ -17,9 +17,13 @@ import net.sourceforge.pmd.lang.document.TextRegion;
|
|||||||
*/
|
*/
|
||||||
public class AntlrToken implements GenericToken<AntlrToken> {
|
public class AntlrToken implements GenericToken<AntlrToken> {
|
||||||
|
|
||||||
private final Token token;
|
|
||||||
private final AntlrToken previousComment;
|
private final AntlrToken previousComment;
|
||||||
private final TextDocument textDoc;
|
private final TextDocument textDoc;
|
||||||
|
private final String image;
|
||||||
|
private final int endOffset;
|
||||||
|
private final int startOffset;
|
||||||
|
private final int channel;
|
||||||
|
private final int kind;
|
||||||
AntlrToken next;
|
AntlrToken next;
|
||||||
|
|
||||||
|
|
||||||
@ -30,10 +34,14 @@ public class AntlrToken implements GenericToken<AntlrToken> {
|
|||||||
* @param previousComment The previous comment
|
* @param previousComment The previous comment
|
||||||
* @param textDoc The text document
|
* @param textDoc The text document
|
||||||
*/
|
*/
|
||||||
public AntlrToken(final Token token, final AntlrToken previousComment, TextDocument textDoc) {
|
AntlrToken(final Token token, final AntlrToken previousComment, TextDocument textDoc, AntlrLexerBehavior behavior) {
|
||||||
this.token = token;
|
|
||||||
this.previousComment = previousComment;
|
this.previousComment = previousComment;
|
||||||
this.textDoc = textDoc;
|
this.textDoc = textDoc;
|
||||||
|
this.image = behavior.getTokenImage(token);
|
||||||
|
this.startOffset = token.getStartIndex();
|
||||||
|
// getStopIndex() is the index of the token's last character (inclusive); the region built in getRegion() needs an exclusive end, hence + 1
this.endOffset = token.getStopIndex() + 1;
|
||||||
|
this.channel = token.getChannel();
|
||||||
|
this.kind = token.getType();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -48,13 +56,13 @@ public class AntlrToken implements GenericToken<AntlrToken> {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public CharSequence getImageCs() {
|
public CharSequence getImageCs() {
|
||||||
return token.getText();
|
return image;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns a text region with the coordinates of this token. */
|
/** Returns a text region with the coordinates of this token. */
|
||||||
@Override
|
@Override
|
||||||
public TextRegion getRegion() {
|
public TextRegion getRegion() {
|
||||||
return TextRegion.fromBothOffsets(token.getStartIndex(), token.getStopIndex() + 1);
|
return TextRegion.fromBothOffsets(startOffset, endOffset);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -74,7 +82,7 @@ public class AntlrToken implements GenericToken<AntlrToken> {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getKind() {
|
public int getKind() {
|
||||||
return token.getType();
|
return kind;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isHidden() {
|
public boolean isHidden() {
|
||||||
@ -82,6 +90,6 @@ public class AntlrToken implements GenericToken<AntlrToken> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public boolean isDefault() {
|
public boolean isDefault() {
|
||||||
return token.getChannel() == Lexer.DEFAULT_TOKEN_CHANNEL;
|
return channel == Lexer.DEFAULT_TOKEN_CHANNEL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -20,12 +20,20 @@ public class AntlrTokenManager implements TokenManager<AntlrToken> {
|
|||||||
|
|
||||||
private final Lexer lexer;
|
private final Lexer lexer;
|
||||||
private final TextDocument textDoc;
|
private final TextDocument textDoc;
|
||||||
|
private final AntlrLexerBehavior behavior;
|
||||||
private AntlrToken previousToken;
|
private AntlrToken previousToken;
|
||||||
|
|
||||||
|
|
||||||
public AntlrTokenManager(final Lexer lexer, final TextDocument textDocument) {
|
public AntlrTokenManager(final Lexer lexer, final TextDocument textDocument) {
|
||||||
|
this(lexer, textDocument, new AntlrLexerBehavior());
|
||||||
|
}
|
||||||
|
|
||||||
|
public AntlrTokenManager(final Lexer lexer,
|
||||||
|
final TextDocument textDocument,
|
||||||
|
final AntlrLexerBehavior behavior) {
|
||||||
this.lexer = lexer;
|
this.lexer = lexer;
|
||||||
this.textDoc = textDocument;
|
this.textDoc = textDocument;
|
||||||
|
this.behavior = behavior;
|
||||||
resetListeners();
|
resetListeners();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -40,7 +48,7 @@ public class AntlrTokenManager implements TokenManager<AntlrToken> {
|
|||||||
|
|
||||||
private AntlrToken getNextTokenFromAnyChannel() {
|
private AntlrToken getNextTokenFromAnyChannel() {
|
||||||
final AntlrToken previousComment = previousToken != null && previousToken.isHidden() ? previousToken : null;
|
final AntlrToken previousComment = previousToken != null && previousToken.isHidden() ? previousToken : null;
|
||||||
final AntlrToken currentToken = new AntlrToken(lexer.nextToken(), previousComment, textDoc);
|
final AntlrToken currentToken = new AntlrToken(lexer.nextToken(), previousComment, textDoc, this.behavior);
|
||||||
if (previousToken != null) {
|
if (previousToken != null) {
|
||||||
previousToken.next = currentToken;
|
previousToken.next = currentToken;
|
||||||
}
|
}
|
||||||
|
@ -4,10 +4,14 @@
|
|||||||
|
|
||||||
package net.sourceforge.pmd.lang.tsql.cpd;
|
package net.sourceforge.pmd.lang.tsql.cpd;
|
||||||
|
|
||||||
|
import java.util.Locale;
|
||||||
|
|
||||||
import org.antlr.v4.runtime.CharStream;
|
import org.antlr.v4.runtime.CharStream;
|
||||||
import org.antlr.v4.runtime.Lexer;
|
import org.antlr.v4.runtime.Lexer;
|
||||||
|
import org.antlr.v4.runtime.Token;
|
||||||
|
|
||||||
import net.sourceforge.pmd.cpd.impl.AntlrCpdLexer;
|
import net.sourceforge.pmd.cpd.impl.AntlrCpdLexer;
|
||||||
|
import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrLexerBehavior;
|
||||||
import net.sourceforge.pmd.lang.tsql.ast.TSqlLexer;
|
import net.sourceforge.pmd.lang.tsql.ast.TSqlLexer;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -19,4 +23,19 @@ public class TSqlCpdLexer extends AntlrCpdLexer {
|
|||||||
protected Lexer getLexerForSource(CharStream charStream) {
|
protected Lexer getLexerForSource(CharStream charStream) {
|
||||||
return new TSqlLexer(new CaseChangingCharStream(charStream, true));
|
return new TSqlLexer(new CaseChangingCharStream(charStream, true));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected AntlrLexerBehavior getLexerBehavior() {
|
||||||
|
return new AntlrLexerBehavior() {
|
||||||
|
@Override
|
||||||
|
protected String getTokenImage(Token token) {
|
||||||
|
if (token.getType() == TSqlLexer.STRING) {
|
||||||
|
// This path is for case-sensitive tokens
|
||||||
|
return super.getTokenImage(token);
|
||||||
|
}
|
||||||
|
// normalize case-insensitive tokens to upper case
|
||||||
|
return token.getText().toUpperCase(Locale.ROOT);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,28 +1,28 @@
|
|||||||
[Image] or [Truncated image[ Bcol Ecol
|
[Image] or [Truncated image[ Bcol Ecol
|
||||||
L1
|
L1
|
||||||
[create] 1 7
|
[CREATE] 1 6
|
||||||
[procedure] 8 17
|
[PROCEDURE] 8 16
|
||||||
[p] 18 19
|
[P] 18 18
|
||||||
[(] 20 21
|
[(] 20 20
|
||||||
[@v] 21 23
|
[@V] 21 22
|
||||||
[int] 24 27
|
[INT] 24 26
|
||||||
[)] 27 28
|
[)] 27 27
|
||||||
[as] 29 31
|
[AS] 29 30
|
||||||
[begin] 32 37
|
[BEGIN] 32 36
|
||||||
L2
|
L2
|
||||||
[declare] 2 9
|
[DECLARE] 2 8
|
||||||
[@f] 10 12
|
[@F] 10 11
|
||||||
[int] 13 16
|
[INT] 13 15
|
||||||
L3
|
L3
|
||||||
[set] 2 5
|
[SET] 2 4
|
||||||
[@f] 6 8
|
[@F] 6 7
|
||||||
[=] 9 10
|
[=] 9 9
|
||||||
[@v] 11 13
|
[@V] 11 12
|
||||||
[+] 14 15
|
[+] 14 14
|
||||||
[2] 16 17
|
[2] 16 16
|
||||||
L4
|
L4
|
||||||
[select] 2 8
|
[SELECT] 2 7
|
||||||
[@f] 9 11
|
[@F] 9 10
|
||||||
L5
|
L5
|
||||||
[end] 1 4
|
[END] 1 3
|
||||||
EOF
|
EOF
|
||||||
|
Reference in New Issue
Block a user