From 43412b532bf92e5a1c3cb825cdd6d8a5a5ecf1a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Fournier?= Date: Fri, 12 Jun 2020 18:15:33 +0200 Subject: [PATCH] Fix column problem with Antlr multiline tokens --- .../sourceforge/pmd/cpd/AntlrTokenizer.java | 2 +- .../sourceforge/pmd/cpd/token/AntlrToken.java | 72 +++++++++++++++++-- .../pmd/cpd/testData/string_multiline.txt | 4 +- 3 files changed, 68 insertions(+), 10 deletions(-) diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AntlrTokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AntlrTokenizer.java index 4bc13d81fa..570115dc07 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AntlrTokenizer.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AntlrTokenizer.java @@ -57,7 +57,7 @@ public abstract class AntlrTokenizer implements Tokenizer { } private void processToken(final Tokens tokenEntries, final String fileName, final AntlrToken token) { - final TokenEntry tokenEntry = new TokenEntry(token.getImage(), fileName, token.getBeginLine(), token.getBeginColumn() + 1, token.getEndColumn() + 1); + final TokenEntry tokenEntry = new TokenEntry(token.getImage(), fileName, token.getBeginLine(), token.getBeginColumn(), token.getEndColumn()); tokenEntries.add(tokenEntry); } } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/AntlrToken.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/AntlrToken.java index 5d5521c51f..0acda6759e 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/AntlrToken.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/token/AntlrToken.java @@ -4,6 +4,9 @@ package net.sourceforge.pmd.cpd.token; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + import org.antlr.v4.runtime.Lexer; import org.antlr.v4.runtime.Token; @@ -14,13 +17,20 @@ import net.sourceforge.pmd.lang.ast.GenericToken; */ public class AntlrToken implements GenericToken { + private static final Pattern NEWLINE_MATCHER = Pattern.compile("\\R"); + private final Token token; private final AntlrToken previousComment; + private String text; + + private int endline; + private int endcolumn; + /** * Constructor * - * @param token The antlr token implementation + * @param token The antlr token implementation * @param previousComment The previous comment */ public AntlrToken(final Token token, final AntlrToken previousComment) { @@ -41,7 +51,10 @@ public class AntlrToken implements GenericToken { @Override public String getImage() { - return token.getText(); + if (text == null) { + text = token.getText(); + } + return text; } @Override @@ -50,18 +63,63 @@ public class AntlrToken implements GenericToken { } @Override - public int getEndLine() { - return token.getLine(); + public int getBeginColumn() { + int charPos = token.getCharPositionInLine() + 1; + assert charPos > 0; + return charPos; } + @Override - public int getBeginColumn() { - return token.getCharPositionInLine(); + public int getEndLine() { + if (endline == 0) { + computeEndCoords(); + assert endline > 0; + } + return endline; } @Override public int getEndColumn() { - return token.getCharPositionInLine() + token.getStopIndex() - token.getStartIndex(); + if (endcolumn == 0) { + computeEndCoords(); + assert endcolumn > 0; + } + return endcolumn; + } + + private void computeEndCoords() { + String image = getImage(); + if (image.length() == 1) { + // fast path for single char tokens + if (image.charAt(0) != '\n') { + this.endline = getBeginLine(); + this.endcolumn = getBeginColumn(); + } else { + this.endline = getBeginLine() + 1; + this.endcolumn = 1; + } + return; + } + + Matcher matcher = NEWLINE_MATCHER.matcher(image); + int numNls = 0; + int lastOffset = 0; + while (matcher.find()) { + // continue + numNls++; + lastOffset = matcher.end(); + } + + if (numNls == 0) { + // single line token + this.endline = this.getBeginLine(); + int length = 1 + token.getStopIndex() - token.getStartIndex(); + this.endcolumn = token.getCharPositionInLine() + length; + } else { + this.endline = this.getBeginLine() + numNls; + this.endcolumn = image.length() - lastOffset; + } } public int getKind() { diff --git a/pmd-dart/src/test/resources/net/sourceforge/pmd/cpd/testData/string_multiline.txt b/pmd-dart/src/test/resources/net/sourceforge/pmd/cpd/testData/string_multiline.txt index e122347bb1..7f3ea733b8 100644 --- a/pmd-dart/src/test/resources/net/sourceforge/pmd/cpd/testData/string_multiline.txt +++ b/pmd-dart/src/test/resources/net/sourceforge/pmd/cpd/testData/string_multiline.txt @@ -7,12 +7,12 @@ L2 [var] 5 7 [s1] 9 10 [=] 12 12 - ['''\nYou can create\nmulti-line st[ 14 69 + ['''\nYou can create\nmulti-line st[ 14 3 L7 [var] 5 7 [s2] 9 10 [=] 12 12 - ["""This is also a\nmulti-line stri[ 14 52 + ["""This is also a\nmulti-line stri[ 14 21 L9 [}] 1 1 EOF