From 9af59c503b4b193adeb01718fa4ea27ac180df8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Fournier?= Date: Tue, 1 Sep 2020 17:41:48 +0200 Subject: [PATCH] Fix linenos with multiline strings --- .../net/sourceforge/pmd/cpd/AnyTokenizer.java | 7 +++++- .../sourceforge/pmd/cpd/AnyTokenizerTest.java | 22 ++++++++++++++++++- .../pmd/lang/perl/cpd/testdata/sample.pl | 3 ++- .../pmd/lang/perl/cpd/testdata/sample.txt | 13 ++++++----- 4 files changed, 36 insertions(+), 9 deletions(-) diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AnyTokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AnyTokenizer.java index a2d1825af3..76e0e4ce5e 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AnyTokenizer.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AnyTokenizer.java @@ -76,12 +76,17 @@ public class AnyTokenizer implements Tokenizer { continue; } + int bline = lineNo; int bcol = 1 + matcher.start() - lastLineStart; // + 1 because columns are 1 based int ecol = StringUtil.columnNumberAt(image, image.length()); // this already outputs a 1-based column if (ecol == image.length() + 1) { ecol = bcol + image.length(); // single-line token + } else { + // multiline, need to update the line count + lineNo += StringUtil.lineNumberAt(image, image.length()) - 1; + lastLineStart = matcher.start() + image.length() - ecol + 1; } - tokenEntries.add(new TokenEntry(image, sourceCode.getFileName(), lineNo, bcol, ecol)); + tokenEntries.add(new TokenEntry(image, sourceCode.getFileName(), bline, bcol, ecol)); } } finally { tokenEntries.add(TokenEntry.EOF); diff --git a/pmd-core/src/test/java/net/sourceforge/pmd/cpd/AnyTokenizerTest.java b/pmd-core/src/test/java/net/sourceforge/pmd/cpd/AnyTokenizerTest.java index bbba98a210..4e6ee6e9a7 100644 --- a/pmd-core/src/test/java/net/sourceforge/pmd/cpd/AnyTokenizerTest.java +++ b/pmd-core/src/test/java/net/sourceforge/pmd/cpd/AnyTokenizerTest.java @@ -13,6 +13,8 @@ import java.util.stream.Collectors; import org.checkerframework.checker.nullness.qual.NonNull; import org.junit.Test; +import net.sourceforge.pmd.internal.util.IteratorUtil; + public class AnyTokenizerTest { @Test @@ -27,7 +29,24 @@ public class AnyTokenizerTest { compareResult(tokenizer, "a = \"oo\\n\"", listOf("a", "=", "\"oo\\n\"", "EOF")); } - private void compareResult(AnyTokenizer tokenizer, String source, List expectedImages) { + @Test + public void testMultilineString() { + AnyTokenizer tokenizer = new AnyTokenizer("//"); + Tokens tokens = compareResult(tokenizer, "a = \"oo\n\";", listOf("a", "=", "\"oo\n\"", ";", "EOF")); + TokenEntry string = IteratorUtil.getNth(tokens.iterator(), 2); + assertEquals("\"oo\n\"", getTokenImage(string)); + assertEquals(1, string.getBeginLine()); + assertEquals(5, string.getBeginColumn()); + assertEquals(2, string.getEndColumn()); // ends on line 2 + + TokenEntry semi = IteratorUtil.getNth(tokens.iterator(), 3); + assertEquals(";", getTokenImage(semi)); + assertEquals(2, semi.getBeginLine()); + assertEquals(2, semi.getBeginColumn()); + assertEquals(3, semi.getEndColumn()); + } + + private Tokens compareResult(AnyTokenizer tokenizer, String source, List expectedImages) { SourceCode code = new SourceCode(new SourceCode.StringCodeLoader(source)); Tokens tokens = new Tokens(); tokenizer.tokenize(code, tokens); @@ -36,6 +55,7 @@ public class AnyTokenizerTest { .collect(Collectors.toList()); assertEquals(expectedImages, tokenStrings); + return tokens; } private @NonNull String getTokenImage(TokenEntry t) { diff --git a/pmd-perl/src/test/resources/net/sourceforge/pmd/lang/perl/cpd/testdata/sample.pl b/pmd-perl/src/test/resources/net/sourceforge/pmd/lang/perl/cpd/testdata/sample.pl index b7abc18a25..04925e4cc7 100644 --- a/pmd-perl/src/test/resources/net/sourceforge/pmd/lang/perl/cpd/testdata/sample.pl +++ b/pmd-perl/src/test/resources/net/sourceforge/pmd/lang/perl/cpd/testdata/sample.pl @@ -16,7 +16,8 @@ while (<>) { # s/#/\n/g; if (/YOUR_PATTERN_GOES_HERE/) { - print "Matched: |$`<$&>$'|\n"; + print 'Matched: |$ ` + <$&>$|\n'; # multiline string # If you need these for testing patterns with # memories, uncomment them as well # print " And memory one got <$1>\n"; diff --git a/pmd-perl/src/test/resources/net/sourceforge/pmd/lang/perl/cpd/testdata/sample.txt b/pmd-perl/src/test/resources/net/sourceforge/pmd/lang/perl/cpd/testdata/sample.txt index e4e548f58f..06f5c73a50 100644 --- a/pmd-perl/src/test/resources/net/sourceforge/pmd/lang/perl/cpd/testdata/sample.txt +++ b/pmd-perl/src/test/resources/net/sourceforge/pmd/lang/perl/cpd/testdata/sample.txt @@ -30,18 +30,19 @@ L18 [{] 35 36 L19 [print] 2 7 - ["Matched: |$`<$&>$'|\\n"] 8 31 - [;] 31 32 -L24 + ['Matched: |$ `\n\t<$&>$|\\n'] 8 11 +L20 + [;] 11 12 +L25 [}] 5 6 [else] 7 11 [{] 12 13 -L25 +L26 [print] 2 7 ["No match.\\n"] 8 21 [;] 21 22 -L26 +L27 [}] 5 6 -L28 +L29 [}] 1 2 EOF