Fix linenos with multiline strings

This commit is contained in:
Clément Fournier
2020-09-01 17:41:48 +02:00
parent 5f4d489ec8
commit 9af59c503b
4 changed files with 36 additions and 9 deletions

View File

@ -76,12 +76,17 @@ public class AnyTokenizer implements Tokenizer {
continue; continue;
} }
int bline = lineNo;
int bcol = 1 + matcher.start() - lastLineStart; // + 1 because columns are 1 based int bcol = 1 + matcher.start() - lastLineStart; // + 1 because columns are 1 based
int ecol = StringUtil.columnNumberAt(image, image.length()); // this already outputs a 1-based column int ecol = StringUtil.columnNumberAt(image, image.length()); // this already outputs a 1-based column
if (ecol == image.length() + 1) { if (ecol == image.length() + 1) {
ecol = bcol + image.length(); // single-line token ecol = bcol + image.length(); // single-line token
} else {
// multiline, need to update the line count
lineNo += StringUtil.lineNumberAt(image, image.length()) - 1;
lastLineStart = matcher.start() + image.length() - ecol + 1;
} }
tokenEntries.add(new TokenEntry(image, sourceCode.getFileName(), lineNo, bcol, ecol)); tokenEntries.add(new TokenEntry(image, sourceCode.getFileName(), bline, bcol, ecol));
} }
} finally { } finally {
tokenEntries.add(TokenEntry.EOF); tokenEntries.add(TokenEntry.EOF);

View File

@ -13,6 +13,8 @@ import java.util.stream.Collectors;
import org.checkerframework.checker.nullness.qual.NonNull; import org.checkerframework.checker.nullness.qual.NonNull;
import org.junit.Test; import org.junit.Test;
import net.sourceforge.pmd.internal.util.IteratorUtil;
public class AnyTokenizerTest { public class AnyTokenizerTest {
@Test @Test
@ -27,7 +29,24 @@ public class AnyTokenizerTest {
compareResult(tokenizer, "a = \"oo\\n\"", listOf("a", "=", "\"oo\\n\"", "EOF")); compareResult(tokenizer, "a = \"oo\\n\"", listOf("a", "=", "\"oo\\n\"", "EOF"));
} }
private void compareResult(AnyTokenizer tokenizer, String source, List<String> expectedImages) { @Test
public void testMultilineString() {
    // The source holds a string literal with a raw line break inside it: the literal opens
    // on line 1 and its closing quote is the first character of line 2.
    Tokens tokens = compareResult(new AnyTokenizer("//"),
                                  "a = \"oo\n\";",
                                  listOf("a", "=", "\"oo\n\"", ";", "EOF"));

    // Entry 2 of the expected image list above is the string literal itself.
    TokenEntry stringToken = IteratorUtil.getNth(tokens.iterator(), 2);
    assertEquals("\"oo\n\"", getTokenImage(stringToken));
    assertEquals(1, stringToken.getBeginLine());
    assertEquals(5, stringToken.getBeginColumn());
    // the literal ends on line 2, so its end column is counted on that line
    assertEquals(2, stringToken.getEndColumn());

    // Entry 3 is the semicolon that follows the closing quote; it must be reported on
    // line 2 with columns relative to the start of that line.
    TokenEntry semicolon = IteratorUtil.getNth(tokens.iterator(), 3);
    assertEquals(";", getTokenImage(semicolon));
    assertEquals(2, semicolon.getBeginLine());
    assertEquals(2, semicolon.getBeginColumn());
    assertEquals(3, semicolon.getEndColumn());
}
private Tokens compareResult(AnyTokenizer tokenizer, String source, List<String> expectedImages) {
SourceCode code = new SourceCode(new SourceCode.StringCodeLoader(source)); SourceCode code = new SourceCode(new SourceCode.StringCodeLoader(source));
Tokens tokens = new Tokens(); Tokens tokens = new Tokens();
tokenizer.tokenize(code, tokens); tokenizer.tokenize(code, tokens);
@ -36,6 +55,7 @@ public class AnyTokenizerTest {
.collect(Collectors.toList()); .collect(Collectors.toList());
assertEquals(expectedImages, tokenStrings); assertEquals(expectedImages, tokenStrings);
return tokens;
} }
private @NonNull String getTokenImage(TokenEntry t) { private @NonNull String getTokenImage(TokenEntry t) {

View File

@ -16,7 +16,8 @@ while (<>) {
# s/#/\n/g; # s/#/\n/g;
if (/YOUR_PATTERN_GOES_HERE/) { if (/YOUR_PATTERN_GOES_HERE/) {
print "Matched: |$`<$&>$'|\n"; print 'Matched: |$ `
<$&>$|\n'; # multiline string
# If you need these for testing patterns with # If you need these for testing patterns with
# memories, uncomment them as well # memories, uncomment them as well
# print " And memory one got <$1>\n"; # print " And memory one got <$1>\n";

View File

@ -30,18 +30,19 @@ L18
[{] 35 36 [{] 35 36
L19 L19
[print] 2 7 [print] 2 7
["Matched: |$`<$&>$'|\\n"] 8 31 ['Matched: |$ `\n\t<$&>$|\\n'] 8 11
[;] 31 32 L20
L24 [;] 11 12
L25
[}] 5 6 [}] 5 6
[else] 7 11 [else] 7 11
[{] 12 13 [{] 12 13
L25 L26
[print] 2 7 [print] 2 7
["No match.\\n"] 8 21 ["No match.\\n"] 8 21
[;] 21 22 [;] 21 22
L26 L27
[}] 5 6 [}] 5 6
L28 L29
[}] 1 2 [}] 1 2
EOF EOF