Add perl tests

This commit is contained in:
Clément Fournier
2020-09-01 17:29:35 +02:00
parent bc4b008b6e
commit 5331be392b
6 changed files with 128 additions and 7 deletions

View File

@ -29,8 +29,8 @@ public class AnyTokenizer implements Tokenizer {
"\\w++" // either a word
+ eolCommentFragment(singleLineCommentStart) // a comment
+ "|[^\"'\\s]" // a single separator char
+ "|\"(?:[^\"\\\\]++|\\\\\")*+\"" // a double-quoted string
+ "|'(?:[^'\\\\]++|\\\\')*+'" // a single-quoted string
+ "|\"(?:[^\"\\\\]++|\\\\.)*+\"" // a double-quoted string
+ "|'(?:[^'\\\\]++|\\\\.)*+'" // a single-quoted string
+ "|\n" // or a newline (to count lines), note that sourcecode normalizes line endings
);
}
@ -55,7 +55,7 @@ public class AnyTokenizer implements Tokenizer {
if (StringUtils.isBlank(start)) {
return "";
} else {
return "|(?:" + Pattern.quote(start) + "[^\n]++)"; // note: sourcecode normalizes line endings
return "|(?:" + Pattern.quote(start) + "[^\n]*+)"; // note: sourcecode normalizes line endings
}
}
@ -78,6 +78,9 @@ public class AnyTokenizer implements Tokenizer {
int bcol = 1 + matcher.start() - lastLineStart; // + 1 because columns are 1 based
int ecol = StringUtil.columnNumberAt(image, image.length()); // this already outputs a 1-based column
if (ecol == image.length() + 1) {
ecol = bcol + image.length(); // single-line token
}
tokenEntries.add(new TokenEntry(image, sourceCode.getFileName(), lineNo, bcol, ecol));
}
} finally {

View File

@ -18,15 +18,24 @@ public class AnyTokenizerTest {
@Test
public void testMultiLineMacros() {
AnyTokenizer tokenizer = new AnyTokenizer("//");
SourceCode code = new SourceCode(new SourceCode.StringCodeLoader(TEST1));
compareResult(tokenizer, TEST1, EXPECTED);
}
@Test
public void testStringEscape() {
AnyTokenizer tokenizer = new AnyTokenizer("//");
compareResult(tokenizer, "a = \"oo\\n\"", listOf("a", "=", "\"oo\\n\"", "EOF"));
}
private void compareResult(AnyTokenizer tokenizer, String source, List<String> expectedImages) {
SourceCode code = new SourceCode(new SourceCode.StringCodeLoader(source));
Tokens tokens = new Tokens();
tokenizer.tokenize(code, tokens);
assertEquals(31, tokens.size());
List<String> tokenStrings = tokens.getTokens().stream()
.map(this::getTokenImage)
.collect(Collectors.toList());
assertEquals(EXPECTED, tokenStrings);
assertEquals(expectedImages, tokenStrings);
}
private @NonNull String getTokenImage(TokenEntry t) {

View File

@ -6,6 +6,6 @@ package net.sourceforge.pmd.cpd;
public class PerlLanguage extends AbstractLanguage {
public PerlLanguage() {
super("Perl", "perl", new PerlTokenizer(), ".pm", ".pl", ".t");
super("Perl", "perl", new AnyTokenizer("#"), ".pm", ".pl", ".t");
}
}

View File

@ -0,0 +1,34 @@
/*
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.lang.perl.cpd;
import java.util.Properties;
import org.checkerframework.checker.nullness.qual.NonNull;
import org.junit.Test;
import net.sourceforge.pmd.cpd.PerlLanguage;
import net.sourceforge.pmd.cpd.Tokenizer;
import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest;
/**
 * CPD text-comparison test for the Perl language module.
 *
 * <p>The tokenizer under test is whatever {@link PerlLanguage} exposes through
 * {@code getTokenizer()}, so this test follows any change of tokenizer made
 * in the language definition.
 */
public class PerlTokenizerTest extends CpdTextComparisonTest {

    // Handed to the CpdTextComparisonTest constructor; presumably the file
    // extension of the test resources -- confirm against the base class.
    private static final String EXTENSION = ".pl";

    public PerlTokenizerTest() {
        super(EXTENSION);
    }

    /**
     * Supplies the tokenizer to exercise.
     *
     * @param properties tokenizer properties provided by the caller; not read here
     * @return the tokenizer obtained from a fresh {@link PerlLanguage}
     */
    @Override
    public Tokenizer newTokenizer(@NonNull Properties properties) {
        final PerlLanguage perl = new PerlLanguage();
        return perl.getTokenizer();
    }

    /** Runs the inherited comparison on the test case named "sample". */
    @Test
    public void testSample() {
        doTest("sample");
    }
}

View File

@ -0,0 +1,28 @@
#!/usr/bin/perl -w
# courtesy of https://github.com/briandfoy/Learning-Perl-Sample-Files
# (no license)
use strict;
# This next line of code is used when you get to Chapter 9.
my $what = 'fred|barney';
while (<>) {
chomp;
# If you want to try matching strings which may contain
# newlines, here's the trick to use: Uncomment this next
# line, then use a pound sign ("#") wherever you mean to
# have a newline within your data string.
# s/#/\n/g;
if (/YOUR_PATTERN_GOES_HERE/) {
print "Matched: |$`<$&>$'|\n";
# If you need these for testing patterns with
# memories, uncomment them as well
# print " And memory one got <$1>\n";
# print " And memory two got <$2>\n";
} else {
print "No match.\n";
}
}

View File

@ -0,0 +1,47 @@
[Image] or [Truncated image[ Bcol Ecol
L5
[use] 1 4
[strict] 5 11
[;] 11 12
L8
[my] 1 3
[$] 4 5
[what] 5 9
[=] 10 11
['fred|barney'] 12 25
[;] 25 26
L10
[while] 1 6
[(] 7 8
[<] 8 9
[>] 9 10
[)] 10 11
[{] 12 13
L11
[chomp] 5 10
[;] 10 11
L18
[if] 5 7
[(] 8 9
[/] 9 10
[YOUR_PATTERN_GOES_HERE] 10 32
[/] 32 33
[)] 33 34
[{] 35 36
L19
[print] 2 7
["Matched: |$`<$&>$'|\\n"] 8 31
[;] 31 32
L24
[}] 5 6
[else] 7 11
[{] 12 13
L25
[print] 2 7
["No match.\\n"] 8 21
[;] 21 22
L26
[}] 5 6
L28
[}] 1 2
EOF