Ensure CPD uses tab width of 1 for tabs consistently

The columns reported by CPD were previously inconsistent across languages. A language like Java (using a JavaCC-based tokenizer) would use a width of 8 for a tab character, whereas a language like C# (using an ANTLR-based tokenizer) would use a width of 1 instead. This commit adds unit tests for most languages to ensure that a tab character is counted as 1 column. The configuration for JavaCC has been adjusted so that JavaCC-based tokenizers count a tab as 1 column as well.
2020-07-20 10:42:21 +02:00
parent 25405eb870
commit 6fb5ac59b9
45 changed files with 724 additions and 62 deletions
--- a/pmd-python/src/test/java/net/sourceforge/pmd/cpd/PythonTokenizerTest.java
+++ b/pmd-python/src/test/java/net/sourceforge/pmd/cpd/PythonTokenizerTest.java
@@ -41,4 +41,9 @@ public class PythonTokenizerTest extends CpdTextComparisonTest {
    public void testBackticks() {
        doTest("backticks");
    }
+
+    @Test
+    public void testTabWidth() {
+        doTest("tabWidth");
+    }
 }
--- a/pmd-python/src/test/resources/net/sourceforge/pmd/lang/python/cpd/testdata/tabWidth.py
+++ b/pmd-python/src/test/resources/net/sourceforge/pmd/lang/python/cpd/testdata/tabWidth.py
@@ -0,0 +1,4 @@
+def hello():
+	return 'Hello world'
+
+print(hello())
--- a/pmd-python/src/test/resources/net/sourceforge/pmd/lang/python/cpd/testdata/tabWidth.txt
+++ b/pmd-python/src/test/resources/net/sourceforge/pmd/lang/python/cpd/testdata/tabWidth.txt
@@ -0,0 +1,18 @@
+    [Image] or [Truncated image[            Bcol      Ecol
+L1
+    [def]                                   1         3
+    [hello]                                 5         9
+    [(]                                     10        10
+    [)]                                     11        11
+    [:]                                     12        12
+L2
+    [return]                                2         7
+    ['Hello world']                         9         21
+L4
+    [print]                                 1         5
+    [(]                                     6         6
+    [hello]                                 7         11
+    [(]                                     12        12
+    [)]                                     13        13
+    [)]                                     14        14
+EOF