Better implementation for appendSuffix

2020-04-18 13:12:59 +02:00
parent c088a6add5
commit 1f8c5b65b3
9 changed files with 49 additions and 30 deletions
--- a/javacc-wrapper.xml
+++ b/javacc-wrapper.xml
@ -407,7 +407,7 @@ public final class ${token-constants-name} \{${line.separator}
    * be used as a basis for a CPD Tokenizer.
    */
  @net.sourceforge.pmd.annotation.InternalApi
-  public static net.sourceforge.pmd.lang.TokenManager<%%%API_PACK%%%.impl.javacc.JavaccToken> newTokenManager(%%%API_PACK%%%.CharStream cs) {
+  public static net.sourceforge.pmd.lang.TokenManager<%%%API_PACK%%%.impl.javacc.JavaccToken> newTokenManager(%%%API_PACK%%%.impl.javacc.CharStream cs) {
    return new %%%TOKEN_MGR_NAME%%%(cs);
  }

--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStream.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStream.java
@ -13,10 +13,6 @@ import net.sourceforge.pmd.util.document.TextDocument;

 /**
 * PMD flavour of character streams used by JavaCC parsers.
- *
- * TODO for when all JavaCC languages are aligned:
- * * rename methods to match decent naming conventions
- * * move to impl.javacc package
 */
 public final class CharStream {

@ -69,7 +65,7 @@ public final class CharStream {
     */
    public String getTokenImage() {
        StringBuilder sb = new StringBuilder();
-        cursor.markToString(sb);
+        cursor.appendMark(sb);
        return sb.toString();
    }

@ -84,8 +80,7 @@ public final class CharStream {
     * @throws IndexOutOfBoundsException If len is greater than the length of the current token
     */
    public void appendSuffix(StringBuilder sb, int len) {
-        String t = getTokenImage();
-        sb.append(t, t.length() - len, t.length());
+        cursor.appendMarkSuffix(sb, len);
    }


@ -105,13 +100,19 @@ public final class CharStream {
        cursor.backup(amount);
    }

-    /** Returns the column number of the last character for the current token. */
+    /**
+     * Returns the column number of the last character for the current token.
+     * This is only used for parse exceptions and is very inefficient.
+     */
    public int getEndColumn() {
        return endLocation().getEndColumn();
    }


-    /** Returns the line number of the last character for current token. */
+    /**
+     * Returns the line number of the last character for the current token.
+     * This is only used for parse exceptions and is very inefficient.
+     */
    public int getEndLine() {
        return endLocation().getEndLine();
    }
--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/EscapeTracker.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/EscapeTracker.java
@ -226,30 +226,48 @@ class EscapeTracker {
            this.markOutOffset = outOffset;
        }

-        public void markToString(StringBuilder sb) {
+        /**
+         * Appends the last {@code suffixLen} characters of the marked region
+         * to the given builder.
+         *
+         * @param sb        Builder to append to
+         * @param suffixLen Number of trailing characters of the marked region to append
+         *
+         * @throws IllegalStateException     If the mark is not set
+         * @throws IndexOutOfBoundsException If suffixLen is negative or greater than the length of the marked region
+         */
+        public void appendMarkSuffix(StringBuilder sb, int suffixLen) {
            ensureMarked();
+            // Checked explicitly rather than asserted: with assertions disabled, the
+            // no-escape branch below would otherwise copy characters located before the mark.
+            if (suffixLen < 0 || suffixLen > markLength()) {
+                throw new IndexOutOfBoundsException(
+                    "Suffix length " + suffixLen + " is out of bounds for a mark of length " + markLength());
+            }

-            int prevLength = sb.length();

            if (markEscape == nextEscape) {
                // no escape in the marked range
-                sb.append(buf, mark, pos);
+                sb.append(buf, pos - suffixLen, pos);
            } else {
-                sb.ensureCapacity(markLength());
-
-                int cur = mark;
-                int esc = markEscape;
-                while (cur < pos && esc < nextEscape) {
-                    sb.append(buf, cur, invalidIdx(esc));
-                    cur = indexAfter(esc);
-                    esc += RECORD_SIZE;
+                if (suffixLen == markLength()) {
+                    // the suffix is the whole marked region
+                    appendMark(sb);
+                } else {
+                    // fallback inefficient implementation: build the whole
+                    // marked region, then copy only its tail
+                    StringBuilder tmp = new StringBuilder();
+                    appendMark(tmp);
+                    sb.append(tmp, tmp.length() - suffixLen, tmp.length());
                }
-                // no more escape in the range, append everything until the pos
-                sb.append(buf, cur, pos);
-                assert sb.length() - prevLength == markLength() : sb + " should have length " + markLength();
            }
        }

+        /**
+         * Appends the marked region (from the mark up to the current position)
+         * to the given builder. When escape records exist in that range, the
+         * buffer is copied piecewise: up to {@code invalidIdx(esc)} for each
+         * record, resuming at {@code indexAfter(esc)}.
+         *
+         * @param sb Builder to append to
+         *
+         * @throws IllegalStateException If the mark is not set
+         */
+        public void appendMark(StringBuilder sb) {
+            // Reject an unset mark up front, as the replaced markToString did.
+            ensureMarked();
+
+            if (markEscape == nextEscape) {
+                // no escape in the marked range
+                sb.append(buf, mark, pos);
+                return;
+            }
+
+            int prevLength = sb.length();
+            // reserve room on top of what the builder already holds
+            sb.ensureCapacity(prevLength + markLength());
+
+            int cur = mark;
+            int esc = markEscape;
+            while (cur < pos && esc < nextEscape) {
+                sb.append(buf, cur, invalidIdx(esc));
+                cur = indexAfter(esc);
+                esc += RECORD_SIZE;
+            }
+            // no more escape in the range, append everything until the pos
+            sb.append(buf, cur, pos);
+            // only the part appended by this call is expected to match markLength()
+            assert sb.length() - prevLength == markLength()
+                : sb.substring(prevLength) + " should have length " + markLength();
+        }
+
        private void ensureMarked() {
            if (mark == Integer.MAX_VALUE) {
                throw new IllegalStateException("Mark is not set");
--- a/pmd-cpp/etc/grammar/Cpp.jj
+++ b/pmd-cpp/etc/grammar/Cpp.jj
@ -32,7 +32,7 @@ options {
 PARSER_BEGIN(CppParserImpl)
 package net.sourceforge.pmd.lang.cpp.ast;

-import net.sourceforge.pmd.lang.ast.CharStream;
+import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
 import net.sourceforge.pmd.lang.ast.TokenMgrError;

 public final class CppParserImpl {
--- a/pmd-javascript/etc/grammar/Ecmascript5.jj
+++ b/pmd-javascript/etc/grammar/Ecmascript5.jj
@ -15,7 +15,7 @@ options {
 PARSER_BEGIN(Ecmascript5ParserImpl)
 package net.sourceforge.pmd.lang.ecmascript5.ast;

-import net.sourceforge.pmd.lang.ast.CharStream;
+import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
 import net.sourceforge.pmd.lang.ast.TokenMgrError;

 public class Ecmascript5ParserImpl {
--- a/pmd-matlab/etc/grammar/Matlab.jj
+++ b/pmd-matlab/etc/grammar/Matlab.jj
@ -21,7 +21,7 @@ options {
 PARSER_BEGIN(MatlabParserImpl)
 package net.sourceforge.pmd.lang.matlab.ast;

-import net.sourceforge.pmd.lang.ast.CharStream;
+import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
 import net.sourceforge.pmd.lang.ast.TokenMgrError;

 public class MatlabParserImpl {
--- a/pmd-modelica/etc/grammar/Modelica.jjt
+++ b/pmd-modelica/etc/grammar/Modelica.jjt
@ -49,7 +49,7 @@ options {
 PARSER_BEGIN(ModelicaParserImpl)
 package net.sourceforge.pmd.lang.modelica.ast;

-import net.sourceforge.pmd.lang.ast.CharStream;
+import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
 import net.sourceforge.pmd.lang.ast.TokenMgrError;

 class ModelicaParserImpl {
--- a/pmd-objectivec/etc/grammar/ObjectiveC.jj
+++ b/pmd-objectivec/etc/grammar/ObjectiveC.jj
@ -21,7 +21,7 @@ package net.sourceforge.pmd.lang.objectivec.ast;
 import java.io.*;
 import java.util.*;

-import net.sourceforge.pmd.lang.ast.CharStream;
+import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
 import net.sourceforge.pmd.lang.ast.TokenMgrError;

 /**
--- a/pmd-python/etc/grammar/Python.jj
+++ b/pmd-python/etc/grammar/Python.jj
@ -17,7 +17,7 @@ PARSER_BEGIN(PythonParserImpl)

 package net.sourceforge.pmd.lang.python.ast;

-import net.sourceforge.pmd.lang.ast.CharStream;
+import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
 import net.sourceforge.pmd.lang.ast.TokenMgrError;

 public class PythonParserImpl {