From 1f8c5b65b3e7ce4be59963553ed49a2c6d384ee0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Fournier?= Date: Sat, 18 Apr 2020 13:12:59 +0200 Subject: [PATCH] Better implementation for appendSuffix --- javacc-wrapper.xml | 2 +- .../pmd/lang/ast/impl/javacc/CharStream.java | 19 ++++---- .../lang/ast/impl/javacc/EscapeTracker.java | 46 +++++++++++++------ pmd-cpp/etc/grammar/Cpp.jj | 2 +- pmd-javascript/etc/grammar/Ecmascript5.jj | 2 +- pmd-matlab/etc/grammar/Matlab.jj | 2 +- pmd-modelica/etc/grammar/Modelica.jjt | 2 +- pmd-objectivec/etc/grammar/ObjectiveC.jj | 2 +- pmd-python/etc/grammar/Python.jj | 2 +- 9 files changed, 49 insertions(+), 30 deletions(-) diff --git a/javacc-wrapper.xml b/javacc-wrapper.xml index 5392693a56..b9cf9b1ba1 100644 --- a/javacc-wrapper.xml +++ b/javacc-wrapper.xml @@ -407,7 +407,7 @@ public final class ${token-constants-name} \{${line.separator} * be used as a basis for a CPD Tokenizer. */ @net.sourceforge.pmd.annotation.InternalApi - public static net.sourceforge.pmd.lang.TokenManager<%%%API_PACK%%%.impl.javacc.JavaccToken> newTokenManager(%%%API_PACK%%%.CharStream cs) { + public static net.sourceforge.pmd.lang.TokenManager<%%%API_PACK%%%.impl.javacc.JavaccToken> newTokenManager(%%%API_PACK%%%.impl.javacc.CharStream cs) { return new %%%TOKEN_MGR_NAME%%%(cs); } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStream.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStream.java index d9be04d0a5..bc3c9606a0 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStream.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStream.java @@ -13,10 +13,6 @@ import net.sourceforge.pmd.util.document.TextDocument; /** * PMD flavour of character streams used by JavaCC parsers. - * - * TODO for when all JavaCC languages are aligned: - * * rename methods to match decent naming conventions - * * move to impl.javacc package */ public final class CharStream { @@ -69,7 +65,7 @@ public final class CharStream { */ public String getTokenImage() { StringBuilder sb = new StringBuilder(); - cursor.markToString(sb); + cursor.appendMark(sb); return sb.toString(); } @@ -84,8 +80,7 @@ public final class CharStream { * @throws IndexOutOfBoundsException If len is greater than the length of the current token */ public void appendSuffix(StringBuilder sb, int len) { - String t = getTokenImage(); - sb.append(t, t.length() - len, t.length()); + cursor.appendMarkSuffix(sb, len); } @@ -105,13 +100,19 @@ public final class CharStream { cursor.backup(amount); } - /** Returns the column number of the last character for the current token. */ + /** + * Returns the column number of the last character for the current token. + * This is only used for parse exceptions and is very inefficient. + */ public int getEndColumn() { return endLocation().getEndColumn(); } - /** Returns the line number of the last character for current token. */ + /** + * Returns the line number of the last character for current token. + * This is only used for parse exceptions and is very inefficient. + */ public int getEndLine() { return endLocation().getEndLine(); } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/EscapeTracker.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/EscapeTracker.java index 9745741c6c..a8e9f84d17 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/EscapeTracker.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/EscapeTracker.java @@ -226,30 +226,48 @@ class EscapeTracker { this.markOutOffset = outOffset; } - public void markToString(StringBuilder sb) { + public void appendMarkSuffix(StringBuilder sb, int suffixLen) { ensureMarked(); + assert suffixLen <= markLength(); - int prevLength = sb.length(); if (markEscape == nextEscape) { // no escape in the marked range - sb.append(buf, mark, pos); + sb.append(buf, pos - suffixLen, pos); } else { - sb.ensureCapacity(markLength()); - - int cur = mark; - int esc = markEscape; - while (cur < pos && esc < nextEscape) { - sb.append(buf, cur, invalidIdx(esc)); - cur = indexAfter(esc); - esc += RECORD_SIZE; + if (suffixLen == markLength()) { + appendMark(sb); + } else { + // fallback inefficient implementation + StringBuilder tmp = new StringBuilder(); + appendMark(tmp); + sb.append(tmp, tmp.length() - suffixLen, tmp.length()); } - // no more escape in the range, append everything until the pos - sb.append(buf, cur, pos); - assert sb.length() - prevLength == markLength() : sb + " should have length " + markLength(); } } + public void appendMark(StringBuilder sb) { + if (markEscape == nextEscape) { + // no escape in the marked range + sb.append(buf, mark, pos); + return; + } + + sb.ensureCapacity(markLength()); + int prevLength = sb.length(); + + int cur = mark; + int esc = markEscape; + while (cur < pos && esc < nextEscape) { + sb.append(buf, cur, invalidIdx(esc)); + cur = indexAfter(esc); + esc += RECORD_SIZE; + } + // no more escape in the range, append everything until the pos + sb.append(buf, cur, pos); + assert sb.length() - prevLength == markLength() : sb + " should have length " + markLength(); + } + private void ensureMarked() { if (mark == Integer.MAX_VALUE) { throw new IllegalStateException("Mark is not set"); diff --git a/pmd-cpp/etc/grammar/Cpp.jj b/pmd-cpp/etc/grammar/Cpp.jj index 450f772c41..eed7cc58cf 100644 --- a/pmd-cpp/etc/grammar/Cpp.jj +++ b/pmd-cpp/etc/grammar/Cpp.jj @@ -32,7 +32,7 @@ options { PARSER_BEGIN(CppParserImpl) package net.sourceforge.pmd.lang.cpp.ast; -import net.sourceforge.pmd.lang.ast.CharStream; +import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream; import net.sourceforge.pmd.lang.ast.TokenMgrError; public final class CppParserImpl { diff --git a/pmd-javascript/etc/grammar/Ecmascript5.jj b/pmd-javascript/etc/grammar/Ecmascript5.jj index e180c2f760..a9cf14bcbb 100644 --- a/pmd-javascript/etc/grammar/Ecmascript5.jj +++ b/pmd-javascript/etc/grammar/Ecmascript5.jj @@ -15,7 +15,7 @@ options { PARSER_BEGIN(Ecmascript5ParserImpl) package net.sourceforge.pmd.lang.ecmascript5.ast; -import net.sourceforge.pmd.lang.ast.CharStream; +import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream; import net.sourceforge.pmd.lang.ast.TokenMgrError; public class Ecmascript5ParserImpl { diff --git a/pmd-matlab/etc/grammar/Matlab.jj b/pmd-matlab/etc/grammar/Matlab.jj index 891a80f886..3ff1c8b27c 100644 --- a/pmd-matlab/etc/grammar/Matlab.jj +++ b/pmd-matlab/etc/grammar/Matlab.jj @@ -21,7 +21,7 @@ options { PARSER_BEGIN(MatlabParserImpl) package net.sourceforge.pmd.lang.matlab.ast; -import net.sourceforge.pmd.lang.ast.CharStream; +import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream; import net.sourceforge.pmd.lang.ast.TokenMgrError; public class MatlabParserImpl { diff --git a/pmd-modelica/etc/grammar/Modelica.jjt b/pmd-modelica/etc/grammar/Modelica.jjt index 67039e9d1b..e2e7d63121 100644 --- a/pmd-modelica/etc/grammar/Modelica.jjt +++ b/pmd-modelica/etc/grammar/Modelica.jjt @@ -49,7 +49,7 @@ options { PARSER_BEGIN(ModelicaParserImpl) package net.sourceforge.pmd.lang.modelica.ast; -import net.sourceforge.pmd.lang.ast.CharStream; +import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream; import net.sourceforge.pmd.lang.ast.TokenMgrError; class ModelicaParserImpl { diff --git a/pmd-objectivec/etc/grammar/ObjectiveC.jj b/pmd-objectivec/etc/grammar/ObjectiveC.jj index e42313b6c2..171ca581fa 100644 --- a/pmd-objectivec/etc/grammar/ObjectiveC.jj +++ b/pmd-objectivec/etc/grammar/ObjectiveC.jj @@ -21,7 +21,7 @@ package net.sourceforge.pmd.lang.objectivec.ast; import java.io.*; import java.util.*; -import net.sourceforge.pmd.lang.ast.CharStream; +import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream; import net.sourceforge.pmd.lang.ast.TokenMgrError; /** diff --git a/pmd-python/etc/grammar/Python.jj b/pmd-python/etc/grammar/Python.jj index b9c2313b62..8087042e7c 100644 --- a/pmd-python/etc/grammar/Python.jj +++ b/pmd-python/etc/grammar/Python.jj @@ -17,7 +17,7 @@ PARSER_BEGIN(PythonParserImpl) package net.sourceforge.pmd.lang.python.ast; -import net.sourceforge.pmd.lang.ast.CharStream; +import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream; import net.sourceforge.pmd.lang.ast.TokenMgrError; public class PythonParserImpl {