Better implementation for appendSuffix

This commit is contained in:
Clément Fournier
2020-04-18 13:12:59 +02:00
parent c088a6add5
commit 1f8c5b65b3
9 changed files with 49 additions and 30 deletions

View File

@ -407,7 +407,7 @@ public final class ${token-constants-name} \{${line.separator}
* be used as a basis for a CPD Tokenizer.
*/
@net.sourceforge.pmd.annotation.InternalApi
public static net.sourceforge.pmd.lang.TokenManager<%%%API_PACK%%%.impl.javacc.JavaccToken> newTokenManager(%%%API_PACK%%%.CharStream cs) {
public static net.sourceforge.pmd.lang.TokenManager<%%%API_PACK%%%.impl.javacc.JavaccToken> newTokenManager(%%%API_PACK%%%.impl.javacc.CharStream cs) {
return new %%%TOKEN_MGR_NAME%%%(cs);
}

View File

@ -13,10 +13,6 @@ import net.sourceforge.pmd.util.document.TextDocument;
/**
* PMD flavour of character streams used by JavaCC parsers.
*
* TODO for when all JavaCC languages are aligned:
* * rename methods to match decent naming conventions
* * move to impl.javacc package
*/
public final class CharStream {
@ -69,7 +65,7 @@ public final class CharStream {
*/
public String getTokenImage() {
StringBuilder sb = new StringBuilder();
cursor.markToString(sb);
cursor.appendMark(sb);
return sb.toString();
}
@ -84,8 +80,7 @@ public final class CharStream {
* @throws IndexOutOfBoundsException If len is greater than the length of the current token
*/
public void appendSuffix(StringBuilder sb, int len) {
String t = getTokenImage();
sb.append(t, t.length() - len, t.length());
cursor.appendMarkSuffix(sb, len);
}
@ -105,13 +100,19 @@ public final class CharStream {
cursor.backup(amount);
}
/** Returns the column number of the last character for the current token. */
/**
* Returns the column number of the last character for the current token.
* This is only used for parse exceptions and is very inefficient.
*
* @return the end column reported by {@code endLocation()}
*/
public int getEndColumn() {
return endLocation().getEndColumn();
}
/** Returns the line number of the last character for current token. */
/**
* Returns the line number of the last character for the current token.
* This is only used for parse exceptions and is very inefficient.
*
* @return the end line reported by {@code endLocation()}
*/
public int getEndLine() {
return endLocation().getEndLine();
}

View File

@ -226,30 +226,48 @@ class EscapeTracker {
this.markOutOffset = outOffset;
}
public void markToString(StringBuilder sb) {
public void appendMarkSuffix(StringBuilder sb, int suffixLen) {
ensureMarked();
assert suffixLen <= markLength();
int prevLength = sb.length();
if (markEscape == nextEscape) {
// no escape in the marked range
sb.append(buf, mark, pos);
sb.append(buf, pos - suffixLen, pos);
} else {
sb.ensureCapacity(markLength());
int cur = mark;
int esc = markEscape;
while (cur < pos && esc < nextEscape) {
sb.append(buf, cur, invalidIdx(esc));
cur = indexAfter(esc);
esc += RECORD_SIZE;
if (suffixLen == markLength()) {
appendMark(sb);
} else {
// fallback inefficient implementation
StringBuilder tmp = new StringBuilder();
appendMark(tmp);
sb.append(tmp, tmp.length() - suffixLen, tmp.length());
}
// no more escape in the range, append everything until the pos
sb.append(buf, cur, pos);
assert sb.length() - prevLength == markLength() : sb + " should have length " + markLength();
}
}
/**
 * Appends the marked region, from {@code mark} up to {@code pos}, to the
 * given builder. For each escape record between {@code markEscape} and
 * {@code nextEscape}, the characters from {@code invalidIdx(esc)} up to
 * {@code indexAfter(esc)} are skipped.
 *
 * @param sb Builder that receives the characters
 */
public void appendMark(StringBuilder sb) {
    // NOTE(review): no ensureMarked() check here, unlike appendMarkSuffix —
    // confirm that every caller guarantees a mark has been set.
    if (markEscape == nextEscape) {
        // no escape in the marked range
        sb.append(buf, mark, pos);
        return;
    }

    int prevLength = sb.length();
    // ensureCapacity takes the minimum *total* capacity, so the characters
    // already present in the builder have to be counted as well
    sb.ensureCapacity(prevLength + markLength());

    int cur = mark;
    int esc = markEscape;
    while (cur < pos && esc < nextEscape) {
        sb.append(buf, cur, invalidIdx(esc));
        cur = indexAfter(esc);
        esc += RECORD_SIZE;
    }
    // no more escape in the range, append everything until the pos
    sb.append(buf, cur, pos);
    assert sb.length() - prevLength == markLength() : sb + " should have length " + markLength();
}
private void ensureMarked() {
if (mark == Integer.MAX_VALUE) {
throw new IllegalStateException("Mark is not set");

View File

@ -32,7 +32,7 @@ options {
PARSER_BEGIN(CppParserImpl)
package net.sourceforge.pmd.lang.cpp.ast;
import net.sourceforge.pmd.lang.ast.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
public final class CppParserImpl {

View File

@ -15,7 +15,7 @@ options {
PARSER_BEGIN(Ecmascript5ParserImpl)
package net.sourceforge.pmd.lang.ecmascript5.ast;
import net.sourceforge.pmd.lang.ast.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
public class Ecmascript5ParserImpl {

View File

@ -21,7 +21,7 @@ options {
PARSER_BEGIN(MatlabParserImpl)
package net.sourceforge.pmd.lang.matlab.ast;
import net.sourceforge.pmd.lang.ast.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
public class MatlabParserImpl {

View File

@ -49,7 +49,7 @@ options {
PARSER_BEGIN(ModelicaParserImpl)
package net.sourceforge.pmd.lang.modelica.ast;
import net.sourceforge.pmd.lang.ast.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
class ModelicaParserImpl {

View File

@ -21,7 +21,7 @@ package net.sourceforge.pmd.lang.objectivec.ast;
import java.io.*;
import java.util.*;
import net.sourceforge.pmd.lang.ast.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
/**

View File

@ -17,7 +17,7 @@ PARSER_BEGIN(PythonParserImpl)
package net.sourceforge.pmd.lang.python.ast;
import net.sourceforge.pmd.lang.ast.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
public class PythonParserImpl {