Abstract backslash escape readers
This commit is contained in:
@ -0,0 +1,68 @@
|
||||
/*
|
||||
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
|
||||
*/
|
||||
|
||||
package net.sourceforge.pmd.lang.ast.impl.javacc;
|
||||
|
||||
import static java.lang.Integer.min;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import net.sourceforge.pmd.util.document.Chars;
|
||||
|
||||
/**
|
||||
* A base class for readers that handle escapes starting with a backslash.
|
||||
*/
|
||||
public abstract class BackslashEscapeReader extends EscapeAwareReader {
|
||||
|
||||
private static final char BACKSLASH = '\\';
|
||||
|
||||
/**
|
||||
* An offset until which we read backslashes and decided they were not
|
||||
* an escape. The read procedure may cut off in the middle of the escape,
|
||||
* and turn an even num of backslashes into an odd one, so until we crossed
|
||||
* this offset, backslashes are not treated specially.
|
||||
*/
|
||||
private int savedNotEscapeSpecialEnd = Integer.MAX_VALUE;
|
||||
|
||||
|
||||
public BackslashEscapeReader(Chars input) {
|
||||
super(input);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int gobbleMaxWithoutEscape(final int maxOff) throws IOException {
|
||||
int off = this.bufpos;
|
||||
boolean noBackSlash = false;
|
||||
int notEscapeEnd = this.savedNotEscapeSpecialEnd;
|
||||
while (off < maxOff && (noBackSlash = input.charAt(off) != BACKSLASH || notEscapeEnd < off)) {
|
||||
off++;
|
||||
}
|
||||
|
||||
if (noBackSlash || off == maxOff) {
|
||||
this.bufpos = off;
|
||||
return off;
|
||||
}
|
||||
|
||||
return handleBackslash(maxOff, off);
|
||||
}
|
||||
|
||||
protected abstract int handleBackslash(int maxOff, int firstBackslashOff) throws IOException;
|
||||
|
||||
@Override
|
||||
protected int recordEscape(int startOffsetInclusive, int lengthInSource, int translatedLength) {
|
||||
this.savedNotEscapeSpecialEnd = Integer.MAX_VALUE;
|
||||
return super.recordEscape(startOffsetInclusive, lengthInSource, translatedLength);
|
||||
}
|
||||
|
||||
protected int abortEscape(int off, int maxOff) {
|
||||
// not an escape sequence
|
||||
int min = min(maxOff, off);
|
||||
// save the number of backslashes that are part of the escape,
|
||||
// might have been cut in half by the maxReadahead
|
||||
this.savedNotEscapeSpecialEnd = min < off ? off : Integer.MAX_VALUE;
|
||||
this.bufpos = min;
|
||||
return min;
|
||||
}
|
||||
|
||||
}
|
@ -10,16 +10,27 @@ package net.sourceforge.pmd.lang.ast.impl.javacc;
|
||||
|
||||
import static java.lang.Integer.min;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import net.sourceforge.pmd.internal.util.AssertionUtil;
|
||||
import net.sourceforge.pmd.util.StringUtil;
|
||||
import net.sourceforge.pmd.util.document.Chars;
|
||||
|
||||
/**
|
||||
* A reader that can interpret escapes in its input text. It records where
|
||||
* escapes occurred, and can translate an offset in the translated
|
||||
* input document to a line+column position in the original input.
|
||||
* A reader that may interpret escapes in its input text. It records
|
||||
* where escapes occurred, and can translate an offset in the translated
|
||||
* document (the "output") to line/column/offset coordinates in the
|
||||
* original input. It uses a single char buffer to store both input and
|
||||
* translated output, and is overall very optimised for the case where
|
||||
* there are very few escapes. {@link CharStream} is the API to navigate
|
||||
* on a translated document (with arbitrary backtrack abilities).
|
||||
*
|
||||
* <p>This is useful to back a {@link CharStream} for JavaCC implementation,
|
||||
* but can also be used as a plain {@link Reader} if using other parser/lexer
|
||||
* implementations. The reader behaviour is optimised for block IO and has
|
||||
* poor char-by-char performance. Use a {@link BufferedReader} if you need it.
|
||||
*
|
||||
* <p>The default implementation does not perform any escape translation.
|
||||
*/
|
||||
@ -39,7 +50,7 @@ public class EscapeAwareReader extends Reader {
|
||||
final EscapeTracker escapes = new EscapeTracker();
|
||||
|
||||
/**
 * Creates a reader that works on a mutable copy of the given input,
 * starting at offset zero.
 *
 * @param input Text to read, checked with
 *              {@link AssertionUtil#requireParamNotNull(String, Object)}
 */
public EscapeAwareReader(Chars input) {
    // A single explicit check: it also runs when assertions are disabled.
    AssertionUtil.requireParamNotNull("input", input);
    this.input = input.mutableCopy();
    this.bufpos = 0;
}
|
||||
@ -91,15 +102,17 @@ public class EscapeAwareReader extends Reader {
|
||||
|
||||
/**
|
||||
* Returns the max offset, EXclusive, with which we can cut the input
|
||||
* array from the bufpos to dump it into the output array. This sets
|
||||
* the bufpos to where we should start the next jump.
|
||||
* array from the bufpos to dump it into the output array. This must
|
||||
* set the {@link #bufpos} to where we should start reading next (INclusive).
|
||||
* If applicable, it must also replace in the buffer the start of
|
||||
* the escape with its translation.
|
||||
*/
|
||||
protected int gobbleMaxWithoutEscape(int maxOff) throws IOException {
|
||||
return this.bufpos = maxOff;
|
||||
}
|
||||
|
||||
protected int recordEscape(final int startOffsetInclusive, int lengthInSource, int translatedLength) {
|
||||
assert lengthInSource > 0 && startOffsetInclusive >= 0;
|
||||
assert lengthInSource > 0 && lengthInSource >= translatedLength && startOffsetInclusive >= 0;
|
||||
this.escapes.recordEscape(startOffsetInclusive, lengthInSource, translatedLength);
|
||||
this.bufpos = startOffsetInclusive + lengthInSource;
|
||||
return startOffsetInclusive + translatedLength;
|
||||
@ -142,10 +155,20 @@ public class EscapeAwareReader extends Reader {
|
||||
return escapes.inputOffsetAt(outputOffset);
|
||||
}
|
||||
|
||||
/**
|
||||
* The parameter is an *input* offset, if you got this offset from
|
||||
* somewhere else than the input buffer you must first translate it
|
||||
* back with {@link #inputOffset(int)}. This implementation is very
|
||||
* inefficient but currently is only used for error messages (which
|
||||
* obviously are exceptional).
|
||||
*/
|
||||
public int getLine(int idxInInput) {
|
||||
return StringUtil.lineNumberAt(input, idxInInput);
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #getLine(int)
|
||||
*/
|
||||
public int getColumn(int idxInInput) {
|
||||
return StringUtil.columnNumberAt(input, idxInInput);
|
||||
}
|
||||
|
@ -23,6 +23,9 @@ import net.sourceforge.pmd.util.document.Chars;
|
||||
* than the escape.
|
||||
* - C++ translates newline escapes (1 or 2 chars) to zero chars (an important corner case)
|
||||
* - Java translates arbitrary-length unicode escapes (>= 6 chars) to 1 char
|
||||
*
|
||||
* <p>This class is tightly coupled to what {@link EscapeAwareReader}
|
||||
* does with its buffer.
|
||||
*/
|
||||
class EscapeTracker {
|
||||
|
||||
|
@ -8,53 +8,29 @@
|
||||
|
||||
package net.sourceforge.pmd.lang.ast.impl.javacc;
|
||||
|
||||
import static java.lang.Integer.min;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
|
||||
import net.sourceforge.pmd.util.document.Chars;
|
||||
|
||||
/**
|
||||
* An implementation of java.io.Reader that translates Java unicode escapes.
|
||||
* This implementation has efficient block IO but poor char-by-char performance.
|
||||
* If this is required, wrap it into a {@link BufferedReader}.
|
||||
* An implementation of {@link EscapeAwareReader} that translates Java
|
||||
* unicode escapes.
|
||||
*/
|
||||
@SuppressWarnings("PMD.AssignmentInOperand")
|
||||
public final class JavaInputReader extends EscapeAwareReader {
|
||||
|
||||
/**
|
||||
* An offset until which we read backslashes and decided they were not
|
||||
* an escape. The read procedure may cut off in the middle of the escape,
|
||||
* and turn an even num of backslashes into an odd one, so until we crossed
|
||||
* this offset, backslashes are not treated specially.
|
||||
*/
|
||||
private int savedNotEscapeSpecialEnd = Integer.MAX_VALUE;
|
||||
public final class JavaInputReader extends BackslashEscapeReader {
|
||||
|
||||
/**
 * Creates a reader for the given text; the input is handed to the
 * superclass constructor unchanged.
 *
 * @param input Text to read
 */
public JavaInputReader(Chars input) {
    super(input);
}
|
||||
|
||||
@Override
|
||||
protected int gobbleMaxWithoutEscape(final int maxOff) throws IOException {
|
||||
int off = this.bufpos;
|
||||
boolean noBackSlash = false;
|
||||
int notEscapeEnd = this.savedNotEscapeSpecialEnd;
|
||||
while (off < maxOff && (noBackSlash = input.charAt(off) != '\\' || notEscapeEnd < off)) {
|
||||
off++;
|
||||
}
|
||||
|
||||
if (noBackSlash) {
|
||||
this.bufpos = off;
|
||||
return off;
|
||||
}
|
||||
|
||||
final int firstBslashOff = off;
|
||||
protected int handleBackslash(final int maxOff, final int firstBackslashOff) throws IOException {
|
||||
int off = firstBackslashOff;
|
||||
while (off < input.length() && input.charAt(off) == '\\') {
|
||||
off++;
|
||||
}
|
||||
|
||||
int bslashCount = off - firstBslashOff;
|
||||
int bslashCount = off - firstBackslashOff;
|
||||
// is there an escape at offset firstBackslashOff?
|
||||
if ((bslashCount & 1) == 1 // odd number of backslashes
|
||||
&& off < input.length() && input.charAt(off) == 'u') { // at least one 'u'
|
||||
@ -63,17 +39,10 @@ public final class JavaInputReader extends EscapeAwareReader {
|
||||
// consume all the 'u's
|
||||
off++;
|
||||
}
|
||||
int end = replaceFirstBackslashWithEscape(firstBslashOff, off - 1);
|
||||
this.savedNotEscapeSpecialEnd = Integer.MAX_VALUE;
|
||||
return recordEscape(firstBslashOff, end - firstBslashOff, 1);
|
||||
int end = replaceFirstBackslashWithEscape(firstBackslashOff, off - 1);
|
||||
return recordEscape(firstBackslashOff, end - firstBackslashOff, 1);
|
||||
} else {
|
||||
// not an escape sequence
|
||||
int min = min(maxOff, off);
|
||||
// save the number of backslashes that are part of the escape,
|
||||
// might have been cut in half by the maxReadahead
|
||||
this.savedNotEscapeSpecialEnd = min < off ? off : Integer.MAX_VALUE;
|
||||
this.bufpos = min;
|
||||
return min;
|
||||
return abortEscape(off, maxOff);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4,53 +4,30 @@
|
||||
|
||||
package net.sourceforge.pmd.lang.cpp.ast;
|
||||
|
||||
import static java.lang.Integer.min;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import net.sourceforge.pmd.lang.ast.impl.javacc.EscapeAwareReader;
|
||||
import net.sourceforge.pmd.lang.ast.impl.javacc.BackslashEscapeReader;
|
||||
import net.sourceforge.pmd.util.document.Chars;
|
||||
|
||||
public class CppEscapeReader extends EscapeAwareReader {
|
||||
public class CppEscapeReader extends BackslashEscapeReader {
|
||||
|
||||
private static final char NEWLINE = '\n';
|
||||
private static final char CARRIAGE_RETURN = '\r';
|
||||
|
||||
private int savedNotEscapeSpecialEnd = Integer.MAX_VALUE;
|
||||
|
||||
public CppEscapeReader(Chars input) {
|
||||
super(input);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int gobbleMaxWithoutEscape(final int maxOff) throws IOException {
|
||||
int off = this.bufpos;
|
||||
boolean noBackSlash = false;
|
||||
int notEscapeEnd = this.savedNotEscapeSpecialEnd;
|
||||
while (off < maxOff && (noBackSlash = input.charAt(off) != '\\' || notEscapeEnd < off)) {
|
||||
off++;
|
||||
}
|
||||
protected int handleBackslash(int maxOff, final int backSlashOff) {
|
||||
int off = backSlashOff;
|
||||
|
||||
if (noBackSlash || off == maxOff) {
|
||||
this.bufpos = off;
|
||||
return off;
|
||||
}
|
||||
|
||||
final int backSlackOff = off++;
|
||||
if (input.charAt(off) == NEWLINE) {
|
||||
return recordEscape(backSlackOff, 2, 0);
|
||||
return recordEscape(backSlashOff, 2, 0);
|
||||
} else if (input.charAt(off) == CARRIAGE_RETURN) {
|
||||
if (input.charAt(++off) == NEWLINE) {
|
||||
return recordEscape(backSlackOff, 3, 0);
|
||||
return recordEscape(backSlashOff, 3, 0);
|
||||
}
|
||||
}
|
||||
|
||||
// not an escape sequence
|
||||
int min = min(maxOff, off);
|
||||
// save the number of backslashes that are part of the escape,
|
||||
// might have been cut in half by the maxReadahead
|
||||
this.savedNotEscapeSpecialEnd = min < off ? off : Integer.MAX_VALUE;
|
||||
this.bufpos = min;
|
||||
return min;
|
||||
return abortEscape(off, maxOff);
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user