From b779abd08b500a5f94ff592f5e72bbfc9de4e252 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Fournier?= Date: Sat, 18 Apr 2020 07:25:22 +0200 Subject: [PATCH] Align cpd --- .../pmd/cpd/internal/JavaCCTokenizer.java | 18 +-- .../lang/ast/impl/io/EscapeAwareReader.java | 28 ++--- .../pmd/lang/ast/impl/io/EscapeTracker.java | 8 +- .../pmd/lang/ast/impl/io/JavaInputReader.java | 29 ++--- .../pmd/lang/ast/impl/io/NewCharStream.java | 8 +- .../ast/impl/javacc/CharStreamFactory.java | 69 ---------- .../lang/ast/impl/javacc/JavaCharStream.java | 119 ------------------ .../ast/impl/javacc/JavaccTokenDocument.java | 13 ++ .../ast/impl/javacc/SimpleCharStream.java | 20 --- .../net/sourceforge/pmd/cpd/CPPTokenizer.java | 45 ++++--- .../pmd/lang/cpp/ast/CppCharStream.java | 80 ------------ .../pmd/lang/cpp/ast/CppEscapeReader.java | 61 +++++++++ .../cpp/ast => cpd}/CppCharStreamTest.java | 21 +++- .../sourceforge/pmd/cpd/JavaTokenizer.java | 8 +- .../pmd/lang/java/ast/JavaParser.java | 6 - .../pmd/lang/java/ast/JavaTokenDocument.java | 8 ++ .../net/sourceforge/pmd/cpd/JSPTokenizer.java | 19 ++- .../pmd/lang/plsql/ast/PLSQLParser.java | 1 - .../sourceforge/pmd/cpd/PythonTokenizer.java | 6 +- .../net/sourceforge/pmd/cpd/VfTokenizer.java | 19 ++- 20 files changed, 198 insertions(+), 388 deletions(-) delete mode 100644 pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStreamFactory.java delete mode 100644 pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaCharStream.java delete mode 100644 pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/SimpleCharStream.java delete mode 100644 pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/ast/CppCharStream.java create mode 100644 pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/ast/CppEscapeReader.java rename pmd-cpp/src/test/java/net/sourceforge/pmd/{lang/cpp/ast => cpd}/CppCharStreamTest.java (61%) diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/JavaCCTokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/JavaCCTokenizer.java index 4c4d248aaf..5e3c06a735 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/JavaCCTokenizer.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/JavaCCTokenizer.java @@ -5,9 +5,6 @@ package net.sourceforge.pmd.cpd.internal; import java.io.IOException; -import java.io.Reader; - -import org.apache.commons.io.input.CharSequenceReader; import net.sourceforge.pmd.cpd.SourceCode; import net.sourceforge.pmd.cpd.TokenEntry; @@ -18,20 +15,23 @@ import net.sourceforge.pmd.cpd.token.TokenFilter; import net.sourceforge.pmd.lang.TokenManager; import net.sourceforge.pmd.lang.ast.CharStream; import net.sourceforge.pmd.lang.ast.TokenMgrError; -import net.sourceforge.pmd.lang.ast.impl.javacc.CharStreamFactory; +import net.sourceforge.pmd.lang.ast.impl.io.NewCharStream; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken; -import net.sourceforge.pmd.util.IOUtil; +import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument; +import net.sourceforge.pmd.util.document.TextDocument; +import net.sourceforge.pmd.util.document.io.TextFile; public abstract class JavaCCTokenizer implements Tokenizer { @SuppressWarnings("PMD.CloseResource") protected TokenManager getLexerForSource(SourceCode sourceCode) throws IOException { - Reader reader = IOUtil.skipBOM(new CharSequenceReader(sourceCode.getCodeBuffer())); - return makeLexerImpl(makeCharStream(reader)); + TextDocument textDocument = TextDocument.create(TextFile.cpdCompat(sourceCode)); + JavaccTokenDocument tokenDoc = newTokenDoc(textDocument); + return makeLexerImpl(NewCharStream.open(tokenDoc)); } - protected CharStream makeCharStream(Reader sourceCode) throws IOException { - return CharStreamFactory.simpleCharStream(sourceCode); + protected JavaccTokenDocument newTokenDoc(TextDocument textDoc) { + return new JavaccTokenDocument(textDoc); } protected abstract TokenManager makeLexerImpl(CharStream sourceCode); diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/EscapeAwareReader.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/EscapeAwareReader.java index e026544557..69ee25d9d7 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/EscapeAwareReader.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/EscapeAwareReader.java @@ -11,6 +11,7 @@ import java.io.Reader; import java.nio.CharBuffer; import net.sourceforge.pmd.util.StringUtil; +import net.sourceforge.pmd.util.document.Chars; /** * A reader that optionally escapes its input text. It records where @@ -34,29 +35,18 @@ public class EscapeAwareReader extends Reader { * replace the first char with a backslash. We can report unnecessary * escapes that way. */ - protected char[] input; + protected Chars input; /** Position of the next char to read in the input. */ protected int bufpos; /** Keep track of adjustments to make to the offsets, caused by unicode escapes. */ final EscapeTracker escapes = new EscapeTracker(); - public EscapeAwareReader(CharSequence input, int startIdxInclusive, int endIdxExclusive) { + public EscapeAwareReader(Chars input) { assert input != null; - assert startIdxInclusive >= 0; - assert endIdxExclusive >= 0; - assert endIdxExclusive >= startIdxInclusive; - - int len = endIdxExclusive - startIdxInclusive; - - this.input = new char[len]; - input.toString().getChars(startIdxInclusive, endIdxExclusive, this.input, 0); + this.input = input.mutableCopy(); bufpos = 0; } - public EscapeAwareReader(CharSequence input) { - this(input, 0, input.length()); - } - /** * Translate all the characters in the buffer. */ @@ -68,12 +58,12 @@ public class EscapeAwareReader extends Reader { @Override public int read(final char[] cbuf, final int off, final int len) throws IOException { ensureOpen(); - if (this.bufpos == input.length) { + if (this.bufpos == input.length()) { return -1; } int readChars = 0; - while (readChars < len && this.bufpos < input.length) { + while (readChars < len && this.bufpos < input.length()) { int bpos = this.bufpos; int nextJump = gobbleMaxWithoutEscape(bpos, len - readChars); int newlyReadChars = nextJump - bpos; @@ -82,9 +72,9 @@ public class EscapeAwareReader extends Reader { if (newlyReadChars != 0) { if (cbuf != null) { - System.arraycopy(input, bpos, cbuf, off + readChars, newlyReadChars); + input.getChars(bpos, cbuf, off + readChars, newlyReadChars); } - } else if (nextJump == input.length) { + } else if (nextJump == input.length()) { // eof break; } @@ -100,7 +90,7 @@ public class EscapeAwareReader extends Reader { * the bufpos to where we should start the next jump. */ protected int gobbleMaxWithoutEscape(final int bufpos, final int maxReadahead) throws IOException { - return this.bufpos = min(bufpos + maxReadahead, input.length); + return this.bufpos = min(bufpos + maxReadahead, input.length()); } protected void recordEscape(final int startOffsetInclusive, int length) { diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/EscapeTracker.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/EscapeTracker.java index 8e39d172d3..ecd74755b3 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/EscapeTracker.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/EscapeTracker.java @@ -4,6 +4,8 @@ package net.sourceforge.pmd.lang.ast.impl.io; +import net.sourceforge.pmd.util.document.Chars; + /** * Records where escapes occurred in the input document. This is quite * an inefficient way to deal with it, yet in the common case where there @@ -94,18 +96,18 @@ class EscapeTracker { * identified by the {@link #escapeRecords}. It must hold * that buf.length is the original source length. */ - private final char[] buf; + private final Chars buf; private int mark = Integer.MAX_VALUE; private int markEscape; private int markOutOffset; - Cursor(char[] buf) { + Cursor(Chars buf) { this.buf = buf; } char next() { - char c = buf[pos]; + char c = buf.charAt(pos); if (nextEscape < escapeRecords.length && pos == escapeRecords[nextEscape]) { pos += escapeRecords[nextEscape + 1]; // add escape length diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/JavaInputReader.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/JavaInputReader.java index a3bbcd4dfa..d5b84c7c1d 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/JavaInputReader.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/JavaInputReader.java @@ -9,6 +9,8 @@ import static java.lang.Integer.min; import java.io.BufferedReader; import java.io.IOException; +import net.sourceforge.pmd.util.document.Chars; + /** * An implementation of java.io.Reader that translates Java unicode escapes. * This implementation has efficient block IO but poor char-by-char performance. @@ -25,15 +27,10 @@ public final class JavaInputReader extends EscapeAwareReader { */ private int savedNotEscapeSpecialEnd = Integer.MAX_VALUE; - public JavaInputReader(CharSequence input, int startIdxInclusive, int endIdxExclusive) { - super(input, startIdxInclusive, endIdxExclusive); - } - - public JavaInputReader(CharSequence input) { + public JavaInputReader(Chars input) { super(input); } - /** * Returns the max offset, EXclusive, with which we can cut the input * array from the bufpos to dump it into the output array. This sets @@ -42,10 +39,10 @@ public final class JavaInputReader extends EscapeAwareReader { @Override protected int gobbleMaxWithoutEscape(final int bufpos, final int maxReadahead) throws IOException { int off = bufpos; - int max = min(bufpos + maxReadahead, input.length); + int max = min(bufpos + maxReadahead, input.length()); boolean noBackSlash = false; int notEscapeEnd = this.savedNotEscapeSpecialEnd; - while (off < max && (noBackSlash = input[off] != '\\' || notEscapeEnd < off)) { + while (off < max && (noBackSlash = input.charAt(off) != '\\' || notEscapeEnd < off)) { off++; } @@ -55,15 +52,15 @@ public final class JavaInputReader extends EscapeAwareReader { } final int firstBslashOff = off; - while (off < input.length && input[off] == '\\') { + while (off < input.length() && input.charAt(off) == '\\') { off++; } int bslashCount = off - firstBslashOff; // this condition is "is there an escape at offset firstBslashOff" if ((bslashCount & 1) == 1 // odd number of backslashes - && off < input.length - 4 // at least 5 chars to form the escape ('u' + 4 hex digits) - && input[off] == 'u') { // the char after the last backslash is a 'u' + && off < input.length() - 4 // at least 5 chars to form the escape ('u' + 4 hex digits) + && input.charAt(off) == 'u') { // the char after the last backslash is a 'u' replaceFirstBackslashWithEscape(firstBslashOff, off); this.savedNotEscapeSpecialEnd = Integer.MAX_VALUE; @@ -84,12 +81,12 @@ public final class JavaInputReader extends EscapeAwareReader { private void replaceFirstBackslashWithEscape(int posOfFirstBackSlash, int offOfTheU) throws IOException { try { char c = (char) - ( hexVal(input[++offOfTheU]) << 12 - | hexVal(input[++offOfTheU]) << 8 - | hexVal(input[++offOfTheU]) << 4 - | hexVal(input[++offOfTheU]) + ( hexVal(input.charAt(++offOfTheU)) << 12 + | hexVal(input.charAt(++offOfTheU)) << 8 + | hexVal(input.charAt(++offOfTheU)) << 4 + | hexVal(input.charAt(++offOfTheU)) ); - input[posOfFirstBackSlash] = c; // replace the start char of the backslash + input.set(posOfFirstBackSlash, c); // replace the start char of the backslash } catch (NumberFormatException e) { String message = "Invalid escape sequence at line " diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/NewCharStream.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/NewCharStream.java index 18fa601b90..3b50b6433e 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/NewCharStream.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/NewCharStream.java @@ -15,15 +15,17 @@ public class NewCharStream implements CharStream { private final JavaccTokenDocument document; private final Cursor cursor; - public NewCharStream(JavaccTokenDocument document, EscapeTracker.Cursor cursor) { + private NewCharStream(JavaccTokenDocument document, EscapeTracker.Cursor cursor) { this.document = document; this.cursor = cursor; } - public static CharStream consume(EscapeAwareReader reader, JavaccTokenDocument doc) throws IOException { - try (EscapeAwareReader r = reader) { + public static CharStream open(JavaccTokenDocument doc) { + try (EscapeAwareReader reader = doc.newReader(doc.getTextDocument().getText())) { reader.translate(); return new NewCharStream(doc, reader.escapes.new Cursor(reader.input)); + } catch (IOException e) { + throw new RuntimeException(e); } } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStreamFactory.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStreamFactory.java deleted file mode 100644 index dc1f0ce238..0000000000 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStreamFactory.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * BSD-style license; for more info see http://pmd.sourceforge.net/license.html - */ - -package net.sourceforge.pmd.lang.ast.impl.javacc; - -import java.io.IOException; -import java.io.Reader; -import java.util.function.Function; - -import org.apache.commons.io.IOUtils; - -import net.sourceforge.pmd.lang.ast.CharStream; -import net.sourceforge.pmd.util.document.TextDocument; -import net.sourceforge.pmd.util.document.CpdCompat; - -public final class CharStreamFactory { - - private CharStreamFactory() { - // util class - } - - /** - * A char stream that doesn't perform any escape translation. - */ - public static CharStream simpleCharStream(Reader input) { - return simpleCharStream(input, JavaccTokenDocument::new); - } - - /** - * A char stream that doesn't perform any escape translation. - */ - public static CharStream simpleCharStream(Reader input, Function documentMaker) { - String source = toString(input); - JavaccTokenDocument document = documentMaker.apply(TextDocument.readOnlyString(source, CpdCompat.dummyVersion())); - return new SimpleCharStream(document); - } - - /** - * A char stream that translates java unicode sequences. - */ - public static CharStream javaCharStream(Reader input) { - return javaCharStream(input, JavaccTokenDocument::new); - } - - /** - * A char stream that translates java unicode sequences. - */ - public static CharStream javaCharStream(Reader input, Function documentMaker) { - String source = toString(input); - JavaccTokenDocument tokens = documentMaker.apply(TextDocument.readOnlyString(source, CpdCompat.dummyVersion())); - return new JavaCharStream(tokens); - } - - /** - * @deprecated This shouldn't be used. IOExceptions should be handled properly, - * ie it should be expected that creating a parse may throw an IOException, - * in both CPD and PMD - */ - @Deprecated - public static String toString(Reader dstream) { - try (Reader r = dstream) { - return IOUtils.toString(r); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - -} diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaCharStream.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaCharStream.java deleted file mode 100644 index 87f7d6b059..0000000000 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaCharStream.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * BSD-style license; for more info see http://pmd.sourceforge.net/license.html - */ - -package net.sourceforge.pmd.lang.ast.impl.javacc; - -import java.io.EOFException; -import java.io.IOException; - -import net.sourceforge.pmd.util.document.Chars; - -/** - * This stream buffers the whole file in memory before parsing, - * and track start/end offsets of tokens. This allows building {@link JavaccToken}. - * The buffer is assumed to be composed of only ASCII characters, - * and the stream unescapes Unicode escapes. The {@link #getTokenDocument() token document} - * stores the original file with escapes and all. - */ -public class JavaCharStream extends JavaCharStreamBase { - - // full text with nothing escaped and all - private final Chars fullText; - private final JavaccTokenDocument document; - - private int[] startOffsets; - - public JavaCharStream(JavaccTokenDocument document) { - super(document.getTextDocument().newReader()); - this.fullText = document.getFullText(); - this.document = document; - this.startOffsets = new int[bufsize]; - maxNextCharInd = fullText.length(); - - nextCharBuf = null; - } - - @Override - protected void ExpandBuff(boolean wrapAround) { - int[] newStartOffsets = new int[bufsize + 2048]; - - if (wrapAround) { - System.arraycopy(startOffsets, tokenBegin, newStartOffsets, 0, bufsize - tokenBegin); - System.arraycopy(startOffsets, 0, newStartOffsets, bufsize - tokenBegin, bufpos); - startOffsets = newStartOffsets; - } else { - System.arraycopy(startOffsets, tokenBegin, newStartOffsets, 0, bufsize - tokenBegin); - startOffsets = newStartOffsets; - } - - super.ExpandBuff(wrapAround); - } - - @Override - protected void UpdateLineColumn(char c) { - startOffsets[bufpos] = nextCharInd; - super.UpdateLineColumn(c); - } - - @Override - public int getStartOffset() { - return startOffsets[tokenBegin]; - } - - @Override - public int getEndOffset() { - if (isAtEof()) { - return fullText.length(); - } else { - return startOffsets[bufpos] + 1; // + 1 for exclusive - } - } - - @Override - public JavaccTokenDocument getTokenDocument() { - return document; - } - - @Override - public String GetImage() { - if (bufpos >= tokenBegin) { - return new String(buffer, tokenBegin, bufpos - tokenBegin + 1); - } else { - return new String(buffer, tokenBegin, bufsize - tokenBegin) - + new String(buffer, 0, bufpos + 1); - } - } - - @Override - protected char ReadByte() throws IOException { - ++nextCharInd; - - if (isAtEof()) { - if (bufpos != 0) { - --bufpos; - if (bufpos < 0) { - bufpos += bufsize; - } - } else { - bufline[bufpos] = line; - bufcolumn[bufpos] = column; - startOffsets[bufpos] = fullText.length(); - } - throw new EOFException(); - } - - return fullText.charAt(nextCharInd); - } - - private boolean isAtEof() { - return nextCharInd >= fullText.length(); - } - - - @Override - protected void FillBuff() { - throw new IllegalStateException("Buffer shouldn't be refilled"); - } - -} diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaccTokenDocument.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaccTokenDocument.java index bb5c3c608c..8990aacd04 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaccTokenDocument.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaccTokenDocument.java @@ -9,6 +9,9 @@ import org.checkerframework.checker.nullness.qual.Nullable; import net.sourceforge.pmd.lang.ast.CharStream; import net.sourceforge.pmd.lang.ast.impl.TokenDocument; +import net.sourceforge.pmd.lang.ast.impl.io.EscapeAwareReader; +import net.sourceforge.pmd.lang.ast.impl.io.NewCharStream; +import net.sourceforge.pmd.util.document.Chars; import net.sourceforge.pmd.util.document.TextDocument; /** @@ -23,6 +26,16 @@ public class JavaccTokenDocument extends TokenDocument { super(textDocument); } + + public EscapeAwareReader newReader(Chars text) { + return new EscapeAwareReader(text); + } + + public CharStream newCharStream() { + return NewCharStream.open(this); + } + + /** * Open the document. This is only meant to be used by a Javacc-generated * parser. diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/SimpleCharStream.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/SimpleCharStream.java deleted file mode 100644 index d453b34797..0000000000 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/SimpleCharStream.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * BSD-style license; for more info see http://pmd.sourceforge.net/license.html - */ - -package net.sourceforge.pmd.lang.ast.impl.javacc; - -/** - * A char stream that does not perform unicode escaping. - */ -public class SimpleCharStream extends JavaCharStream { - - public SimpleCharStream(JavaccTokenDocument document) { - super(document); - } - - @Override - protected boolean doEscape() { - return false; - } -} diff --git a/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java b/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java index 5b145f05a7..6aa98771b6 100644 --- a/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java +++ b/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java @@ -6,18 +6,21 @@ package net.sourceforge.pmd.cpd; import java.io.BufferedReader; import java.io.IOException; -import java.io.Reader; -import java.io.StringReader; import java.util.Properties; +import org.checkerframework.checker.nullness.qual.Nullable; + import net.sourceforge.pmd.PMD; import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer; import net.sourceforge.pmd.lang.TokenManager; import net.sourceforge.pmd.lang.ast.CharStream; +import net.sourceforge.pmd.lang.ast.impl.io.EscapeAwareReader; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken; -import net.sourceforge.pmd.lang.cpp.ast.CppCharStream; +import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument; +import net.sourceforge.pmd.lang.cpp.ast.CppEscapeReader; import net.sourceforge.pmd.lang.cpp.ast.CppTokenKinds; -import net.sourceforge.pmd.util.IOUtil; +import net.sourceforge.pmd.util.document.Chars; +import net.sourceforge.pmd.util.document.TextDocument; /** * The C++ tokenizer. @@ -32,6 +35,12 @@ public class CPPTokenizer extends JavaCCTokenizer { setProperties(new Properties()); // set the defaults } + // override to make it visible in tests + @Override + protected TokenManager getLexerForSource(SourceCode sourceCode) throws IOException { + return super.getLexerForSource(sourceCode); + } + /** * Sets the possible options for the C++ tokenizer. * @@ -54,12 +63,12 @@ public class CPPTokenizer extends JavaCCTokenizer { } } - private String maybeSkipBlocks(String test) throws IOException { + private Chars maybeSkipBlocks(Chars test) throws IOException { if (!skipBlocks) { return test; } - try (BufferedReader reader = new BufferedReader(new StringReader(test))) { + try (BufferedReader reader = new BufferedReader(test.newReader())) { StringBuilder filtered = new StringBuilder(test.length()); String line; boolean skip = false; @@ -75,26 +84,28 @@ public class CPPTokenizer extends JavaCCTokenizer { // always add a new line to keep the line-numbering filtered.append(PMD.EOL); } - return filtered.toString(); + return Chars.wrap(filtered, false); } } @Override - protected CharStream makeCharStream(Reader sourceCode) { - return CppCharStream.newCppCharStream(sourceCode); + protected JavaccTokenDocument newTokenDoc(TextDocument textDoc) { + return new JavaccTokenDocument(textDoc) { + @Override + public EscapeAwareReader newReader(Chars text) { + return new CppEscapeReader(text); + } + + @Override + protected @Nullable String describeKindImpl(int kind) { + return CppTokenKinds.describe(kind); + } + }; } @Override protected TokenManager makeLexerImpl(CharStream sourceCode) { return CppTokenKinds.newTokenManager(sourceCode); } - - @SuppressWarnings("PMD.CloseResource") - @Override - protected TokenManager getLexerForSource(SourceCode sourceCode) throws IOException { - Reader reader = IOUtil.skipBOM(new StringReader(maybeSkipBlocks(sourceCode.getCodeBuffer().toString()))); - CharStream charStream = makeCharStream(reader); - return makeLexerImpl(charStream); - } } diff --git a/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/ast/CppCharStream.java b/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/ast/CppCharStream.java deleted file mode 100644 index c7f7aec7c2..0000000000 --- a/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/ast/CppCharStream.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * BSD-style license; for more info see http://pmd.sourceforge.net/license.html - */ - -package net.sourceforge.pmd.lang.cpp.ast; - -import java.io.IOException; -import java.io.Reader; -import java.util.regex.Pattern; - -import org.checkerframework.checker.nullness.qual.Nullable; - -import net.sourceforge.pmd.lang.ast.impl.javacc.CharStreamFactory; -import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument; -import net.sourceforge.pmd.lang.ast.impl.javacc.SimpleCharStream; -import net.sourceforge.pmd.util.document.TextDocument; -import net.sourceforge.pmd.util.document.CpdCompat; - -/** - * A SimpleCharStream, that supports the continuation of lines via backslash+newline, - * which is used in C/C++. - * - * @author Andreas Dangel - */ -public class CppCharStream extends SimpleCharStream { - - private static final Pattern CONTINUATION = Pattern.compile("\\\\\\n|\\\\\\r\\n"); - private static final char BACKSLASH = '\\'; - private static final char NEWLINE = '\n'; - private static final char CARRIAGE_RETURN = '\r'; - - CppCharStream(JavaccTokenDocument document) { - super(document); - } - - - @Override - public char readChar() throws IOException { - char c = super.readChar(); - if (c == BACKSLASH) { - char c1 = super.readChar(); - if (c1 == NEWLINE) { - c = super.readChar(); - } else if (c1 == CARRIAGE_RETURN) { - char c2 = super.readChar(); - if (c2 == NEWLINE) { - c = super.readChar(); - } else { - backup(2); - } - } else { - backup(1); - } - } - return c; - } - - @Override - public char[] GetSuffix(int len) { - String image = GetImage(); - return image.substring(image.length() - len, image.length()).toCharArray(); - } - - @Override - public String GetImage() { - String image = super.GetImage(); - return CONTINUATION.matcher(image).replaceAll(""); - } - - public static CppCharStream newCppCharStream(Reader dstream) { - String source = CharStreamFactory.toString(dstream); - JavaccTokenDocument document = new JavaccTokenDocument(TextDocument.readOnlyString(source, CpdCompat.dummyVersion())) { - @Override - protected @Nullable String describeKindImpl(int kind) { - return CppTokenKinds.describe(kind); - } - }; - return new CppCharStream(document); - } -} diff --git a/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/ast/CppEscapeReader.java b/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/ast/CppEscapeReader.java new file mode 100644 index 0000000000..378dfb1011 --- /dev/null +++ b/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/ast/CppEscapeReader.java @@ -0,0 +1,61 @@ +/* + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.lang.cpp.ast; + +import static java.lang.Integer.min; + +import java.io.IOException; + +import net.sourceforge.pmd.lang.ast.impl.io.EscapeAwareReader; +import net.sourceforge.pmd.util.document.Chars; + +public class CppEscapeReader extends EscapeAwareReader { + + private static final char NEWLINE = '\n'; + private static final char CARRIAGE_RETURN = '\r'; + + private int savedNotEscapeSpecialEnd = Integer.MAX_VALUE; + + public CppEscapeReader(Chars input) { + super(input); + } + + @Override + protected int gobbleMaxWithoutEscape(int bufpos, int maxReadahead) throws IOException { + int off = bufpos; + int max = min(bufpos + maxReadahead, input.length()); + boolean noBackSlash = false; + int notEscapeEnd = this.savedNotEscapeSpecialEnd; + while (off < max && (noBackSlash = input.charAt(off) != '\\' || notEscapeEnd < off)) { + off++; + } + + if (noBackSlash) { + this.bufpos = off; + return off; + } + + final int backSlackOff = off++; + if (input.charAt(off) == NEWLINE) { + recordEscape(backSlackOff, 2); + this.bufpos = off + 2; + return backSlackOff; + } else if (input.charAt(off) == CARRIAGE_RETURN) { + if (input.charAt(++off) == NEWLINE) { + recordEscape(backSlackOff, 3); + this.bufpos = off + 3; + return backSlackOff; + } + } + + // not an escape sequence + int min = min(bufpos + maxReadahead, off); + // save the number of backslashes that are part of the escape, + // might have been cut in half by the maxReadahead + this.savedNotEscapeSpecialEnd = min < off ? off : Integer.MAX_VALUE; + this.bufpos = min; + return min; + } +} diff --git a/pmd-cpp/src/test/java/net/sourceforge/pmd/lang/cpp/ast/CppCharStreamTest.java b/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CppCharStreamTest.java similarity index 61% rename from pmd-cpp/src/test/java/net/sourceforge/pmd/lang/cpp/ast/CppCharStreamTest.java rename to pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CppCharStreamTest.java index 8196b4b079..9d6df7dd49 100644 --- a/pmd-cpp/src/test/java/net/sourceforge/pmd/lang/cpp/ast/CppCharStreamTest.java +++ b/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CppCharStreamTest.java @@ -2,38 +2,47 @@ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html */ -package net.sourceforge.pmd.lang.cpp.ast; +package net.sourceforge.pmd.cpd; import static org.junit.Assert.assertEquals; import java.io.IOException; -import java.io.StringReader; +import org.checkerframework.checker.nullness.qual.NonNull; import org.junit.Test; +import net.sourceforge.pmd.lang.ast.CharStream; +import net.sourceforge.pmd.lang.ast.impl.io.NewCharStream; +import net.sourceforge.pmd.util.document.TextDocument; + public class CppCharStreamTest { + @NonNull + public CharStream charStreamFor(String source) { + return NewCharStream.open(new CPPTokenizer().newTokenDoc(TextDocument.readOnlyString(source))); + } + @Test public void testContinuationUnix() throws IOException { - CppCharStream stream = CppCharStream.newCppCharStream(new StringReader("a\\\nb")); + CharStream stream = charStreamFor("a\\\nb"); assertStream(stream, "ab"); } @Test public void testContinuationWindows() throws IOException { // note that the \r is normalized to a \n by the TextFile - CppCharStream stream = CppCharStream.newCppCharStream(new StringReader("a\\\r\nb")); + CharStream stream = charStreamFor("a\\\r\nb"); assertStream(stream, "ab"); } @Test public void testBackup() throws IOException { // note that the \r is normalized to a \n by the TextFile - CppCharStream stream = CppCharStream.newCppCharStream(new StringReader("a\\b\\qc")); + CharStream stream = charStreamFor("a\\b\\\rc"); assertStream(stream, "a\\b\\qc"); } - private void assertStream(CppCharStream stream, String token) throws IOException { + private void assertStream(CharStream stream, String token) throws IOException { char c = stream.BeginToken(); assertEquals(token.charAt(0), c); for (int i = 1; i < token.length(); i++) { diff --git a/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java b/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java index b57c509d2e..83c69ae100 100644 --- a/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java +++ b/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java @@ -5,7 +5,6 @@ package net.sourceforge.pmd.cpd; import java.io.IOException; -import java.io.Reader; import java.util.Deque; import java.util.LinkedList; import java.util.Properties; @@ -15,10 +14,11 @@ import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter; import net.sourceforge.pmd.cpd.token.TokenFilter; import net.sourceforge.pmd.lang.TokenManager; import net.sourceforge.pmd.lang.ast.CharStream; -import net.sourceforge.pmd.lang.ast.impl.javacc.CharStreamFactory; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken; +import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument; import net.sourceforge.pmd.lang.java.ast.InternalApiBridge; import net.sourceforge.pmd.lang.java.ast.JavaTokenKinds; +import net.sourceforge.pmd.util.document.TextDocument; public class JavaTokenizer extends JavaCCTokenizer { @@ -44,8 +44,8 @@ public class JavaTokenizer extends JavaCCTokenizer { } @Override - protected CharStream makeCharStream(Reader sourceCode) { - return CharStreamFactory.javaCharStream(sourceCode, InternalApiBridge::javaTokenDoc); + protected JavaccTokenDocument newTokenDoc(TextDocument textDoc) { + return InternalApiBridge.javaTokenDoc(textDoc); } @Override diff --git a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaParser.java b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaParser.java index d7af60de77..a3bf759260 100644 --- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaParser.java +++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaParser.java @@ -6,7 +6,6 @@ package net.sourceforge.pmd.lang.java.ast; import net.sourceforge.pmd.lang.ast.CharStream; import net.sourceforge.pmd.lang.ast.ParseException; -import net.sourceforge.pmd.lang.ast.impl.javacc.JavaCharStream; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument; import net.sourceforge.pmd.lang.ast.impl.javacc.JjtreeParserAdapter; import net.sourceforge.pmd.lang.java.ast.internal.LanguageLevelChecker; @@ -32,11 +31,6 @@ public class JavaParser extends JjtreeParserAdapter { return new JavaTokenDocument(textDocument); } - @Override - protected CharStream newCharStream(JavaccTokenDocument tokenDocument) { - return new JavaCharStream(tokenDocument); - } - @Override protected ASTCompilationUnit parseImpl(CharStream cs, ParserTask task) throws ParseException { JavaParserImpl parser = new JavaParserImpl(cs); diff --git a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaTokenDocument.java b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaTokenDocument.java index e00a9102d6..a3a67d2fb0 100644 --- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaTokenDocument.java +++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaTokenDocument.java @@ -15,8 +15,11 @@ import static net.sourceforge.pmd.lang.java.ast.JavaTokenKinds.WHITESPACE; import org.checkerframework.checker.nullness.qual.Nullable; import net.sourceforge.pmd.lang.ast.CharStream; +import net.sourceforge.pmd.lang.ast.impl.io.EscapeAwareReader; +import net.sourceforge.pmd.lang.ast.impl.io.JavaInputReader; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument; +import net.sourceforge.pmd.util.document.Chars; import net.sourceforge.pmd.util.document.TextDocument; /** @@ -43,6 +46,11 @@ final class JavaTokenDocument extends JavaccTokenDocument { } + @Override + public EscapeAwareReader newReader(Chars text) { + return new JavaInputReader(text); + } + @Override protected @Nullable String describeKindImpl(int kind) { return JavaTokenKinds.describe(kind); diff --git a/pmd-jsp/src/main/java/net/sourceforge/pmd/cpd/JSPTokenizer.java b/pmd-jsp/src/main/java/net/sourceforge/pmd/cpd/JSPTokenizer.java index b6383affca..fcb65f40f2 100644 --- a/pmd-jsp/src/main/java/net/sourceforge/pmd/cpd/JSPTokenizer.java +++ b/pmd-jsp/src/main/java/net/sourceforge/pmd/cpd/JSPTokenizer.java @@ -4,15 +4,16 @@ package net.sourceforge.pmd.cpd; -import java.io.IOException; -import java.io.Reader; - import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer; import net.sourceforge.pmd.lang.TokenManager; import net.sourceforge.pmd.lang.ast.CharStream; -import net.sourceforge.pmd.lang.ast.impl.javacc.CharStreamFactory; +import net.sourceforge.pmd.lang.ast.impl.io.EscapeAwareReader; +import net.sourceforge.pmd.lang.ast.impl.io.JavaInputReader; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken; +import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument; import net.sourceforge.pmd.lang.jsp.ast.JspTokenKinds; +import net.sourceforge.pmd.util.document.Chars; +import net.sourceforge.pmd.util.document.TextDocument; public class JSPTokenizer extends JavaCCTokenizer { @@ -22,7 +23,13 @@ public class JSPTokenizer extends JavaCCTokenizer { } @Override - protected CharStream makeCharStream(Reader sourceCode) throws IOException { - return CharStreamFactory.javaCharStream(sourceCode); + protected JavaccTokenDocument newTokenDoc(TextDocument textDoc) { + return new JavaccTokenDocument(textDoc) { + @Override + public EscapeAwareReader newReader(Chars text) { + return new JavaInputReader(text); + } + }; } + } diff --git a/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/ast/PLSQLParser.java b/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/ast/PLSQLParser.java index 65cb990593..ed2cb3e08d 100644 --- a/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/ast/PLSQLParser.java +++ b/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/ast/PLSQLParser.java @@ -11,7 +11,6 @@ import net.sourceforge.pmd.lang.ast.ParseException; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument; import net.sourceforge.pmd.lang.ast.impl.javacc.JjtreeParserAdapter; import net.sourceforge.pmd.util.document.TextDocument; -import net.sourceforge.pmd.util.document.TextDocument; public class PLSQLParser extends JjtreeParserAdapter { diff --git a/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java b/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java index 8e9a9a3d46..9e61ea2067 100644 --- a/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java +++ b/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java @@ -4,7 +4,6 @@ package net.sourceforge.pmd.cpd; -import java.io.Reader; import java.util.regex.Pattern; import org.checkerframework.checker.nullness.qual.Nullable; @@ -12,7 +11,6 @@ import org.checkerframework.checker.nullness.qual.Nullable; import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer; import net.sourceforge.pmd.lang.TokenManager; import net.sourceforge.pmd.lang.ast.CharStream; -import net.sourceforge.pmd.lang.ast.impl.javacc.CharStreamFactory; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument; import net.sourceforge.pmd.lang.python.ast.PythonTokenKinds; @@ -31,8 +29,8 @@ public class PythonTokenizer extends JavaCCTokenizer { } @Override - protected CharStream makeCharStream(Reader sourceCode) { - return CharStreamFactory.simpleCharStream(sourceCode, PythonTokenDocument::new); + protected JavaccTokenDocument newTokenDoc(TextDocument textDoc) { + return new PythonTokenDocument(textDoc); } private static class PythonTokenDocument extends JavaccTokenDocument { diff --git a/pmd-visualforce/src/main/java/net/sourceforge/pmd/cpd/VfTokenizer.java b/pmd-visualforce/src/main/java/net/sourceforge/pmd/cpd/VfTokenizer.java index f4166db699..8fa9212144 100644 --- a/pmd-visualforce/src/main/java/net/sourceforge/pmd/cpd/VfTokenizer.java +++ b/pmd-visualforce/src/main/java/net/sourceforge/pmd/cpd/VfTokenizer.java @@ -4,15 +4,16 @@ package net.sourceforge.pmd.cpd; -import java.io.IOException; -import java.io.Reader; - import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer; import net.sourceforge.pmd.lang.TokenManager; import net.sourceforge.pmd.lang.ast.CharStream; -import net.sourceforge.pmd.lang.ast.impl.javacc.CharStreamFactory; +import net.sourceforge.pmd.lang.ast.impl.io.EscapeAwareReader; +import net.sourceforge.pmd.lang.ast.impl.io.JavaInputReader; import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken; +import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument; import net.sourceforge.pmd.lang.vf.ast.VfTokenKinds; +import net.sourceforge.pmd.util.document.Chars; +import net.sourceforge.pmd.util.document.TextDocument; /** * @author sergey.gorbaty @@ -25,7 +26,13 @@ public class VfTokenizer extends JavaCCTokenizer { } @Override - protected CharStream makeCharStream(Reader sourceCode) throws IOException { - return CharStreamFactory.javaCharStream(sourceCode); + protected JavaccTokenDocument newTokenDoc(TextDocument textDoc) { + return new JavaccTokenDocument(textDoc) { + @Override + public EscapeAwareReader newReader(Chars text) { + return new JavaInputReader(text); + } + }; } + }