From b779abd08b500a5f94ff592f5e72bbfc9de4e252 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Fournier?= <clement.fournier76@gmail.com>
Date: Sat, 18 Apr 2020 07:25:22 +0200
Subject: [PATCH] Align cpd

---
 .../pmd/cpd/internal/JavaCCTokenizer.java     |  18 +--
 .../lang/ast/impl/io/EscapeAwareReader.java   |  28 ++---
 .../pmd/lang/ast/impl/io/EscapeTracker.java   |   8 +-
 .../pmd/lang/ast/impl/io/JavaInputReader.java |  29 ++---
 .../pmd/lang/ast/impl/io/NewCharStream.java   |   8 +-
 .../ast/impl/javacc/CharStreamFactory.java    |  69 ----------
 .../lang/ast/impl/javacc/JavaCharStream.java  | 119 ------------------
 .../ast/impl/javacc/JavaccTokenDocument.java  |  13 ++
 .../ast/impl/javacc/SimpleCharStream.java     |  20 ---
 .../net/sourceforge/pmd/cpd/CPPTokenizer.java |  45 ++++---
 .../pmd/lang/cpp/ast/CppCharStream.java       |  80 ------------
 .../pmd/lang/cpp/ast/CppEscapeReader.java     |  61 +++++++++
 .../cpp/ast => cpd}/CppCharStreamTest.java    |  21 +++-
 .../sourceforge/pmd/cpd/JavaTokenizer.java    |   8 +-
 .../pmd/lang/java/ast/JavaParser.java         |   6 -
 .../pmd/lang/java/ast/JavaTokenDocument.java  |   8 ++
 .../net/sourceforge/pmd/cpd/JSPTokenizer.java |  19 ++-
 .../pmd/lang/plsql/ast/PLSQLParser.java       |   1 -
 .../sourceforge/pmd/cpd/PythonTokenizer.java  |   6 +-
 .../net/sourceforge/pmd/cpd/VfTokenizer.java  |  19 ++-
 20 files changed, 198 insertions(+), 388 deletions(-)
 delete mode 100644 pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStreamFactory.java
 delete mode 100644 pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaCharStream.java
 delete mode 100644 pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/SimpleCharStream.java
 delete mode 100644 pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/ast/CppCharStream.java
 create mode 100644 pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/ast/CppEscapeReader.java
 rename pmd-cpp/src/test/java/net/sourceforge/pmd/{lang/cpp/ast => cpd}/CppCharStreamTest.java (61%)
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/JavaCCTokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/JavaCCTokenizer.java
index 4c4d248aaf..5e3c06a735 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/JavaCCTokenizer.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/internal/JavaCCTokenizer.java
@@ -5,9 +5,6 @@
 package net.sourceforge.pmd.cpd.internal;
 
 import java.io.IOException;
-import java.io.Reader;
-
-import org.apache.commons.io.input.CharSequenceReader;
 
 import net.sourceforge.pmd.cpd.SourceCode;
 import net.sourceforge.pmd.cpd.TokenEntry;
@@ -18,20 +15,23 @@ import net.sourceforge.pmd.cpd.token.TokenFilter;
 import net.sourceforge.pmd.lang.TokenManager;
 import net.sourceforge.pmd.lang.ast.CharStream;
 import net.sourceforge.pmd.lang.ast.TokenMgrError;
-import net.sourceforge.pmd.lang.ast.impl.javacc.CharStreamFactory;
+import net.sourceforge.pmd.lang.ast.impl.io.NewCharStream;
 import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
-import net.sourceforge.pmd.util.IOUtil;
+import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument;
+import net.sourceforge.pmd.util.document.TextDocument;
+import net.sourceforge.pmd.util.document.io.TextFile;
 
 public abstract class JavaCCTokenizer implements Tokenizer {
 
     @SuppressWarnings("PMD.CloseResource")
     protected TokenManager<JavaccToken> getLexerForSource(SourceCode sourceCode) throws IOException {
-        Reader reader = IOUtil.skipBOM(new CharSequenceReader(sourceCode.getCodeBuffer()));
-        return makeLexerImpl(makeCharStream(reader));
+        TextDocument textDocument = TextDocument.create(TextFile.cpdCompat(sourceCode));
+        JavaccTokenDocument tokenDoc = newTokenDoc(textDocument);
+        return makeLexerImpl(NewCharStream.open(tokenDoc));
     }
 
-    protected CharStream makeCharStream(Reader sourceCode) throws IOException {
-        return CharStreamFactory.simpleCharStream(sourceCode);
+    protected JavaccTokenDocument newTokenDoc(TextDocument textDoc) {
+        return new JavaccTokenDocument(textDoc);
     }
 
     protected abstract TokenManager<JavaccToken> makeLexerImpl(CharStream sourceCode);
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/EscapeAwareReader.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/EscapeAwareReader.java
index e026544557..69ee25d9d7 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/EscapeAwareReader.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/EscapeAwareReader.java
@@ -11,6 +11,7 @@ import java.io.Reader;
 import java.nio.CharBuffer;
 
 import net.sourceforge.pmd.util.StringUtil;
+import net.sourceforge.pmd.util.document.Chars;
 
 /**
  * A reader that optionally escapes its input text. It records where
@@ -34,29 +35,18 @@ public class EscapeAwareReader extends Reader {
      * replace the first char with a backslash. We can report unnecessary
      * escapes that way.
      */
-    protected char[] input;
+    protected Chars input;
     /** Position of the next char to read in the input. */
     protected int bufpos;
     /** Keep track of adjustments to make to the offsets, caused by unicode escapes. */
     final EscapeTracker escapes = new EscapeTracker();
 
-    public EscapeAwareReader(CharSequence input, int startIdxInclusive, int endIdxExclusive) {
+    public EscapeAwareReader(Chars input) {
         assert input != null;
-        assert startIdxInclusive >= 0;
-        assert endIdxExclusive >= 0;
-        assert endIdxExclusive >= startIdxInclusive;
-
-        int len = endIdxExclusive - startIdxInclusive;
-
-        this.input = new char[len];
-        input.toString().getChars(startIdxInclusive, endIdxExclusive, this.input, 0);
+        this.input = input.mutableCopy();
         bufpos = 0;
     }
 
-    public EscapeAwareReader(CharSequence input) {
-        this(input, 0, input.length());
-    }
-
     /**
      * Translate all the characters in the buffer.
      */
@@ -68,12 +58,12 @@ public class EscapeAwareReader extends Reader {
     @Override
     public int read(final char[] cbuf, final int off, final int len) throws IOException {
         ensureOpen();
-        if (this.bufpos == input.length) {
+        if (this.bufpos == input.length()) {
             return -1;
         }
 
         int readChars = 0;
-        while (readChars < len && this.bufpos < input.length) {
+        while (readChars < len && this.bufpos < input.length()) {
             int bpos = this.bufpos;
             int nextJump = gobbleMaxWithoutEscape(bpos, len - readChars);
             int newlyReadChars = nextJump - bpos;
@@ -82,9 +72,9 @@ public class EscapeAwareReader extends Reader {
 
             if (newlyReadChars != 0) {
                 if (cbuf != null) {
-                    System.arraycopy(input, bpos, cbuf, off + readChars, newlyReadChars);
+                    input.getChars(bpos, cbuf, off + readChars, newlyReadChars);
                 }
-            } else if (nextJump == input.length) {
+            } else if (nextJump == input.length()) {
                 // eof
                 break;
             }
@@ -100,7 +90,7 @@ public class EscapeAwareReader extends Reader {
      * the bufpos to where we should start the next jump.
      */
     protected int gobbleMaxWithoutEscape(final int bufpos, final int maxReadahead) throws IOException {
-        return this.bufpos = min(bufpos + maxReadahead, input.length);
+        return this.bufpos = min(bufpos + maxReadahead, input.length());
     }
 
     protected void recordEscape(final int startOffsetInclusive, int length) {
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/EscapeTracker.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/EscapeTracker.java
index 8e39d172d3..ecd74755b3 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/EscapeTracker.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/EscapeTracker.java
@@ -4,6 +4,8 @@
 
 package net.sourceforge.pmd.lang.ast.impl.io;
 
+import net.sourceforge.pmd.util.document.Chars;
+
 /**
  * Records where escapes occurred in the input document. This is quite
  * an inefficient way to deal with it, yet in the common case where there
@@ -94,18 +96,18 @@ class EscapeTracker {
          * identified by the {@link #escapeRecords}. It must hold
          * that buf.length is the original source length.
          */
-        private final char[] buf;
+        private final Chars buf;
 
         private int mark = Integer.MAX_VALUE;
         private int markEscape;
         private int markOutOffset;
 
-        Cursor(char[] buf) {
+        Cursor(Chars buf) {
             this.buf = buf;
         }
 
         char next() {
-            char c = buf[pos];
+            char c = buf.charAt(pos);
 
             if (nextEscape < escapeRecords.length && pos == escapeRecords[nextEscape]) {
                 pos += escapeRecords[nextEscape + 1]; // add escape length
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/JavaInputReader.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/JavaInputReader.java
index a3bbcd4dfa..d5b84c7c1d 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/JavaInputReader.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/JavaInputReader.java
@@ -9,6 +9,8 @@ import static java.lang.Integer.min;
 import java.io.BufferedReader;
 import java.io.IOException;
 
+import net.sourceforge.pmd.util.document.Chars;
+
 /**
  * An implementation of java.io.Reader that translates Java unicode escapes.
  * This implementation has efficient block IO but poor char-by-char performance.
@@ -25,15 +27,10 @@ public final class JavaInputReader extends EscapeAwareReader {
      */
     private int savedNotEscapeSpecialEnd = Integer.MAX_VALUE;
 
-    public JavaInputReader(CharSequence input, int startIdxInclusive, int endIdxExclusive) {
-        super(input, startIdxInclusive, endIdxExclusive);
-    }
-
-    public JavaInputReader(CharSequence input) {
+    public JavaInputReader(Chars input) {
         super(input);
     }
 
-
     /**
      * Returns the max offset, EXclusive, with which we can cut the input
      * array from the bufpos to dump it into the output array. This sets
@@ -42,10 +39,10 @@ public final class JavaInputReader extends EscapeAwareReader {
     @Override
     protected int gobbleMaxWithoutEscape(final int bufpos, final int maxReadahead) throws IOException {
         int off = bufpos;
-        int max = min(bufpos + maxReadahead, input.length);
+        int max = min(bufpos + maxReadahead, input.length());
         boolean noBackSlash = false;
         int notEscapeEnd = this.savedNotEscapeSpecialEnd;
-        while (off < max && (noBackSlash = input[off] != '\\' || notEscapeEnd < off)) {
+        while (off < max && (noBackSlash = input.charAt(off) != '\\' || notEscapeEnd < off)) {
             off++;
         }
 
@@ -55,15 +52,15 @@ public final class JavaInputReader extends EscapeAwareReader {
         }
 
         final int firstBslashOff = off;
-        while (off < input.length && input[off] == '\\') {
+        while (off < input.length() && input.charAt(off) == '\\') {
             off++;
         }
 
         int bslashCount = off - firstBslashOff;
         // this condition is "is there an escape at offset firstBslashOff"
         if ((bslashCount & 1) == 1    // odd number of backslashes
-            && off < input.length - 4 // at least 5 chars to form the escape ('u' + 4 hex digits)
-            && input[off] == 'u') {   // the char after the last backslash is a 'u'
+            && off < input.length() - 4 // at least 5 chars to form the escape ('u' + 4 hex digits)
+            && input.charAt(off) == 'u') {   // the char after the last backslash is a 'u'
 
             replaceFirstBackslashWithEscape(firstBslashOff, off);
             this.savedNotEscapeSpecialEnd = Integer.MAX_VALUE;
@@ -84,12 +81,12 @@ public final class JavaInputReader extends EscapeAwareReader {
     private void replaceFirstBackslashWithEscape(int posOfFirstBackSlash, int offOfTheU) throws IOException {
         try {
             char c = (char)
-                    ( hexVal(input[++offOfTheU]) << 12
-                    | hexVal(input[++offOfTheU]) << 8
-                    | hexVal(input[++offOfTheU]) << 4
-                    | hexVal(input[++offOfTheU])
+                    ( hexVal(input.charAt(++offOfTheU)) << 12
+                    | hexVal(input.charAt(++offOfTheU)) << 8
+                    | hexVal(input.charAt(++offOfTheU)) << 4
+                    | hexVal(input.charAt(++offOfTheU))
                     );
-            input[posOfFirstBackSlash] = c; // replace the start char of the backslash
+            input.set(posOfFirstBackSlash, c); // replace the start char of the backslash
         } catch (NumberFormatException e) {
 
             String message = "Invalid escape sequence at line "
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/NewCharStream.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/NewCharStream.java
index 18fa601b90..3b50b6433e 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/NewCharStream.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/io/NewCharStream.java
@@ -15,15 +15,17 @@ public class NewCharStream implements CharStream {
     private final JavaccTokenDocument document;
     private final Cursor cursor;
 
-    public NewCharStream(JavaccTokenDocument document, EscapeTracker.Cursor cursor) {
+    private NewCharStream(JavaccTokenDocument document, EscapeTracker.Cursor cursor) {
         this.document = document;
         this.cursor = cursor;
     }
 
-    public static CharStream consume(EscapeAwareReader reader, JavaccTokenDocument doc) throws IOException {
-        try (EscapeAwareReader r = reader) {
+    public static CharStream open(JavaccTokenDocument doc) {
+        try (EscapeAwareReader reader = doc.newReader(doc.getTextDocument().getText())) {
             reader.translate();
             return new NewCharStream(doc, reader.escapes.new Cursor(reader.input));
+        } catch (IOException e) {
+            throw new RuntimeException(e);
         }
     }
 
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStreamFactory.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStreamFactory.java
deleted file mode 100644
index dc1f0ce238..0000000000
--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStreamFactory.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
- */
-
-package net.sourceforge.pmd.lang.ast.impl.javacc;
-
-import java.io.IOException;
-import java.io.Reader;
-import java.util.function.Function;
-
-import org.apache.commons.io.IOUtils;
-
-import net.sourceforge.pmd.lang.ast.CharStream;
-import net.sourceforge.pmd.util.document.TextDocument;
-import net.sourceforge.pmd.util.document.CpdCompat;
-
-public final class CharStreamFactory {
-
-    private CharStreamFactory() {
-        // util class
-    }
-
-    /**
-     * A char stream that doesn't perform any escape translation.
-     */
-    public static CharStream simpleCharStream(Reader input) {
-        return simpleCharStream(input, JavaccTokenDocument::new);
-    }
-
-    /**
-     * A char stream that doesn't perform any escape translation.
-     */
-    public static CharStream simpleCharStream(Reader input, Function<? super TextDocument, ? extends JavaccTokenDocument> documentMaker) {
-        String source = toString(input);
-        JavaccTokenDocument document = documentMaker.apply(TextDocument.readOnlyString(source, CpdCompat.dummyVersion()));
-        return new SimpleCharStream(document);
-    }
-
-    /**
-     * A char stream that translates java unicode sequences.
-     */
-    public static CharStream javaCharStream(Reader input) {
-        return javaCharStream(input, JavaccTokenDocument::new);
-    }
-
-    /**
-     * A char stream that translates java unicode sequences.
-     */
-    public static CharStream javaCharStream(Reader input, Function<? super TextDocument, ? extends JavaccTokenDocument> documentMaker) {
-        String source = toString(input);
-        JavaccTokenDocument tokens = documentMaker.apply(TextDocument.readOnlyString(source, CpdCompat.dummyVersion()));
-        return new JavaCharStream(tokens);
-    }
-
-    /**
-     * @deprecated This shouldn't be used. IOExceptions should be handled properly,
-     *     ie it should be expected that creating a parse may throw an IOException,
-     *     in both CPD and PMD
-     */
-    @Deprecated
-    public static String toString(Reader dstream) {
-        try (Reader r = dstream) {
-            return IOUtils.toString(r);
-        } catch (IOException e) {
-            throw new RuntimeException(e);
-        }
-    }
-
-}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaCharStream.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaCharStream.java
deleted file mode 100644
index 87f7d6b059..0000000000
--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaCharStream.java
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
- */
-
-package net.sourceforge.pmd.lang.ast.impl.javacc;
-
-import java.io.EOFException;
-import java.io.IOException;
-
-import net.sourceforge.pmd.util.document.Chars;
-
-/**
- * This stream buffers the whole file in memory before parsing,
- * and track start/end offsets of tokens. This allows building {@link JavaccToken}.
- * The buffer is assumed to be composed of only ASCII characters,
- * and the stream unescapes Unicode escapes. The {@link #getTokenDocument() token document}
- * stores the original file with escapes and all.
- */
-public class JavaCharStream extends JavaCharStreamBase {
-
-    // full text with nothing escaped and all
-    private final Chars fullText;
-    private final JavaccTokenDocument document;
-
-    private int[] startOffsets;
-
-    public JavaCharStream(JavaccTokenDocument document) {
-        super(document.getTextDocument().newReader());
-        this.fullText = document.getFullText();
-        this.document = document;
-        this.startOffsets = new int[bufsize];
-        maxNextCharInd = fullText.length();
-
-        nextCharBuf = null;
-    }
-
-    @Override
-    protected void ExpandBuff(boolean wrapAround) {
-        int[] newStartOffsets = new int[bufsize + 2048];
-
-        if (wrapAround) {
-            System.arraycopy(startOffsets, tokenBegin, newStartOffsets, 0, bufsize - tokenBegin);
-            System.arraycopy(startOffsets, 0, newStartOffsets, bufsize - tokenBegin, bufpos);
-            startOffsets = newStartOffsets;
-        } else {
-            System.arraycopy(startOffsets, tokenBegin, newStartOffsets, 0, bufsize - tokenBegin);
-            startOffsets = newStartOffsets;
-        }
-
-        super.ExpandBuff(wrapAround);
-    }
-
-    @Override
-    protected void UpdateLineColumn(char c) {
-        startOffsets[bufpos] = nextCharInd;
-        super.UpdateLineColumn(c);
-    }
-
-    @Override
-    public int getStartOffset() {
-        return startOffsets[tokenBegin];
-    }
-
-    @Override
-    public int getEndOffset() {
-        if (isAtEof()) {
-            return fullText.length();
-        } else {
-            return startOffsets[bufpos] + 1; // + 1 for exclusive
-        }
-    }
-
-    @Override
-    public JavaccTokenDocument getTokenDocument() {
-        return document;
-    }
-
-    @Override
-    public String GetImage() {
-        if (bufpos >= tokenBegin) {
-            return new String(buffer, tokenBegin, bufpos - tokenBegin + 1);
-        } else {
-            return new String(buffer, tokenBegin, bufsize - tokenBegin)
-                + new String(buffer, 0, bufpos + 1);
-        }
-    }
-
-    @Override
-    protected char ReadByte() throws IOException {
-        ++nextCharInd;
-
-        if (isAtEof()) {
-            if (bufpos != 0) {
-                --bufpos;
-                if (bufpos < 0) {
-                    bufpos += bufsize;
-                }
-            } else {
-                bufline[bufpos] = line;
-                bufcolumn[bufpos] = column;
-                startOffsets[bufpos] = fullText.length();
-            }
-            throw new EOFException();
-        }
-
-        return fullText.charAt(nextCharInd);
-    }
-
-    private boolean isAtEof() {
-        return nextCharInd >= fullText.length();
-    }
-
-
-    @Override
-    protected void FillBuff() {
-        throw new IllegalStateException("Buffer shouldn't be refilled");
-    }
-
-}
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaccTokenDocument.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaccTokenDocument.java
index bb5c3c608c..8990aacd04 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaccTokenDocument.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaccTokenDocument.java
@@ -9,6 +9,9 @@ import org.checkerframework.checker.nullness.qual.Nullable;
 
 import net.sourceforge.pmd.lang.ast.CharStream;
 import net.sourceforge.pmd.lang.ast.impl.TokenDocument;
+import net.sourceforge.pmd.lang.ast.impl.io.EscapeAwareReader;
+import net.sourceforge.pmd.lang.ast.impl.io.NewCharStream;
+import net.sourceforge.pmd.util.document.Chars;
 import net.sourceforge.pmd.util.document.TextDocument;
 
 /**
@@ -23,6 +26,16 @@ public class JavaccTokenDocument extends TokenDocument<JavaccToken> {
         super(textDocument);
     }
 
+    /** Creates the reader that translates escapes in the given text; this default one translates nothing. */
+    public EscapeAwareReader newReader(Chars text) {
+        return new EscapeAwareReader(text);
+    }
+
+    /** Opens a new char stream over the text of this document, see {@link NewCharStream#open}. */
+    public CharStream newCharStream() {
+        return NewCharStream.open(this);
+    }
+
     /**
      * Open the document. This is only meant to be used by a Javacc-generated
      * parser.
diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/SimpleCharStream.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/SimpleCharStream.java
deleted file mode 100644
index d453b34797..0000000000
--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/SimpleCharStream.java
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
- */
-
-package net.sourceforge.pmd.lang.ast.impl.javacc;
-
-/**
- * A char stream that does not perform unicode escaping.
- */
-public class SimpleCharStream extends JavaCharStream {
-
-    public SimpleCharStream(JavaccTokenDocument document) {
-        super(document);
-    }
-
-    @Override
-    protected boolean doEscape() {
-        return false;
-    }
-}
diff --git a/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java b/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java
index 5b145f05a7..6aa98771b6 100644
--- a/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java
+++ b/pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java
@@ -6,18 +6,21 @@ package net.sourceforge.pmd.cpd;
 
 import java.io.BufferedReader;
 import java.io.IOException;
-import java.io.Reader;
-import java.io.StringReader;
 import java.util.Properties;
 
+import org.checkerframework.checker.nullness.qual.Nullable;
+
 import net.sourceforge.pmd.PMD;
 import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
 import net.sourceforge.pmd.lang.TokenManager;
 import net.sourceforge.pmd.lang.ast.CharStream;
+import net.sourceforge.pmd.lang.ast.impl.io.EscapeAwareReader;
 import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
-import net.sourceforge.pmd.lang.cpp.ast.CppCharStream;
+import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument;
+import net.sourceforge.pmd.lang.cpp.ast.CppEscapeReader;
 import net.sourceforge.pmd.lang.cpp.ast.CppTokenKinds;
-import net.sourceforge.pmd.util.IOUtil;
+import net.sourceforge.pmd.util.document.Chars;
+import net.sourceforge.pmd.util.document.TextDocument;
 
 /**
  * The C++ tokenizer.
@@ -32,6 +35,12 @@ public class CPPTokenizer extends JavaCCTokenizer {
         setProperties(new Properties()); // set the defaults
     }
 
+    // override to make it visible in tests
+    @Override
+    protected TokenManager<JavaccToken> getLexerForSource(SourceCode sourceCode) throws IOException {
+        return super.getLexerForSource(sourceCode);
+    }
+
     /**
      * Sets the possible options for the C++ tokenizer.
      *
@@ -54,12 +63,12 @@ public class CPPTokenizer extends JavaCCTokenizer {
         }
     }
 
-    private String maybeSkipBlocks(String test) throws IOException {
+    private Chars maybeSkipBlocks(Chars test) throws IOException {
         if (!skipBlocks) {
             return test;
         }
 
-        try (BufferedReader reader = new BufferedReader(new StringReader(test))) {
+        try (BufferedReader reader = new BufferedReader(test.newReader())) {
             StringBuilder filtered = new StringBuilder(test.length());
             String line;
             boolean skip = false;
@@ -75,26 +84,28 @@ public class CPPTokenizer extends JavaCCTokenizer {
                 // always add a new line to keep the line-numbering
                 filtered.append(PMD.EOL);
             }
-            return filtered.toString();
+            return Chars.wrap(filtered, false);
         }
     }
 
 
     @Override
-    protected CharStream makeCharStream(Reader sourceCode) {
-        return CppCharStream.newCppCharStream(sourceCode);
+    protected JavaccTokenDocument newTokenDoc(TextDocument textDoc) {
+        return new JavaccTokenDocument(textDoc) {
+            @Override
+            public EscapeAwareReader newReader(Chars text) {
+                return new CppEscapeReader(text);
+            }
+
+            @Override
+            protected @Nullable String describeKindImpl(int kind) {
+                return CppTokenKinds.describe(kind);
+            }
+        };
     }
 
     @Override
     protected TokenManager<JavaccToken> makeLexerImpl(CharStream sourceCode) {
         return CppTokenKinds.newTokenManager(sourceCode);
     }
-
-    @SuppressWarnings("PMD.CloseResource")
-    @Override
-    protected TokenManager<JavaccToken> getLexerForSource(SourceCode sourceCode) throws IOException {
-        Reader reader = IOUtil.skipBOM(new StringReader(maybeSkipBlocks(sourceCode.getCodeBuffer().toString())));
-        CharStream charStream = makeCharStream(reader);
-        return makeLexerImpl(charStream);
-    }
 }
diff --git a/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/ast/CppCharStream.java b/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/ast/CppCharStream.java
deleted file mode 100644
index c7f7aec7c2..0000000000
--- a/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/ast/CppCharStream.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
- */
-
-package net.sourceforge.pmd.lang.cpp.ast;
-
-import java.io.IOException;
-import java.io.Reader;
-import java.util.regex.Pattern;
-
-import org.checkerframework.checker.nullness.qual.Nullable;
-
-import net.sourceforge.pmd.lang.ast.impl.javacc.CharStreamFactory;
-import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument;
-import net.sourceforge.pmd.lang.ast.impl.javacc.SimpleCharStream;
-import net.sourceforge.pmd.util.document.TextDocument;
-import net.sourceforge.pmd.util.document.CpdCompat;
-
-/**
- * A SimpleCharStream, that supports the continuation of lines via backslash+newline,
- * which is used in C/C++.
- *
- * @author Andreas Dangel
- */
-public class CppCharStream extends SimpleCharStream {
-
-    private static final Pattern CONTINUATION = Pattern.compile("\\\\\\n|\\\\\\r\\n");
-    private static final char BACKSLASH = '\\';
-    private static final char NEWLINE = '\n';
-    private static final char CARRIAGE_RETURN = '\r';
-
-    CppCharStream(JavaccTokenDocument document) {
-        super(document);
-    }
-
-
-    @Override
-    public char readChar() throws IOException {
-        char c = super.readChar();
-        if (c == BACKSLASH) {
-            char c1 = super.readChar();
-            if (c1 == NEWLINE) {
-                c = super.readChar();
-            } else if (c1 == CARRIAGE_RETURN) {
-                char c2 = super.readChar();
-                if (c2 == NEWLINE) {
-                    c = super.readChar();
-                } else {
-                    backup(2);
-                }
-            } else {
-                backup(1);
-            }
-        }
-        return c;
-    }
-
-    @Override
-    public char[] GetSuffix(int len) {
-        String image = GetImage();
-        return image.substring(image.length() - len, image.length()).toCharArray();
-    }
-
-    @Override
-    public String GetImage() {
-        String image = super.GetImage();
-        return CONTINUATION.matcher(image).replaceAll("");
-    }
-
-    public static CppCharStream newCppCharStream(Reader dstream) {
-        String source = CharStreamFactory.toString(dstream);
-        JavaccTokenDocument document = new JavaccTokenDocument(TextDocument.readOnlyString(source, CpdCompat.dummyVersion())) {
-            @Override
-            protected @Nullable String describeKindImpl(int kind) {
-                return CppTokenKinds.describe(kind);
-            }
-        };
-        return new CppCharStream(document);
-    }
-}
diff --git a/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/ast/CppEscapeReader.java b/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/ast/CppEscapeReader.java
new file mode 100644
index 0000000000..378dfb1011
--- /dev/null
+++ b/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/ast/CppEscapeReader.java
@@ -0,0 +1,61 @@
+/*
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.lang.cpp.ast;
+
+import static java.lang.Integer.min;
+
+import java.io.IOException;
+
+import net.sourceforge.pmd.lang.ast.impl.io.EscapeAwareReader;
+import net.sourceforge.pmd.util.document.Chars;
+
+/** An escape-aware reader that records C/C++ line continuations (backslash + LF or CRLF) as escapes. */
+public class CppEscapeReader extends EscapeAwareReader {
+
+    private static final char NEWLINE = '\n';
+    private static final char CARRIAGE_RETURN = '\r';
+
+    private int savedNotEscapeSpecialEnd = Integer.MAX_VALUE;
+
+    public CppEscapeReader(Chars input) {
+        super(input);
+    }
+
+    /**
+     * Returns the exclusive end of the text copyable as-is, recording any line continuation found.
+     */
+    @Override
+    protected int gobbleMaxWithoutEscape(int bufpos, int maxReadahead) throws IOException {
+        int off = bufpos;
+        int max = min(bufpos + maxReadahead, input.length());
+        boolean noBackSlash = false;
+        int notEscapeEnd = this.savedNotEscapeSpecialEnd;
+        while (off < max && (noBackSlash = input.charAt(off) != '\\' || notEscapeEnd < off)) {
+            off++;
+        }
+
+        if (noBackSlash) {
+            this.bufpos = off;
+            return off;
+        }
+
+        final int backSlashOff = off++;
+        // Bounds-check each lookahead: the backslash may be the last char of the input.
+        if (off < input.length() && input.charAt(off) == CARRIAGE_RETURN) {
+            off++; // optional CR of a CRLF continuation
+        }
+        if (off < input.length() && input.charAt(off) == NEWLINE) {
+            off++; // now just past the continuation, which is where the next scan must resume
+            recordEscape(backSlashOff, off - backSlashOff);
+            this.bufpos = off;
+            return backSlashOff;
+        }
+        // not an escape sequence
+        int min = min(bufpos + maxReadahead, off);
+        this.savedNotEscapeSpecialEnd = min < off ? off : Integer.MAX_VALUE;
+        this.bufpos = min;
+        return min;
+    }
+}
diff --git a/pmd-cpp/src/test/java/net/sourceforge/pmd/lang/cpp/ast/CppCharStreamTest.java b/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CppCharStreamTest.java
similarity index 61%
rename from pmd-cpp/src/test/java/net/sourceforge/pmd/lang/cpp/ast/CppCharStreamTest.java
rename to pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CppCharStreamTest.java
index 8196b4b079..9d6df7dd49 100644
--- a/pmd-cpp/src/test/java/net/sourceforge/pmd/lang/cpp/ast/CppCharStreamTest.java
+++ b/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CppCharStreamTest.java
@@ -2,38 +2,47 @@
  * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
  */
 
-package net.sourceforge.pmd.lang.cpp.ast;
+package net.sourceforge.pmd.cpd;
 
 import static org.junit.Assert.assertEquals;
 
 import java.io.IOException;
-import java.io.StringReader;
 
+import org.checkerframework.checker.nullness.qual.NonNull;
 import org.junit.Test;
 
+import net.sourceforge.pmd.lang.ast.CharStream;
+import net.sourceforge.pmd.lang.ast.impl.io.NewCharStream;
+import net.sourceforge.pmd.util.document.TextDocument;
+
 public class CppCharStreamTest {
 
+    @NonNull
+    public CharStream charStreamFor(String source) { // test helper: stream over a token document built by CPPTokenizer
+        return NewCharStream.open(new CPPTokenizer().newTokenDoc(TextDocument.readOnlyString(source)));
+    }
+
     @Test
     public void testContinuationUnix() throws IOException {
-        CppCharStream stream = CppCharStream.newCppCharStream(new StringReader("a\\\nb"));
+        CharStream stream = charStreamFor("a\\\nb"); // input: 'a', backslash, LF, 'b'
         assertStream(stream, "ab");
     }
 
     @Test
     public void testContinuationWindows() throws IOException {
         // note that the \r is normalized to a \n by the TextFile
-        CppCharStream stream = CppCharStream.newCppCharStream(new StringReader("a\\\r\nb"));
+        CharStream stream = charStreamFor("a\\\r\nb"); // input: 'a', backslash, CR, LF, 'b'
         assertStream(stream, "ab");
     }
 
     @Test
     public void testBackup() throws IOException {
         // note that the \r is normalized to a \n by the TextFile
-        CppCharStream stream = CppCharStream.newCppCharStream(new StringReader("a\\b\\qc"));
+        CharStream stream = charStreamFor("a\\b\\qc"); // no line break follows either backslash, so the input must equal the expected text
         assertStream(stream, "a\\b\\qc");
     }
 
-    private void assertStream(CppCharStream stream, String token) throws IOException {
+    private void assertStream(CharStream stream, String token) throws IOException {
         char c = stream.BeginToken();
         assertEquals(token.charAt(0), c);
         for (int i = 1; i < token.length(); i++) {
diff --git a/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java b/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java
index b57c509d2e..83c69ae100 100644
--- a/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java
@@ -5,7 +5,6 @@
 package net.sourceforge.pmd.cpd;
 
 import java.io.IOException;
-import java.io.Reader;
 import java.util.Deque;
 import java.util.LinkedList;
 import java.util.Properties;
@@ -15,10 +14,11 @@ import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
 import net.sourceforge.pmd.cpd.token.TokenFilter;
 import net.sourceforge.pmd.lang.TokenManager;
 import net.sourceforge.pmd.lang.ast.CharStream;
-import net.sourceforge.pmd.lang.ast.impl.javacc.CharStreamFactory;
 import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
+import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument;
 import net.sourceforge.pmd.lang.java.ast.InternalApiBridge;
 import net.sourceforge.pmd.lang.java.ast.JavaTokenKinds;
+import net.sourceforge.pmd.util.document.TextDocument;
 
 public class JavaTokenizer extends JavaCCTokenizer {
 
@@ -44,8 +44,8 @@ public class JavaTokenizer extends JavaCCTokenizer {
     }
 
     @Override
-    protected CharStream makeCharStream(Reader sourceCode) {
-        return CharStreamFactory.javaCharStream(sourceCode, InternalApiBridge::javaTokenDoc);
+    protected JavaccTokenDocument newTokenDoc(TextDocument textDoc) {
+        return InternalApiBridge.javaTokenDoc(textDoc); // token document is obtained through InternalApiBridge
     }
 
     @Override
diff --git a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaParser.java b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaParser.java
index d7af60de77..a3bf759260 100644
--- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaParser.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaParser.java
@@ -6,7 +6,6 @@ package net.sourceforge.pmd.lang.java.ast;
 
 import net.sourceforge.pmd.lang.ast.CharStream;
 import net.sourceforge.pmd.lang.ast.ParseException;
-import net.sourceforge.pmd.lang.ast.impl.javacc.JavaCharStream;
 import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument;
 import net.sourceforge.pmd.lang.ast.impl.javacc.JjtreeParserAdapter;
 import net.sourceforge.pmd.lang.java.ast.internal.LanguageLevelChecker;
@@ -32,11 +31,6 @@ public class JavaParser extends JjtreeParserAdapter<ASTCompilationUnit> {
         return new JavaTokenDocument(textDocument);
     }
 
-    @Override
-    protected CharStream newCharStream(JavaccTokenDocument tokenDocument) {
-        return new JavaCharStream(tokenDocument);
-    }
-
     @Override
     protected ASTCompilationUnit parseImpl(CharStream cs, ParserTask task) throws ParseException {
         JavaParserImpl parser = new JavaParserImpl(cs);
diff --git a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaTokenDocument.java b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaTokenDocument.java
index e00a9102d6..a3a67d2fb0 100644
--- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaTokenDocument.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaTokenDocument.java
@@ -15,8 +15,11 @@ import static net.sourceforge.pmd.lang.java.ast.JavaTokenKinds.WHITESPACE;
 import org.checkerframework.checker.nullness.qual.Nullable;
 
 import net.sourceforge.pmd.lang.ast.CharStream;
+import net.sourceforge.pmd.lang.ast.impl.io.EscapeAwareReader;
+import net.sourceforge.pmd.lang.ast.impl.io.JavaInputReader;
 import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
 import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument;
+import net.sourceforge.pmd.util.document.Chars;
 import net.sourceforge.pmd.util.document.TextDocument;
 
 /**
@@ -43,6 +46,11 @@ final class JavaTokenDocument extends JavaccTokenDocument {
     }
 
 
+    @Override
+    public EscapeAwareReader newReader(Chars text) { // the document text is read through a JavaInputReader
+        return new JavaInputReader(text);
+    }
+
     @Override
     protected @Nullable String describeKindImpl(int kind) {
         return JavaTokenKinds.describe(kind);
diff --git a/pmd-jsp/src/main/java/net/sourceforge/pmd/cpd/JSPTokenizer.java b/pmd-jsp/src/main/java/net/sourceforge/pmd/cpd/JSPTokenizer.java
index b6383affca..fcb65f40f2 100644
--- a/pmd-jsp/src/main/java/net/sourceforge/pmd/cpd/JSPTokenizer.java
+++ b/pmd-jsp/src/main/java/net/sourceforge/pmd/cpd/JSPTokenizer.java
@@ -4,15 +4,16 @@
 
 package net.sourceforge.pmd.cpd;
 
-import java.io.IOException;
-import java.io.Reader;
-
 import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
 import net.sourceforge.pmd.lang.TokenManager;
 import net.sourceforge.pmd.lang.ast.CharStream;
-import net.sourceforge.pmd.lang.ast.impl.javacc.CharStreamFactory;
+import net.sourceforge.pmd.lang.ast.impl.io.EscapeAwareReader;
+import net.sourceforge.pmd.lang.ast.impl.io.JavaInputReader;
 import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
+import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument;
 import net.sourceforge.pmd.lang.jsp.ast.JspTokenKinds;
+import net.sourceforge.pmd.util.document.Chars;
+import net.sourceforge.pmd.util.document.TextDocument;
 
 public class JSPTokenizer extends JavaCCTokenizer {
 
@@ -22,7 +23,13 @@ public class JSPTokenizer extends JavaCCTokenizer {
     }
 
     @Override
-    protected CharStream makeCharStream(Reader sourceCode) throws IOException {
-        return CharStreamFactory.javaCharStream(sourceCode);
+    protected JavaccTokenDocument newTokenDoc(TextDocument textDoc) {
+        return new JavaccTokenDocument(textDoc) { // anonymous token document whose text is read through a JavaInputReader
+            @Override
+            public EscapeAwareReader newReader(Chars text) {
+                return new JavaInputReader(text);
+            }
+        };
     }
+
 }
diff --git a/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/ast/PLSQLParser.java b/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/ast/PLSQLParser.java
index 65cb990593..ed2cb3e08d 100644
--- a/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/ast/PLSQLParser.java
+++ b/pmd-plsql/src/main/java/net/sourceforge/pmd/lang/plsql/ast/PLSQLParser.java
@@ -11,7 +11,6 @@ import net.sourceforge.pmd.lang.ast.ParseException;
 import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument;
 import net.sourceforge.pmd.lang.ast.impl.javacc.JjtreeParserAdapter;
 import net.sourceforge.pmd.util.document.TextDocument;
-import net.sourceforge.pmd.util.document.TextDocument;
 
 public class PLSQLParser extends JjtreeParserAdapter<ASTInput> {
 
diff --git a/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java b/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java
index 8e9a9a3d46..9e61ea2067 100644
--- a/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java
+++ b/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java
@@ -4,7 +4,6 @@
 
 package net.sourceforge.pmd.cpd;
 
-import java.io.Reader;
 import java.util.regex.Pattern;
 
 import org.checkerframework.checker.nullness.qual.Nullable;
@@ -12,7 +11,6 @@ import org.checkerframework.checker.nullness.qual.Nullable;
 import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
 import net.sourceforge.pmd.lang.TokenManager;
 import net.sourceforge.pmd.lang.ast.CharStream;
-import net.sourceforge.pmd.lang.ast.impl.javacc.CharStreamFactory;
 import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
 import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument;
 import net.sourceforge.pmd.lang.python.ast.PythonTokenKinds;
@@ -31,8 +29,8 @@ public class PythonTokenizer extends JavaCCTokenizer {
     }
 
     @Override
-    protected CharStream makeCharStream(Reader sourceCode) {
-        return CharStreamFactory.simpleCharStream(sourceCode, PythonTokenDocument::new);
+    protected JavaccTokenDocument newTokenDoc(TextDocument textDoc) {
+        return new PythonTokenDocument(textDoc); // PythonTokenDocument is the private nested class declared below
     }
 
     private static class PythonTokenDocument extends JavaccTokenDocument {
diff --git a/pmd-visualforce/src/main/java/net/sourceforge/pmd/cpd/VfTokenizer.java b/pmd-visualforce/src/main/java/net/sourceforge/pmd/cpd/VfTokenizer.java
index f4166db699..8fa9212144 100644
--- a/pmd-visualforce/src/main/java/net/sourceforge/pmd/cpd/VfTokenizer.java
+++ b/pmd-visualforce/src/main/java/net/sourceforge/pmd/cpd/VfTokenizer.java
@@ -4,15 +4,16 @@
 
 package net.sourceforge.pmd.cpd;
 
-import java.io.IOException;
-import java.io.Reader;
-
 import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
 import net.sourceforge.pmd.lang.TokenManager;
 import net.sourceforge.pmd.lang.ast.CharStream;
-import net.sourceforge.pmd.lang.ast.impl.javacc.CharStreamFactory;
+import net.sourceforge.pmd.lang.ast.impl.io.EscapeAwareReader;
+import net.sourceforge.pmd.lang.ast.impl.io.JavaInputReader;
 import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
+import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument;
 import net.sourceforge.pmd.lang.vf.ast.VfTokenKinds;
+import net.sourceforge.pmd.util.document.Chars;
+import net.sourceforge.pmd.util.document.TextDocument;
 
 /**
  * @author sergey.gorbaty
@@ -25,7 +26,13 @@ public class VfTokenizer extends JavaCCTokenizer {
     }
 
     @Override
-    protected CharStream makeCharStream(Reader sourceCode) throws IOException {
-        return CharStreamFactory.javaCharStream(sourceCode);
+    protected JavaccTokenDocument newTokenDoc(TextDocument textDoc) {
+        return new JavaccTokenDocument(textDoc) { // anonymous token document whose text is read through a JavaInputReader
+            @Override
+            public EscapeAwareReader newReader(Chars text) {
+                return new JavaInputReader(text);
+            }
+        };
     }
+
 }