Extract changes from #2166 to 7.0.x

* Make Java nodes text-available * Introduce shared JavaccToken in pmd-core * Use factory to produce char streams Tests are still on java-grammar, since they use the DSL & newer AST structure. This is to prepare for other changes that concern all javacc languages and should not be done on java-grammar
2020-01-10 18:26:50 +01:00
parent d6296bd85f
commit 99700d7526
56 changed files with 1119 additions and 462 deletions
--- a/pmd-core/src/main/ant/alljavacc.xml
+++ b/pmd-core/src/main/ant/alljavacc.xml
@ -2,6 +2,17 @@

    <property name="javacc-home.path" value="target/lib" />

+    <property name="tmp-package" value="net.sourceforge.pmd.lang.ast.dummy" />
+
+    <property name="tmp-package.dir" value="${target}/net/sourceforge/pmd/lang/ast/dummy" />
+
+    <property name="base-ast-package" value="net.sourceforge.pmd.lang.ast" />
+    <property name="base-ast-package.dir" value="${target}/net/sourceforge/pmd/lang/ast" />
+
+    <property name="target-package" value="${base-ast-package}" />
+    <property name="target-package.dir" value="${base-ast-package.dir}" />
+
+
    <target name="alljavacc"
            description="Generates all JavaCC aspects within PMD"
            depends="checkUpToDate,init,dummyjjtree,cleanup" />
@ -27,78 +38,98 @@

    <target name="dummyjjtree" description="Generates the reusable JavaCC aspects" unless="javaccBuildNotRequired">

-        <delete dir="${target}/net/sourceforge/pmd/lang/ast/dummy" />
-        <mkdir dir="${target}/net/sourceforge/pmd/lang/ast/dummy" />
+        <delete dir="${tmp-package.dir}" />
+        <mkdir dir="${tmp-package.dir}" />
        <echo>Using JavaCC home: ${javacc-home.path}</echo>
        <jjtree target="etc/grammar/dummy.jjt"
-                outputdirectory="${target}/net/sourceforge/pmd/lang/ast/dummy"
+                outputdirectory="${tmp-package.dir}"
                javacchome="${javacc-home.path}" />

        <!-- Generate CharStream interface -->
        <javacc usercharstream="true"
-                target="${target}/net/sourceforge/pmd/lang/ast/dummy/dummy.jj"
-                outputdirectory="${target}/net/sourceforge/pmd/lang/ast/dummy"
+                target="${tmp-package.dir}/dummy.jj"
+                outputdirectory="${tmp-package.dir}"
                javacchome="${javacc-home.path}" />
-        <replace file="${target}/net/sourceforge/pmd/lang/ast/dummy/CharStream.java"
-                 token="net.sourceforge.pmd.lang.ast.dummy"
-                 value="net.sourceforge.pmd.lang.ast" />
-        <move overwrite="true"
-              file="${target}/net/sourceforge/pmd/lang/ast/dummy/CharStream.java"
-              tofile="${target}/net/sourceforge/pmd/lang/ast/CharStream.java" />

        <!-- Generate ASCII w/ Unicode Escapes CharStream implementation -->
        <javacc usercharstream="false"
                unicodeinput="false"
                javaunicodeescape="true"
                static="false"
-                target="${target}/net/sourceforge/pmd/lang/ast/dummy/dummy.jj"
-                outputdirectory="${target}/net/sourceforge/pmd/lang/ast/dummy"
+                target="${tmp-package.dir}/dummy.jj"
+                outputdirectory="${tmp-package.dir}"
                javacchome="${javacc-home.path}" />
-        <replace file="${target}/net/sourceforge/pmd/lang/ast/dummy/JavaCharStream.java"
-                 token="net.sourceforge.pmd.lang.ast.dummy"
-                 value="net.sourceforge.pmd.lang.ast" />
-        <replace file="${target}/net/sourceforge/pmd/lang/ast/dummy/JavaCharStream.java"
-                 token="class JavaCharStream"
-                 value="class JavaCharStream implements CharStream" />
-        <move overwrite="true"
-              file="${target}/net/sourceforge/pmd/lang/ast/dummy/JavaCharStream.java"
-              tofile="${target}/net/sourceforge/pmd/lang/ast/JavaCharStream.java" />
+
+        <replace file="${tmp-package.dir}/JavaCharStream.java"
+                 token="${tmp-package}"
+                 value="${target-package}">
+            <fileset dir="${tmp-package.dir}">
+            </fileset>
+        </replace>
+
+
+        <!-- Patch JavaCharStream        -->
+
+        <replace file="${tmp-package.dir}/JavaCharStream.java"
+                 token="JavaCharStream"
+                 value="JavaCharStreamBase"/>
+        <replace file="${tmp-package.dir}/JavaCharStream.java"
+                 token="class JavaCharStreamBase"
+                 value="abstract class JavaCharStreamBase implements ${target-package}.CharStream" />
+        <replace file="${tmp-package.dir}/JavaCharStream.java"
+                 token="char c;"
+                 value="char c; beforeReadChar();" />
+         <replace file="${tmp-package.dir}/JavaCharStream.java"
+                 token="/** Read a character. */"
+                 value="protected void beforeReadChar() { }" />
+
+       <move overwrite="true"
+              file="${tmp-package.dir}/JavaCharStream.java"
+              tofile="${target-package.dir}/JavaCharStreamBase.java" />

        <!-- Generate ASCII w/o Unicode Escapes CharStream implementation -->
        <javacc usercharstream="false"
                unicodeinput="false"
                javaunicodeescape="false"
                static="false"
-                target="${target}/net/sourceforge/pmd/lang/ast/dummy/dummy.jj"
-                outputdirectory="${target}/net/sourceforge/pmd/lang/ast/dummy"
+                target="${tmp-package.dir}/dummy.jj"
+                outputdirectory="${tmp-package.dir}"
                javacchome="${javacc-home.path}" />
-        <replace file="${target}/net/sourceforge/pmd/lang/ast/dummy/SimpleCharStream.java"
-                 token="net.sourceforge.pmd.lang.ast.dummy"
-                 value="net.sourceforge.pmd.lang.ast" />
-        <replace file="${target}/net/sourceforge/pmd/lang/ast/dummy/SimpleCharStream.java"
+
+        <replace file="${tmp-package.dir}/SimpleCharStream.java"
                 token="public class SimpleCharStream"
                 value="public class SimpleCharStream implements CharStream" />
-        <move overwrite="true"
-              file="${target}/net/sourceforge/pmd/lang/ast/dummy/SimpleCharStream.java"
-              tofile="${target}/net/sourceforge/pmd/lang/ast/SimpleCharStream.java" />
-        <replace file="${target}/net/sourceforge/pmd/lang/ast/dummy/TokenMgrError.java"
-                 token="net.sourceforge.pmd.lang.ast.dummy"
-                 value="net.sourceforge.pmd.lang.ast" />
-        <replace file="${target}/net/sourceforge/pmd/lang/ast/dummy/TokenMgrError.java"
+
+        <replace file="${tmp-package.dir}/TokenMgrError.java"
+                 token="${target-package}"
+                 value="${base-ast-package}" />
+
+        <replace file="${tmp-package.dir}/TokenMgrError.java"
                 token="extends Error"
                 value="extends RuntimeException" />
-        <replace file="${target}/net/sourceforge/pmd/lang/ast/dummy/TokenMgrError.java"
+
+        <replace file="${tmp-package.dir}/TokenMgrError.java"
                 token="static final int"
                 value="public static final int" />
-        <replace file="${target}/net/sourceforge/pmd/lang/ast/dummy/TokenMgrError.java">
-            <replacetoken><![CDATA["Lexical error at line "]]></replacetoken>
-            <replacevalue><![CDATA["Lexical error in file " + AbstractTokenManager.getFileName() + " at line "]]></replacevalue>
-        </replace>
-        <move overwrite="true"
-              file="${target}/net/sourceforge/pmd/lang/ast/dummy/TokenMgrError.java"
-              tofile="${target}/net/sourceforge/pmd/lang/ast/TokenMgrError.java" />

-        <delete dir="${target}/net/sourceforge/pmd/lang/ast/dummy" />
+        <replace file="${tmp-package.dir}/TokenMgrError.java">
+            <replacetoken><![CDATA["Lexical error at line "]]></replacetoken>
+            <replacevalue>&quot;Lexical error in file &quot; + net.sourceforge.pmd.lang.ast.AbstractTokenManager.getFileName() + &quot; at line &quot;</replacevalue>
+        </replace>
+
+        <move overwrite="true"
+              file="${tmp-package.dir}/TokenMgrError.java"
+              tofile="${base-ast-package.dir}/TokenMgrError.java" />
+
+        <move overwrite="true"
+              todir="${target-package.dir}">
+            <fileset dir="${tmp-package.dir}">
+                <include name="SimpleCharStream.java" />
+                <include name="CharStream.java" />
+            </fileset>
+        </move>
+
+        <delete dir="${tmp-package.dir}" />

    </target>

--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/AbstractTokenManager.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/AbstractTokenManager.java
@ -1,4 +1,4 @@
-/**
+/*
 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
 */

--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/GenericToken.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/GenericToken.java
@ -23,11 +23,28 @@ public interface GenericToken {
    GenericToken getPreviousComment();

    /**
-     * Gets the token's text.
-     * @return the token's text
+     * Returns the token's text.
     */
    String getImage();

+    // TODO these default implementations are here for compatibility because
+    //  the functionality is only used in pmd-java for now, though it could
+    //  be ported. I prefer doing this as changing all the GenericToken in
+    //  pmd-java to JavaccToken
+
+
+    /** Inclusive start offset in the source file text. */
+    default int getStartInDocument() {
+        return -1;
+    }
+
+
+    /** Exclusive end offset in the source file text. */
+    default int getEndInDocument() {
+        return -1;
+    }
+
+
    /**
     * Gets the line where the token's region begins
     * @return a non-negative integer containing the begin line
--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/JavaCharStream.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/JavaCharStream.java
@ -0,0 +1,117 @@
+/*
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.lang.ast;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.commons.io.IOUtils;
+
+import net.sourceforge.pmd.lang.ast.impl.TokenDocument;
+import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
+
+/**
+ * This stream buffers the whole file in memory before parsing,
+ * and tracks the start/end offsets of tokens. This allows building {@link JavaccToken}.
+ * The buffer is assumed to be composed of only ASCII characters,
+ * and the stream unescapes Unicode escapes. The {@link #getTokenDocument() token document}
+ * stores the original file with escapes and all.
+ *
+ * TODO this is to be moved into the impl.javacc subpackage
+ */
+public class JavaCharStream extends JavaCharStreamBase {
+
+    // full text with nothing escaped and all
+    private final String fullText;
+    private final TokenDocument document;
+
+    private int[] startOffsets;
+
+    public JavaCharStream(String fulltext) {
+        super(new StringReader(fulltext));
+        this.fullText = fulltext;
+        this.document = new TokenDocument(fullText);
+        this.startOffsets = new int[bufsize];
+        maxNextCharInd = fullText.length();
+
+        nextCharBuf = null;
+    }
+
+    public JavaCharStream(Reader toDump) {
+        this(toString(toDump));
+    }
+
+    @Override
+    protected void ExpandBuff(boolean wrapAround) {
+        int[] newStartOffsets = new int[bufsize + 2048];
+
+        if (wrapAround) {
+            System.arraycopy(startOffsets, tokenBegin, newStartOffsets, 0, bufsize - tokenBegin);
+            System.arraycopy(startOffsets, 0, newStartOffsets, bufsize - tokenBegin, bufpos);
+            startOffsets = newStartOffsets;
+        } else {
+            System.arraycopy(startOffsets, tokenBegin, newStartOffsets, 0, bufsize - tokenBegin);
+            startOffsets = newStartOffsets;
+        }
+
+        super.ExpandBuff(wrapAround);
+    }
+
+    @Override
+    protected void beforeReadChar() {
+        if (bufpos + 1 < available) {
+            startOffsets[bufpos + 1] = nextCharInd + 1;
+        }
+    }
+
+    public int getStartOffset() {
+        return startOffsets[tokenBegin];
+    }
+
+    public int getEndOffset() {
+        if (bufpos >= startOffsets.length) {
+            return fullText.length();
+        } else {
+            return startOffsets[bufpos] + 1; // + 1 for exclusive
+        }
+    }
+
+    public TokenDocument getTokenDocument() {
+        return document;
+    }
+
+    @Override
+    protected char ReadByte() throws IOException {
+        ++nextCharInd;
+
+        if (nextCharInd >= fullText.length()) {
+            if (bufpos != 0) {
+                --bufpos;
+                backup(0);
+            } else {
+                bufline[bufpos] = line;
+                bufcolumn[bufpos] = column;
+            }
+            throw new IOException();
+        }
+
+        return fullText.charAt(nextCharInd);
+    }
+
+
+    @Override
+    protected void FillBuff() {
+        throw new IllegalStateException("Buffer shouldn't be refilled");
+    }
+
+    private static String toString(Reader dstream) {
+        try (Reader r = dstream) {
+            return IOUtils.toString(r);
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+}
--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/TextAvailableNode.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/TextAvailableNode.java
@ -0,0 +1,38 @@
+/*
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.lang.ast;
+
+import net.sourceforge.pmd.lang.ast.xpath.NoAttribute;
+
+/**
+ * Refinement of {@link Node} for nodes that can provide the underlying
+ * source text.
+ *
+ * @since 7.0.0
+ */
+public interface TextAvailableNode extends Node {
+
+    /*
+      Note for future: I initially implemented a CharSequence that shares
+      the char array for the full file, which seems advantageous, but tbh
+      is out of scope of the first prototype
+
+      Problem with using strings is that I suspect it can be very easy to
+      create significant memory issues without paying attention...
+
+      See 046958adad for the removal commit
+     */
+
+
+    /**
+     * Returns the original source code underlying this node. In
+     * particular, for a {@link RootNode}, returns the whole text
+     * of the file.
+     */
+    @NoAttribute
+    CharSequence getText();
+
+
+}
--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/TokenDocument.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/TokenDocument.java
@ -0,0 +1,40 @@
+/*
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.lang.ast.impl;
+
+import net.sourceforge.pmd.annotation.Experimental;
+import net.sourceforge.pmd.lang.ast.SourceCodePositioner;
+import net.sourceforge.pmd.util.StringUtil;
+
+/**
+ * Stores the full text of a file and maps offsets in it to line and column
+ * numbers. May later be used to e.g. doubly link tokens or give them an identity.
+ */
+@Experimental
+public class TokenDocument {
+
+    private final String fullText;
+    private final SourceCodePositioner positioner;
+
+    public TokenDocument(String fullText) {
+        this.fullText = fullText;
+        positioner = new SourceCodePositioner(fullText);
+    }
+
+    /** Returns the original text of the file (escapes are left untranslated). */
+    public String getFullText() {
+        return fullText;
+    }
+
+
+    public int lineNumberFromOffset(int offset) {
+        return positioner.lineNumberFromOffset(offset);
+    }
+
+    public int columnFromOffset(int offsetInclusive) {
+        return StringUtil.columnNumberAt(fullText, offsetInclusive);
+    }
+
+}
--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStreamFactory.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/CharStreamFactory.java
@ -0,0 +1,68 @@
+/*
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.lang.ast.impl.javacc;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.function.Function;
+
+import org.apache.commons.io.IOUtils;
+
+import net.sourceforge.pmd.lang.ast.CharStream;
+import net.sourceforge.pmd.lang.ast.JavaCharStream;
+import net.sourceforge.pmd.lang.ast.SimpleCharStream;
+import net.sourceforge.pmd.lang.ast.impl.TokenDocument;
+
+@SuppressWarnings("PMD.UnusedFormalParameter") // for later
+public final class CharStreamFactory {
+
+    private CharStreamFactory() {
+        // util class
+    }
+
+    /**
+     * A char stream that doesn't perform any escape translation.
+     */
+    public static CharStream simpleCharStream(Reader input) {
+        return simpleCharStream(input, TokenDocument::new);
+    }
+
+    /**
+     * A char stream that doesn't perform any escape translation.
+     */
+    public static CharStream simpleCharStream(Reader input, Function<? super String, ? extends TokenDocument> documentMaker) {
+        return new SimpleCharStream(input);
+    }
+
+    /**
+     * A char stream that translates java unicode sequences.
+     */
+    public static CharStream javaCharStream(Reader input) {
+        return javaCharStream(input, TokenDocument::new);
+    }
+
+    /**
+     * A char stream that translates java unicode sequences.
+     */
+    public static CharStream javaCharStream(Reader input, Function<? super String, ? extends TokenDocument> documentMaker) {
+        String source = toString(input);
+        return new JavaCharStream(source);
+    }
+
+    /**
+     * @deprecated This shouldn't be used. IOExceptions should be handled properly,
+     *     ie it should be expected that creating a parse may throw an IOException,
+     *     in both CPD and PMD
+     */
+    @Deprecated
+    public static String toString(Reader dstream) {
+        try (Reader r = dstream) {
+            return IOUtils.toString(r);
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+}
--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaccToken.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/javacc/JavaccToken.java
@ -0,0 +1,155 @@
+/*
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.lang.ast.impl.javacc;
+
+import net.sourceforge.pmd.lang.ast.GenericToken;
+import net.sourceforge.pmd.lang.ast.impl.TokenDocument;
+
+/**
+ * A generic token implementation for JavaCC parsers. Will probably help
+ * remove those duplicated implementations that all have the same name.
+ *
+ * <p>Largely has the same interface as the default generated token class.
+ * The main difference is that the position of the token is encoded as
+ * a start and end offset in the source file, instead of a (begin,end)x(line,column)
+ * 4-tuple. This offers two practical advantages:
+ * <ul>
+ * <li>It allows retrieving easily the underlying text of a node (just
+ * need to cut a substring of the file text). Other attributes like lines
+ * and column bounds can be derived as well - though this should not be
+ * done systematically because it's costlier.
+ * <li>It's a bit lighter. Token instances are among the most numerous
+ * objects in a typical PMD run, so this may reduce GC pressure.
+ * </ul>
+ *
+ * <p>TODO replace duplicates over PMD.
+ */
+public class JavaccToken implements GenericToken, java.io.Serializable {
+
+    /**
+     * The version identifier for this Serializable class.
+     * Increment only if the <i>serialized</i> form of the
+     * class changes.
+     */
+    private static final long serialVersionUID = 4L;
+
+    /**
+     * An integer that describes the kind of this token.  This numbering
+     * system is determined by JavaCCParser, and a table of these numbers is
+     * stored in the file ...Constants.java.
+     */
+    public final int kind;
+    /**
+     * A reference to the next regular (non-special) token from the input
+     * stream.  If this is the last token from the input stream, or if the
+     * token manager has not read tokens beyond this one, this field is
+     * set to null.  This is true only if this token is also a regular
+     * token.  Otherwise, see below for a description of the contents of
+     * this field.
+     */
+    public JavaccToken next;
+    /**
+     * This field is used to access special tokens that occur prior to this
+     * token, but after the immediately preceding regular (non-special) token.
+     * If there are no such special tokens, this field is set to null.
+     * When there are more than one such special token, this field refers
+     * to the last of these special tokens, which in turn refers to the next
+     * previous special token through its specialToken field, and so on
+     * until the first special token (whose specialToken field is null).
+     * The next fields of special tokens refer to other special tokens that
+     * immediately follow it (without an intervening regular token).  If there
+     * is no such token, this field is null.
+     */
+    public JavaccToken specialToken;
+
+    private final CharSequence image;
+    private final int startInclusive;
+    private final int endExclusive;
+    protected final TokenDocument document;
+
+    /** {@link #undefined()} */
+    private JavaccToken() {
+        this(null);
+    }
+
+    public JavaccToken(String image) {
+        this(-1, image, -1, -1, null);
+    }
+
+    /**
+     * Constructs a new token for the specified Image and Kind.
+     */
+    public JavaccToken(int kind,
+                       CharSequence image,
+                       int startInclusive,
+                       int endExclusive,
+                       TokenDocument document) {
+        this.kind = kind;
+        this.image = image;
+        this.startInclusive = startInclusive;
+        this.endExclusive = endExclusive;
+        this.document = document;
+    }
+
+
+    @Override
+    public GenericToken getNext() {
+        return next;
+    }
+
+    @Override
+    public GenericToken getPreviousComment() {
+        return specialToken;
+    }
+
+    @Override
+    public String getImage() {
+        return image.toString();
+    }
+
+    @Override
+    public int getStartInDocument() {
+        return startInclusive;
+    }
+
+    @Override
+    public int getEndInDocument() {
+        return endExclusive;
+    }
+
+    @Override
+    public int getBeginLine() {
+        return document == null ? -1 : document.lineNumberFromOffset(startInclusive);
+    }
+
+    @Override
+    public int getEndLine() {
+        return document == null ? -1 : document.lineNumberFromOffset(endExclusive - 1);
+    }
+
+    @Override
+    public int getBeginColumn() {
+        return document == null ? -1 : document.columnFromOffset(startInclusive);
+    }
+
+    @Override
+    public int getEndColumn() {
+        return document == null ? -1 : document.columnFromOffset(endExclusive - 1);
+    }
+
+    /**
+     * Returns the image.
+     */
+    @Override
+    public String toString() {
+        return image.toString();
+    }
+
+    public static JavaccToken undefined() {
+        return new JavaccToken();
+    }
+
+}
+
--- a/pmd-core/src/main/java/net/sourceforge/pmd/util/StringUtil.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/util/StringUtil.java
@ -30,6 +30,55 @@ public final class StringUtil {
    private StringUtil() {
    }

+    /**
+     * Returns the (1-based) column number of the character at the given index.
+     * Line terminators are by convention taken to be part of the line they end,
+     * and not the new line they start. Each character has width 1 (including {@code \t}).
+     * The method also accepts that the given offset be the length of the
+     * string (in which case there's no targeted character), to get the column
+     * number of a character that would be inserted at the end of the string.
+     *
+     * <pre>
+     *
+     *     columnNumberAt("a\nb", 0)  = 1
+     *     columnNumberAt("a\nb", 1)  = 2
+     *     columnNumberAt("a\nb", 2)  = 1
+     *     columnNumberAt("a\nb", 3)  = 2   // charAt(3) doesn't exist though
+     *     columnNumberAt("a\nb", 4)  = -1
+     *
+     *     columnNumberAt("a\r\n", 2)  = 3
+     *
+     * </pre>
+     *
+     * @param charSeq         Char sequence
+     * @param offsetInclusive Offset in the sequence
+     * @return -1 if the offset is not in {@code [0, length]}, otherwise
+     * the column number
+     */
+    public static int columnNumberAt(CharSequence charSeq, final int offsetInclusive) {
+        if (offsetInclusive == charSeq.length()) {
+            return charSeq.length() == 0 ? 1 : 1 + columnNumberAt(charSeq, offsetInclusive - 1);
+        } else if (offsetInclusive > charSeq.length() || offsetInclusive < 0) {
+            return -1;
+        }
+
+        int col = 0;
+        char next = 0;
+        for (int i = offsetInclusive; i >= 0; i--) {
+            char c = charSeq.charAt(i);
+
+            if (offsetInclusive != i) {
+                if (c == '\n' || c == '\r' && next != '\n') {
+                    return col;
+                }
+            }
+
+            col++;
+            next = c;
+        }
+        return col;
+    }
+
    /**
     * Formats a double to a percentage, keeping {@code numDecimal} decimal places.
     *
--- a/pmd-core/src/test/java/net/sourceforge/pmd/util/StringUtilTest.java
+++ b/pmd-core/src/test/java/net/sourceforge/pmd/util/StringUtilTest.java
@ -15,6 +15,43 @@ public class StringUtilTest {
        assertEquals("faa", StringUtil.replaceString("foo", 'o', "a"));
    }

+    @Test
+    public void testColumnNumber() {
+        assertEquals(-1, StringUtil.columnNumberAt("f\rah\nb", -1));
+        assertEquals(1, StringUtil.columnNumberAt("f\rah\nb", 0));
+        assertEquals(2, StringUtil.columnNumberAt("f\rah\nb", 1));
+        assertEquals(1, StringUtil.columnNumberAt("f\rah\nb", 2));
+        assertEquals(2, StringUtil.columnNumberAt("f\rah\nb", 3));
+        assertEquals(3, StringUtil.columnNumberAt("f\rah\nb", 4));
+        assertEquals(1, StringUtil.columnNumberAt("f\rah\nb", 5));
+        assertEquals(2, StringUtil.columnNumberAt("f\rah\nb", 6));
+        assertEquals(-1, StringUtil.columnNumberAt("f\rah\nb", 7));
+    }
+
+    @Test
+    public void testColumnNumberCrLf() {
+        assertEquals(-1, StringUtil.columnNumberAt("f\r\nb", -1));
+        assertEquals(1, StringUtil.columnNumberAt("f\r\nb", 0));
+        assertEquals(2, StringUtil.columnNumberAt("f\r\nb", 1));
+        assertEquals(3, StringUtil.columnNumberAt("f\r\nb", 2));
+        assertEquals(1, StringUtil.columnNumberAt("f\r\nb", 3));
+        assertEquals(2, StringUtil.columnNumberAt("f\r\nb", 4));
+        assertEquals(-1, StringUtil.columnNumberAt("f\r\nb", 5));
+    }
+
+    @Test
+    public void testColumnNumberTrailing() {
+        assertEquals(1, StringUtil.columnNumberAt("\n", 0));
+        assertEquals(2, StringUtil.columnNumberAt("\n", 1));
+        assertEquals(-1, StringUtil.columnNumberAt("\n", 2));
+    }
+
+    @Test
+    public void testColumnNumberEmpty() {
+        assertEquals(1, StringUtil.columnNumberAt("", 0));
+        assertEquals(-1, StringUtil.columnNumberAt("", 1));
+    }
+
    @Test
    public void testReplaceWithMultipleChars() {
        assertEquals("faaaa", StringUtil.replaceString("foo", 'o', "aa"));
--- a/pmd-java/etc/grammar/Java.jjt
+++ b/pmd-java/etc/grammar/Java.jjt
@ -223,6 +223,8 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import net.sourceforge.pmd.lang.ast.CharStream;
+import net.sourceforge.pmd.lang.ast.GenericToken;
+import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
 import net.sourceforge.pmd.lang.ast.TokenMgrError;
 import net.sourceforge.pmd.lang.ast.Node;

@ -453,7 +455,7 @@ class JavaParser {
    return getToken(1).image.equals("assert");
  }

-  private boolean isPrecededByComment(Token tok) {
+  private boolean isPrecededByComment(JavaccToken tok) {
      boolean res = false;
      while (!res && tok.specialToken != null) {
          tok = tok.specialToken;
@ -504,17 +506,21 @@ TOKEN_MGR_DECLS :

 SPECIAL_TOKEN :
 {
-  < HORIZONTAL_WHITESPACE: [" ", "\t", "\f"] >
-| < LINE_TERMINATOR: "\n" | "\r" | "\r\n" >
+ // those are private, just for code organisation
+  < #HORIZONTAL_WHITESPACE: [" ", "\t", "\f"] >
+| < #LINE_TERMINATOR: "\n" | "\r" | "\r\n" >
+  // this one is pushed, notice the (..)+ construct, to avoid
+  // creating one token per character
+| < WHITESPACE: ([" ", "\t", "\f", "\n", "\r"])+ >
 }

 SPECIAL_TOKEN :
 {
 < SINGLE_LINE_COMMENT: "//"(~["\n","\r"])* ("\n"|"\r"|"\r\n")? >
    {
-        int startOfNOPMD = matchedToken.image.indexOf(suppressMarker);
+        int startOfNOPMD = matchedToken.getImage().indexOf(suppressMarker);
        if (startOfNOPMD != -1) {
-            suppressMap.put(matchedToken.beginLine, matchedToken.image.substring(startOfNOPMD + suppressMarker.length()));
+            suppressMap.put(matchedToken.getBeginLine(), matchedToken.getImage().substring(startOfNOPMD + suppressMarker.length()));
        }
        comments.add(new SingleLineComment(matchedToken));
    }
@ -1627,20 +1633,8 @@ TOKEN :
 /* >'s need special attention due to generics syntax. */
 TOKEN :
 {
-  < RUNSIGNEDSHIFT: ">>>" >
-  {
-     matchedToken.kind = GT;
-     ((Token.GTToken)matchedToken).realKind = RUNSIGNEDSHIFT;
-     input_stream.backup(2);
-     matchedToken.image = ">";
-  }
-| < RSIGNEDSHIFT: ">>" >
-  {
-     matchedToken.kind = GT;
-     ((Token.GTToken)matchedToken).realKind = RSIGNEDSHIFT;
-     input_stream.backup(1);
-     matchedToken.image = ">";
-  }
+  < RUNSIGNEDSHIFT: ">>>" > { input_stream.backup(2); }
+| < RSIGNEDSHIFT: ">>" > { input_stream.backup(1); }
 | < GT: ">" >
 }

@ -1667,6 +1661,7 @@ ASTCompilationUnit CompilationUnit() :
  <EOF>
  {
     jjtThis.setComments(token_source.comments);
+     jjtThis.setTokenDocument(((net.sourceforge.pmd.lang.ast.JavaCharStream) token_source.input_stream).getTokenDocument());
     return jjtThis;
  }
 }
@ -2762,19 +2757,15 @@ void AssertStatement() :
 void RUNSIGNEDSHIFT(): // TODO 7.0.0 make #void
 {}
 {
-  ( LOOKAHEAD({ getToken(1).kind == GT &&
-                ((Token.GTToken)getToken(1)).realKind == RUNSIGNEDSHIFT} )
+   LOOKAHEAD({ JavaTokenFactory.getRealKind(getToken(1)) == RUNSIGNEDSHIFT})
   ">" ">" ">"
-  )
 }

 void RSIGNEDSHIFT(): // TODO 7.0.0 make #void
 {}
 {
-  ( LOOKAHEAD({ getToken(1).kind == GT &&
-                ((Token.GTToken)getToken(1)).realKind == RSIGNEDSHIFT} )
+  LOOKAHEAD({ JavaTokenFactory.getRealKind(getToken(1)) == RSIGNEDSHIFT})
  ">" ">"
-  )
 }

 /* Annotation syntax follows. */
--- a/pmd-java/src/main/ant/alljavacc.xml
+++ b/pmd-java/src/main/ant/alljavacc.xml
@ -2,6 +2,7 @@

    <property name="javacc-home.path" value="target/lib" />
    <property name="target-package-dir" value="${target}/net/sourceforge/pmd/lang/java/ast" />
+    <property name="stamp-file" value="${target}/../../last-generated-timestamp"/>

    <!-- Matches the names of deprecated node types to add a @Deprecated annotation -->
    <property name="deprecated-nodes-pattern" value="ASTR(UN)?SIGNEDSHIFT" />
@ -14,6 +15,10 @@
    <property name="generic-sideeffect-visitor-interface-file"
              value="${target-package-dir}/${generic-sideeffect-visitor-interface-name}.java" />

+    <property name="ast-core-package" value="net.sourceforge.pmd.lang.ast" />
+    <property name="ast-impl-package" value="${ast-core-package}.impl.javacc" />
+
+
    <!-- TARGETS -->

    <target name="alljavacc"
@ -21,7 +26,7 @@
            depends="checkUpToDate,init,javajjtree,cleanup" />

    <target name="checkUpToDate">
-        <uptodate property="javaccBuildNotRequired" targetfile="${target}/last-generated-timestamp">
+        <uptodate property="javaccBuildNotRequired" targetfile="${stamp-file}">
            <srcfiles dir="etc/grammar" includes="*.jj*"/>
            <srcfiles file="src/main/ant/alljavacc.xml" />
        </uptodate>
@ -33,7 +38,7 @@
        <copy file="${javacc.jar}" tofile="${javacc-home.path}/javacc.jar" />

        <mkdir dir="${target}"/>
-        <touch file="${target}/last-generated-timestamp"/>
+        <touch file="${stamp-file}"/>
    </target>

    <target name="cleanup">
@ -57,9 +62,120 @@
        <delete file="${target-package-dir}/CharStream.java" />
        <delete file="${target-package-dir}/TokenMgrError.java" />

+
+        <replace file="${target-package-dir}/JJTJavaParserState.java">
+            <replacefilter token="/*" value="/**"/>
+            <replacetoken><![CDATA[  /* Pushes a node on to the stack. */]]></replacetoken>
+            <replacevalue>
+                <![CDATA[
+ /**
+  * Extend the number of children of the current node of one to the left.
+  * If the node is closed, one additional node from the stack will be popped
+  * and added to its children. This allows mimicking "left-recursive" nodes,
+  * while keeping the parsing iterative.
+  *
+  * <p>Note that when the total number of children is definitely known, you
+  * can use "definite nodes", ie write the expected number of children (including
+  * the ones to the left) in the JJTree annotation (eg {@code #AdditiveExpression(2)}).
+  * So this is only useful when the number of children of the current node is not certain.
+  *
+  * <p>This method does not affect the stack unless the current jjtThis is
+  * closed in the future.
+  */
+  public void extendLeft() {
+    mk--;
+  }
+
+  /**
+   * Peek the nth node from the top of the stack.
+   * peekNode(0) == peekNode()
+   */
+  public Node peekNode(int n) {
+    return nodes.get(nodes.size() - n - 1);
+  }
+
+  public boolean isInjectionPending() {
+    return numPendingInjection > 0;
+  }
+
+  /** If non-zero, then the top "n" nodes of the stack will be injected as the first children of the next
+    * node to be opened. This is not very flexible, but it's enough. The grammar needs to take
+    * care of the order in which nodes are opened in a few places, in most cases this just means using
+    * eg A() B() #N(2) instead of (A() B()) #N, so as not to open N before A.
+    */
+  private int numPendingInjection;
+
+  public void injectRight(int n) {
+       numPendingInjection = n;
+  }
+
+  /* Pushes a node on to the stack. */]]>
+            </replacevalue>
+        </replace>
+
+        <replace file="${target-package-dir}/JJTJavaParserState.java">
+<!--  This is in openNodeScope.                                      -->
+<!--  If injection is pending, we bump the arity of the opened node. -->
+<!--  When it's closed, it will enclose the injected node.           -->
+            <replacetoken><![CDATA[mk = sp;]]></replacetoken>
+            <replacevalue><![CDATA[
+    mk = sp;
+    if (isInjectionPending()) {
+        mk -= numPendingInjection;
+        numPendingInjection = 0;
+    }]]>
+            </replacevalue>
+        </replace>
+
+
+        <replace token="new Token()" value="${ast-impl-package}.JavaccToken.undefined()">
+            <fileset dir="${target-package-dir}" />
+        </replace>
+
+        <!-- Map Javacc names to our names -->
+
+        <replaceregexp flags="g">
+            <regexp pattern="\bToken\b" />
+            <substitution expression="${ast-impl-package}.JavaccToken" />
+            <fileset dir="${target-package-dir}" />
+        </replaceregexp>
+
+
+        <replace file="${target-package-dir}/JavaParserTokenManager.java">
+            <replacetoken><![CDATA[t = JavaTokenFactory.newToken(jjmatchedKind, curTokenImage);
+
+   t.beginLine = beginLine;
+   t.endLine = endLine;
+   t.beginColumn = beginColumn;
+   t.endColumn = endColumn;]]></replacetoken>
+            <replacevalue>
+                <![CDATA[t = JavaTokenFactory.newToken(jjmatchedKind, curTokenImage, beginLine, endLine, beginColumn, endColumn);]]></replacevalue>
+        </replace>
+
+        <replaceregexp file="${target-package-dir}/JavaParserTokenManager.java" flags="s">
+            <regexp pattern="protected net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken jjFillToken.*?}" />
+            <substitution
+                    expression="protected net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken jjFillToken() {return JavaTokenFactory.newToken(jjmatchedKind, input_stream);}"/>
+        </replaceregexp>
+
+
+        <replace token=".image" value=".getImage()">
+            <fileset dir="${target-package-dir}"/>
+        </replace>
+
+        <replace token=".beginLine" value=".getBeginLine()">
+            <fileset dir="${target-package-dir}"/>
+        </replace>
+
+        <replace token=".beginColumn" value=".getBeginColumn()">
+            <fileset dir="${target-package-dir}"/>
+        </replace>
+
+
+
        <replace file="${target-package-dir}/JavaParserTokenManager.java"
-                 token="class JavaParserTokenManager"
-                 value="class JavaParserTokenManager extends net.sourceforge.pmd.lang.ast.AbstractTokenManager" />
+                 token="public class JavaParserTokenManager"
+                 value="import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken; public class JavaParserTokenManager extends net.sourceforge.pmd.lang.ast.AbstractTokenManager" />
        <replace file="${target-package-dir}/JavaParser.java"
                 token="throw new Error"
                 value="throw new RuntimeException" />
@ -77,7 +193,7 @@
            <replacefilter token="JavaParserVisitor" value="${base-visitor-interface-name}" />
            <replacefilter token="SimpleNode" value="JavaNode" />
            <!-- Default methods -->
-            <replacefilter token="public Object" value="default Object" />
+            <replacefilter token="public Object visit(" value="default Object visit(" />
            <replacefilter token=");" value=") { return visit((JavaNode) node, data); }" />
            <replacefilter token="default Object visit(JavaNode node, Object data) { return visit((JavaNode) node, data); }"
                           value="default Object visit(JavaNode node, Object data) { return node.childrenAccept(this, data); }" />
@ -107,81 +223,10 @@
 public class]]></replacevalue>
        </replace>

-        <replace file="${target-package-dir}/Token.java">
-            <replacetoken><![CDATA[      default : return new Token(ofKind, image);
-    }
-  }
-]]></replacetoken>
-            <replacevalue><![CDATA[      case JavaParserConstants.RUNSIGNEDSHIFT :
-      case JavaParserConstants.RSIGNEDSHIFT :
-      case JavaParserConstants.GT:
-        return new GTToken(ofKind, image);
-      default : return new Token(ofKind, image);
-    }
-  }
-
-  public static final class GTToken extends Token {
-    public int realKind = JavaParserConstants.GT;
-    public GTToken(int ofKind, String image) {
-       super(ofKind, image);
-    }
-  }
-]]></replacevalue>
-        </replace>
-
-        <replace file="${target-package-dir}/Token.java">
-            <replacetoken>public class Token implements java.io.Serializable</replacetoken>
-            <replacevalue><![CDATA[import net.sourceforge.pmd.lang.ast.GenericToken;
-
-public class Token implements GenericToken, java.io.Serializable]]></replacevalue>
-        </replace>
-
-        <!--Add implementation methods of GenericToken-->
-        <replace file="${target-package-dir}/Token.java">
-            <replacetoken>public Token specialToken;</replacetoken>
-            <replacevalue><![CDATA[public Token specialToken;
-
-  @Override
-  public GenericToken getNext() {
-    return next;
-  }
-
-  @Override
-  public GenericToken getPreviousComment() {
-    return specialToken;
-  }
-
-  @Override
-  public String getImage() {
-    return image;
-  }
-
-  @Override
-  public int getBeginLine() {
-    return beginLine;
-  }
-
-  @Override
-  public int getEndLine() {
-    return endLine;
-  }
-
-  @Override
-  public int getBeginColumn() {
-    return beginColumn;
-  }
-
-  @Override
-  public int getEndColumn() {
-    return endColumn;
-  }
-
-]]></replacevalue>
-        </replace>
-
        <delete>
            <fileset dir="${target-package-dir}">
                <include name="AST*.java" />
+                <include name="Token.java"/>
            </fileset>
        </delete>
    </target>
--- a/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/cpd/JavaTokenizer.java
@ -15,9 +15,9 @@ import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
 import net.sourceforge.pmd.cpd.token.TokenFilter;
 import net.sourceforge.pmd.lang.TokenManager;
 import net.sourceforge.pmd.lang.ast.GenericToken;
+import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
 import net.sourceforge.pmd.lang.java.JavaTokenManager;
 import net.sourceforge.pmd.lang.java.ast.JavaParserConstants;
-import net.sourceforge.pmd.lang.java.ast.Token;

 public class JavaTokenizer extends JavaCCTokenizer {

@ -56,7 +56,7 @@ public class JavaTokenizer extends JavaCCTokenizer {
    @Override
    protected TokenEntry processToken(Tokens tokenEntries, GenericToken currentToken, String fileName) {
        String image = currentToken.getImage();
-        Token javaToken = (Token) currentToken;
+        JavaccToken javaToken = (JavaccToken) currentToken;

        constructorDetector.restoreConstructorToken(tokenEntries, javaToken);

@ -113,17 +113,18 @@ public class JavaTokenizer extends JavaCCTokenizer {

        @Override
        protected void analyzeToken(final GenericToken currentToken) {
-            detectAnnotations((Token) currentToken);
+            JavaccToken token = (JavaccToken) currentToken;
+            detectAnnotations(token);

-            skipSemicolon((Token) currentToken);
-            skipPackageAndImport((Token) currentToken);
-            skipAnnotationSuppression((Token) currentToken);
+            skipSemicolon(token);
+            skipPackageAndImport(token);
+            skipAnnotationSuppression(token);
            if (ignoreAnnotations) {
                skipAnnotations();
            }
        }

-        private void skipPackageAndImport(final Token currentToken) {
+        private void skipPackageAndImport(final JavaccToken currentToken) {
            if (currentToken.kind == JavaParserConstants.PACKAGE || currentToken.kind == JavaParserConstants.IMPORT) {
                discardingKeywords = true;
            } else if (discardingKeywords && currentToken.kind == JavaParserConstants.SEMICOLON) {
@ -131,22 +132,22 @@ public class JavaTokenizer extends JavaCCTokenizer {
            }
        }

-        private void skipSemicolon(final Token currentToken) {
+        private void skipSemicolon(final JavaccToken currentToken) { // after this call, discardingSemicolon is true exactly when currentToken is a ';'
            if (currentToken.kind == JavaParserConstants.SEMICOLON) {
                discardingSemicolon = true;
-            } else if (discardingSemicolon && currentToken.kind != JavaParserConstants.SEMICOLON) {
+            } else if (discardingSemicolon) { // the removed "kind != SEMICOLON" test was redundant: this branch is only reached for non-semicolon tokens
                discardingSemicolon = false;
            }
        }

-        private void skipAnnotationSuppression(final Token currentToken) {
+        private void skipAnnotationSuppression(final JavaccToken currentToken) {
            // if processing an annotation, look for a CPD-START or CPD-END
            if (isAnnotation) {
                if (!discardingSuppressing && currentToken.kind == JavaParserConstants.STRING_LITERAL
-                        && CPD_START.equals(currentToken.image)) {
+                        && CPD_START.equals(currentToken.getImage())) {
                    discardingSuppressing = true;
                } else if (discardingSuppressing && currentToken.kind == JavaParserConstants.STRING_LITERAL
-                        && CPD_END.equals(currentToken.image)) {
+                        && CPD_END.equals(currentToken.getImage())) {
                    discardingSuppressing = false;
                }
            }
@ -166,7 +167,7 @@ public class JavaTokenizer extends JavaCCTokenizer {
                    || discardingSuppressing;
        }

-        private void detectAnnotations(Token currentToken) {
+        private void detectAnnotations(JavaccToken currentToken) {
            if (isAnnotation && nextTokenEndsAnnotation) {
                isAnnotation = false;
                nextTokenEndsAnnotation = false;
@ -211,24 +212,24 @@ public class JavaTokenizer extends JavaCCTokenizer {
            classMembersIndentations = new LinkedList<>();
        }

-        public void processToken(Token currentToken) {
+        public void processToken(JavaccToken currentToken) {
            if (!ignoreIdentifiers) {
                return;
            }

            switch (currentToken.kind) {
            case JavaParserConstants.IDENTIFIER:
-                if ("enum".equals(currentToken.image)) {
+                if ("enum".equals(currentToken.getImage())) {
                    // If declaring an enum, add a new block nesting level at
                    // which constructors may exist
                    pushTypeDeclaration();
                } else if (storeNextIdentifier) {
-                    classMembersIndentations.peek().name = currentToken.image;
+                    classMembersIndentations.peek().name = currentToken.getImage();
                    storeNextIdentifier = false;
                }

                // Store this token
-                prevIdentifier = currentToken.image;
+                prevIdentifier = currentToken.getImage();
                break;

            case JavaParserConstants.CLASS:
@ -271,7 +272,7 @@ public class JavaTokenizer extends JavaCCTokenizer {
            storeNextIdentifier = true;
        }

-        public void restoreConstructorToken(Tokens tokenEntries, Token currentToken) {
+        public void restoreConstructorToken(Tokens tokenEntries, JavaccToken currentToken) {
            if (!ignoreIdentifiers) {
                return;
            }
--- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/JavaTokenManager.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/JavaTokenManager.java
@ -7,7 +7,7 @@ package net.sourceforge.pmd.lang.java;
 import java.io.Reader;

 import net.sourceforge.pmd.lang.TokenManager;
-import net.sourceforge.pmd.lang.ast.JavaCharStream;
+import net.sourceforge.pmd.lang.ast.impl.javacc.CharStreamFactory;
 import net.sourceforge.pmd.lang.java.ast.JavaParserTokenManager;

 /**
@ -17,7 +17,7 @@ public class JavaTokenManager implements TokenManager {
    private final JavaParserTokenManager tokenManager;

    public JavaTokenManager(Reader source) {
-        tokenManager = new JavaParserTokenManager(new JavaCharStream(source));
+        tokenManager = new JavaParserTokenManager(CharStreamFactory.javaCharStream(source));
    }

    @Override
--- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/ASTCompilationUnit.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/ASTCompilationUnit.java
@ -11,6 +11,7 @@ import java.util.Map;
 import net.sourceforge.pmd.annotation.InternalApi;
 import net.sourceforge.pmd.lang.ast.Node;
 import net.sourceforge.pmd.lang.ast.RootNode;
+import net.sourceforge.pmd.lang.ast.impl.TokenDocument;
 import net.sourceforge.pmd.lang.java.typeresolution.ClassTypeResolver;

 // FUTURE Change this class to extend from SimpleJavaNode, as TypeNode is not appropriate (unless I'm wrong)
@ -19,6 +20,7 @@ public class ASTCompilationUnit extends AbstractJavaTypeNode implements RootNode
    private ClassTypeResolver classTypeResolver;
    private List<Comment> comments;
    private Map<Integer, String> noPmdComments = Collections.emptyMap();
+    private TokenDocument tokenDocument;

    @InternalApi
    @Deprecated
@ -42,6 +44,16 @@ public class ASTCompilationUnit extends AbstractJavaTypeNode implements RootNode
        this.comments = comments;
    }

+    @Override
+    public CharSequence getText() {
+        return tokenDocument.getFullText(); // full text of the token document; NOTE(review): tokenDocument has no initializer and is only assigned by setTokenDocument, so confirm nothing calls getText() before the parser has set it
+    }
+
+
+    void setTokenDocument(TokenDocument document) {
+        this.tokenDocument = document;
+    }
+
    @Override
    public Object jjtAccept(JavaParserVisitor visitor, Object data) {
        return visitor.visit(this, data);
@ -86,6 +98,12 @@ public class ASTCompilationUnit extends AbstractJavaTypeNode implements RootNode
        return classTypeResolver;
    }

+
+    @Override
+    public ASTCompilationUnit getRoot() {
+        return this;
+    }
+
    @InternalApi
    @Deprecated
    public void setClassTypeResolver(ClassTypeResolver classTypeResolver) {
--- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/ASTSwitchExpression.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/ASTSwitchExpression.java
@ -24,4 +24,10 @@ public class ASTSwitchExpression extends AbstractJavaTypeNode {
    public Object jjtAccept(JavaParserVisitor visitor, Object data) {
        return visitor.visit(this, data);
    }
+
+    @Override
+    public <T> void jjtAccept(SideEffectingVisitor<T> visitor, T data) {
+        visitor.visit(this, data);
+    }
+
 }
--- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/ASTSwitchLabeledBlock.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/ASTSwitchLabeledBlock.java
@ -26,6 +26,11 @@ public class ASTSwitchLabeledBlock extends AbstractJavaNode implements ASTSwitch
        return visitor.visit(this, data);
    }

+    @Override
+    public <T> void jjtAccept(SideEffectingVisitor<T> visitor, T data) {
+        visitor.visit(this, data);
+    }
+
    @Override
    public void jjtClose() {
        super.jjtClose();
--- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/ASTSwitchLabeledExpression.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/ASTSwitchLabeledExpression.java
@ -26,6 +26,11 @@ public class ASTSwitchLabeledExpression extends AbstractJavaNode implements ASTS
        return visitor.visit(this, data);
    }

+    @Override
+    public <T> void jjtAccept(SideEffectingVisitor<T> visitor, T data) {
+        visitor.visit(this, data);
+    }
+
    @Override
    public void jjtClose() {
        super.jjtClose();
--- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/ASTSwitchLabeledThrowStatement.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/ASTSwitchLabeledThrowStatement.java
@ -26,6 +26,11 @@ public class ASTSwitchLabeledThrowStatement extends AbstractJavaNode implements
        return visitor.visit(this, data);
    }

+    @Override
+    public <T> void jjtAccept(SideEffectingVisitor<T> visitor, T data) {
+        visitor.visit(this, data);
+    }
+
    @Override
    public void jjtClose() {
        super.jjtClose();
--- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/ASTYieldStatement.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/ASTYieldStatement.java
@ -21,6 +21,11 @@ public class ASTYieldStatement extends AbstractJavaTypeNode {
        return visitor.visit(this, data);
    }

+    @Override
+    public <T> void jjtAccept(SideEffectingVisitor<T> visitor, T data) {
+        visitor.visit(this, data);
+    }
+
    @Override
    public String getImage() {
        String result = super.getImage();
--- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/AbstractJavaNode.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/AbstractJavaNode.java
@ -4,9 +4,12 @@

 package net.sourceforge.pmd.lang.java.ast;

+import org.checkerframework.checker.nullness.qual.NonNull;
+
 import net.sourceforge.pmd.annotation.InternalApi;
 import net.sourceforge.pmd.lang.ast.AbstractNode;
 import net.sourceforge.pmd.lang.ast.Node;
+import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
 import net.sourceforge.pmd.lang.symboltable.Scope;

@Deprecated
@ -16,6 +19,8 @@ public abstract class AbstractJavaNode extends AbstractNode implements JavaNode
    protected JavaParser parser;
    private Scope scope;
    private Comment comment;
+    private ASTCompilationUnit root;
+    private CharSequence text;

    @InternalApi
    @Deprecated
@ -31,34 +36,34 @@ public abstract class AbstractJavaNode extends AbstractNode implements JavaNode
    }

    @Override
-    public void jjtOpen() {
-        if (beginLine == -1 && parser.token.next != null) {
-            beginLine = parser.token.next.beginLine;
-            beginColumn = parser.token.next.beginColumn;
-        }
+    public int getBeginLine() {
+        return jjtGetFirstToken().getBeginLine();
    }

    @Override
-    public void jjtClose() {
-        if (beginLine == -1 && children.length == 0) {
-            beginColumn = parser.token.beginColumn;
-        }
-        if (beginLine == -1) {
-            beginLine = parser.token.beginLine;
-        }
-        endLine = parser.token.endLine;
-        endColumn = parser.token.endColumn;
+    public int getBeginColumn() {
+        return jjtGetFirstToken().getBeginColumn();
    }

    @Override
-    public Object jjtAccept(JavaParserVisitor visitor, Object data) {
-        return visitor.visit(this, data);
+    public int getEndLine() {
+        return jjtGetLastToken().getEndLine();
+    }
+
+    @Override
+    public int getEndColumn() {
+        return jjtGetLastToken().getEndColumn();
    }


    @Override
-    public <T> void jjtAccept(SideEffectingVisitor<T> visitor, T data) {
-        visitor.visit(this, data);
+    public JavaNode jjtGetParent() {
+        return (JavaNode) super.jjtGetParent();
+    }
+
+    @Override
+    public JavaNode jjtGetChild(int index) {
+        return (JavaNode) super.jjtGetChild(index);
    }


@ -92,6 +97,14 @@ public abstract class AbstractJavaNode extends AbstractNode implements JavaNode
        return scope;
    }

+    @Override
+    public CharSequence getText() {
+        if (text == null) { // computed on first access, then cached in the 'text' field
+            text = getRoot().getText().subSequence(getStartOffset(), getEndOffset()); // slice of the root's text from the first token's start offset to the last token's end offset
+        }
+        return text;
+    }
+
    @InternalApi
    @Deprecated
    @Override
@ -109,9 +122,48 @@ public abstract class AbstractJavaNode extends AbstractNode implements JavaNode
        return comment;
    }

+    @Override
+    @NonNull
+    public ASTCompilationUnit getRoot() {
+        // storing a reference on each node ensures that each path is roamed
+        // at most once.
+        if (root == null) {
+            root = jjtGetParent().getRoot(); // delegates to the parent; ASTCompilationUnit overrides getRoot() to return itself, which ends the recursion. NOTE(review): assumes a non-null parent - confirm for nodes not attached to a tree
+        }
+        return root;
+    }

    @Override
-    public final String getXPathNodeName() {
+    public JavaccToken jjtGetFirstToken() {
+        return (JavaccToken) firstToken;
+    }
+
+    @Override
+    public JavaccToken jjtGetLastToken() {
+        return (JavaccToken) lastToken;
+    }
+
+    @Override
+    public String getXPathNodeName() {
        return JavaParserTreeConstants.jjtNodeName[id];
    }
+
+
+    /**
+     * The toString of Java nodes is only meant for debugging purposes
+     * as it's pretty expensive.
+     *
+     * <p>The result has the shape {@code |name|start,end|text}, where
+     * {@code name} is the XPath node name, {@code start} and {@code end}
+     * are the document offsets of the first and last token, and
+     * {@code text} is the value of {@link #getText()}.
+     */
+    @Override
+    public String toString() {
+        return "|" + getXPathNodeName() + "|" + getStartOffset() + "," + getEndOffset() + "|" + getText();
+    }
+
+    private int getStartOffset() {
+        return this.jjtGetFirstToken().getStartInDocument();
+    }
+
+
+    private int getEndOffset() {
+        return this.jjtGetLastToken().getEndInDocument();
+    }
 }
--- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/Comment.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/Comment.java
@ -14,6 +14,7 @@ import org.apache.commons.lang3.StringUtils;

 import net.sourceforge.pmd.PMD;
 import net.sourceforge.pmd.lang.ast.AbstractNode;
+import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;

 public abstract class Comment extends AbstractNode {
    // single regex, that captures: the start of a multi-line comment (/**|/*), the start of a single line comment (//)
@ -23,10 +24,12 @@ public abstract class Comment extends AbstractNode {
    // Same as "\\R" - but \\R is only available with java8+
    static final Pattern NEWLINES_PATTERN = Pattern.compile("\\u000D\\u000A|[\\u000A\\u000B\\u000C\\u000D\\u0085\\u2028\\u2029]");

-    protected Comment(Token t) {
-        super(-1, t.beginLine, t.endLine, t.beginColumn, t.endColumn);
+    protected Comment(JavaccToken t) {
+        super(-1);

-        setImage(t.image);
+        setImage(t.getImage());
+        jjtSetFirstToken(t);
+        jjtSetLastToken(t);
    }

    @Override
@ -34,6 +37,7 @@ public abstract class Comment extends AbstractNode {
        return getImage();
    }

+
    /**
     * Filters the comment by removing the leading comment marker (like {@code *}) of each line
     * as well as the start markers ({@code //}, {@code /*} or {@code /**}
--- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/CommentUtil.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/CommentUtil.java
@ -13,6 +13,7 @@ import java.util.regex.Pattern;

 import org.apache.commons.lang3.StringUtils;

+import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
 import net.sourceforge.pmd.lang.java.javadoc.JavadocTag;

 /**
@ -147,8 +148,7 @@ public final class CommentUtil {
    @Deprecated // will be removed with PMD 7.0.0
    public static List<String> multiLinesIn(String comment) {
        // temporarily create a MultiLineComment node so that its comment filtering can be reused
-        Token t = new Token();
-        t.image = comment;
+        JavaccToken t = new JavaccToken(comment);
        MultiLineComment node = new MultiLineComment(t);
        return Arrays.asList(Comment.NEWLINES_PATTERN.split(node.getFilteredComment()));
    }
--- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/DummyJavaNode.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/DummyJavaNode.java
@ -25,4 +25,14 @@ public class DummyJavaNode extends AbstractJavaNode {
    public DummyJavaNode(JavaParser parser, int id) {
        super(parser, id);
    }
+
+    @Override
+    public Object jjtAccept(JavaParserVisitor visitor, Object data) {
+        return data;
+    }
+
+    @Override
+    public <T> void jjtAccept(SideEffectingVisitor<T> visitor, T data) {
+        // do nothing
+    }
 }
--- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/FormalComment.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/FormalComment.java
@ -10,13 +10,14 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 import net.sourceforge.pmd.lang.ast.Node;
+import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
 import net.sourceforge.pmd.lang.java.javadoc.JavadocTag;

 public class FormalComment extends Comment {

    private static final Pattern JAVADOC_TAG = Pattern.compile("@([A-Za-z0-9]+)");

-    public FormalComment(Token t) {
+    public FormalComment(JavaccToken t) {
        super(t);

        findJavadocs();
--- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/InternalApiBridge.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/InternalApiBridge.java
@ -9,7 +9,7 @@ import java.io.Reader;
 import net.sourceforge.pmd.annotation.InternalApi;
 import net.sourceforge.pmd.lang.ParserOptions;
 import net.sourceforge.pmd.lang.ast.AbstractTokenManager;
-import net.sourceforge.pmd.lang.ast.JavaCharStream;
+import net.sourceforge.pmd.lang.ast.impl.javacc.CharStreamFactory;

 /**
 * Acts as a bridge between outer parts of PMD and the restricted access
@ -29,7 +29,7 @@ public final class InternalApiBridge {
    }

    public static ASTCompilationUnit parseInternal(String fileName, Reader source, int jdkVersion, boolean preview, ParserOptions options) {
-        JavaParser parser = new JavaParser(new JavaCharStream(source));
+        JavaParser parser = new JavaParser(CharStreamFactory.javaCharStream(source));
        String suppressMarker = options.getSuppressMarker();
        if (suppressMarker != null) {
            parser.setSuppressMarker(suppressMarker);
--- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaNode.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaNode.java
@ -5,7 +5,11 @@
 package net.sourceforge.pmd.lang.java.ast;


+import org.checkerframework.checker.nullness.qual.NonNull;
+
 import net.sourceforge.pmd.annotation.InternalApi;
+import net.sourceforge.pmd.lang.ast.TextAvailableNode;
+import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
 import net.sourceforge.pmd.lang.symboltable.Scope;
 import net.sourceforge.pmd.lang.symboltable.ScopedNode;

@ -13,7 +17,7 @@ import net.sourceforge.pmd.lang.symboltable.ScopedNode;
 /**
 * Root interface for all Nodes of the Java AST.
 */
-public interface JavaNode extends ScopedNode {
+public interface JavaNode extends ScopedNode, TextAvailableNode {

    /**
     * Calls back the visitor's visit method corresponding to the runtime type of this Node.
@ -65,4 +69,22 @@ public interface JavaNode extends ScopedNode {
    @Deprecated
    void setScope(Scope scope);

+
+    @Override
+    JavaNode jjtGetChild(int index);
+
+
+    @Override
+    JavaNode jjtGetParent();
+
+
+    JavaccToken jjtGetFirstToken();
+
+
+    JavaccToken jjtGetLastToken();
+
+
+    @Override
+    @NonNull ASTCompilationUnit getRoot();
+
 }
--- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaTokenFactory.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaTokenFactory.java
@ -0,0 +1,94 @@
+/*
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.lang.java.ast;
+
+import net.sourceforge.pmd.lang.ast.CharStream;
+import net.sourceforge.pmd.lang.ast.JavaCharStream;
+import net.sourceforge.pmd.lang.ast.impl.TokenDocument;
+import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
+
+/**
+ * Support methods for the token manager. The call to {@link #newToken(int, CharStream)}
+ * is hacked in via search/replace on {@link JavaParserTokenManager}.
+ *
+ * <p>{@link #newToken(int, CharStream)} picks the token implementation from the token kind:
+ * <ul>
+ * <li>{@code GT}, {@code RSIGNEDSHIFT} and {@code RUNSIGNEDSHIFT} all produce a {@code GTToken}
+ *     whose kind is {@code GT} and whose image is {@code ">"}; the kind that was actually
+ *     matched is kept in {@code realKind} and is read back with {@link #getRealKind(JavaccToken)}.
+ * <li>{@code WHITESPACE} produces a {@code LazyImageToken}, which is created with a null image
+ *     and extracts it from the document text when {@code getImage()} is called.
+ * <li>Every other kind produces a plain {@link JavaccToken}, using the shared string from
+ *     {@code JavaParserTokenManager.jjstrLiteralImages} when there is one, and the char
+ *     stream's current image otherwise.
+ * </ul>
+ *
+ * <p>In all cases the start offset, end offset and token document are read from the char stream.
+ */
+final class JavaTokenFactory {
+
+    private JavaTokenFactory() {
+
+    }
+
+    static JavaccToken newToken(int kind, CharStream charStream) {
+        JavaCharStream jcs = (JavaCharStream) charStream; // unconditional cast: any other non-null CharStream implementation fails here with a ClassCastException
+
+        switch (kind) {
+        case JavaParserConstants.RUNSIGNEDSHIFT:
+        case JavaParserConstants.RSIGNEDSHIFT:
+        case JavaParserConstants.GT:
+            return new GTToken(
+                JavaParserConstants.GT, // the kind exposed on the token is always GT
+                kind, // realKind: the kind that was actually matched
+                ">",
+                jcs.getStartOffset(),
+                jcs.getEndOffset(), // NOTE(review): confirm this is the end of the single ">" and not of the whole ">>" / ">>>" match
+                jcs.getTokenDocument()
+            );
+
+        case JavaParserConstants.WHITESPACE:
+            // We don't create a new string for the image of whitespace tokens eagerly
+
+            // It's unlikely that anybody cares about that, and since
+            // they're still 30% of all tokens this is advantageous
+            return new LazyImageToken(
+                kind,
+                jcs.getStartOffset(),
+                jcs.getEndOffset(),
+                jcs.getTokenDocument()
+            );
+
+        default:
+            // Most tokens have an entry in there, it's used to share the
+            // image string for keywords & punctuation. Those represent ~40%
+            // of token instances
+            String image = JavaParserTokenManager.jjstrLiteralImages[kind];
+
+            return new JavaccToken(
+                kind,
+                image == null ? charStream.GetImage() : image, // fall back to the stream's image when the kind has no shared literal string
+                jcs.getStartOffset(),
+                jcs.getEndOffset(),
+                jcs.getTokenDocument()
+            );
+        }
+    }
+
+    static int getRealKind(JavaccToken token) {
+        return token instanceof GTToken ? ((GTToken) token).realKind : token.kind; // tokens that are not GTTokens simply report their own kind
+    }
+
+    private static final class LazyImageToken extends JavaccToken {
+
+        LazyImageToken(int kind, int startInclusive, int endExclusive, TokenDocument document) {
+            super(kind, null, startInclusive, endExclusive, document); // null image: getImage() below produces it on demand
+        }
+
+        @Override
+        public String getImage() {
+            return document.getFullText().substring(getStartInDocument(), getEndInDocument()); // builds a new substring on every call; the result is not cached
+        }
+    }
+
+    private static final class GTToken extends JavaccToken {
+
+        final int realKind; // as created by newToken: GT, RSIGNEDSHIFT or RUNSIGNEDSHIFT
+
+        GTToken(int kind, int realKind, CharSequence image, int startOffset, int endOffset, TokenDocument doc) {
+            super(kind, image, startOffset, endOffset, doc);
+            this.realKind = realKind;
+        }
+
+    }
+
+
+}
--- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/MultiLineComment.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/MultiLineComment.java
@ -4,9 +4,11 @@

 package net.sourceforge.pmd.lang.java.ast;

+import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
+
 public class MultiLineComment extends Comment {

-    public MultiLineComment(Token t) {
+    public MultiLineComment(JavaccToken t) {
        super(t);
    }

--- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/SingleLineComment.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/SingleLineComment.java
@ -4,9 +4,11 @@

 package net.sourceforge.pmd.lang.java.ast;

+import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
+
 public class SingleLineComment extends Comment {

-    public SingleLineComment(Token t) {
+    public SingleLineComment(JavaccToken t) {
        super(t);
    }

--- a/Show More
+++ b/Show More