From b3860a3d5795ac4ac33ce14ba76d87a06e6e4013 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Fournier?= Date: Tue, 6 Aug 2019 09:42:48 +0200 Subject: [PATCH] Compute image of whitespace tokens lazily Nobody cares about the image of whitespace tokens, and they make up a significant part of token sequences. Alternatively, we could intern their images, which would most likely yield a near-100% cache hit rate because of indentation patterns. --- .../pmd/lang/ast/impl/JavaccToken.java | 2 +- .../pmd/lang/java/ast/ASTCompilationUnit.java | 5 ---- .../pmd/lang/java/ast/JavaNode.java | 1 + .../pmd/lang/java/ast/JavaTokenUtils.java | 24 +++++++++++++++++++ 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/JavaccToken.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/JavaccToken.java index 1b2197c3dc..ac407b6eee 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/JavaccToken.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/impl/JavaccToken.java @@ -67,7 +67,7 @@ public class JavaccToken implements GenericToken, java.io.Serializable { private final CharSequence image; private final int startInclusive; private final int endExclusive; - private final TokenDocument document; + protected final TokenDocument document; /** {@link #undefined()} */ private JavaccToken() { diff --git a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/ASTCompilationUnit.java b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/ASTCompilationUnit.java index 078e35ab3b..b12f7c9e62 100644 --- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/ASTCompilationUnit.java +++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/ASTCompilationUnit.java @@ -104,11 +104,6 @@ public final class ASTCompilationUnit extends AbstractJavaTypeNode implements Ro return classTypeResolver; } - @Override - public ASTCompilationUnit getRoot() { - return this; - } - @InternalApi @Deprecated public void 
setClassTypeResolver(ClassTypeResolver classTypeResolver) { diff --git a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaNode.java b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaNode.java index 4e488027f7..cfc8bbad63 100644 --- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaNode.java +++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaNode.java @@ -85,6 +85,7 @@ public interface JavaNode extends ScopedNode, TextAvailableNode { @Override JavaNode jjtGetParent(); + GenericToken jjtGetFirstToken(); diff --git a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaTokenUtils.java b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaTokenUtils.java index 1e67b3093a..6ac09c6d35 100644 --- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaTokenUtils.java +++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/JavaTokenUtils.java @@ -32,6 +32,18 @@ final class JavaTokenUtils { jcs.getEndOffset(), jcs.getTokenDocument() ); + case JavaParserConstants.WHITESPACE: + // unlikely that anybody cares about that, and since + // they're still 30% of all tokens best make this assumption + + // btw 40% of all tokens have a compile-time string constant + // as image (jjstrLiteralImages) so they're shared. + return new LazyImageToken( + kind, + jcs.getStartOffset(), + jcs.getEndOffset(), + jcs.getTokenDocument() + ); default: return new JavaccToken( kind, @@ -47,6 +59,18 @@ final class JavaTokenUtils { return token instanceof GTToken ? 
((GTToken) token).realKind : token.kind; } + private static final class LazyImageToken extends JavaccToken { + + public LazyImageToken(int kind, int startInclusive, int endExclusive, TokenDocument document) { + super(kind, null, startInclusive, endExclusive, document); + } + + @Override + public String getImage() { + return document.getFullText().substring(getStartInDocument(), getEndInDocument()); + } + } + private static final class GTToken extends JavaccToken { final int realKind;