Refactor JavaCharStream to buffer whole file in memory, provide access to underlying text for all java nodes

This commit is contained in:
Clément Fournier
2019-06-08 22:05:10 +02:00
parent 1e1a13da42
commit a70e70ad15
23 changed files with 617 additions and 163 deletions

View File

@ -56,13 +56,23 @@
javacchome="${javacc-home.path}" />
<replace file="${target}/net/sourceforge/pmd/lang/ast/dummy/JavaCharStream.java"
token="net.sourceforge.pmd.lang.ast.dummy"
value="net.sourceforge.pmd.lang.ast" />
value="net.sourceforge.pmd.lang.ast.impl" />
<replace file="${target}/net/sourceforge/pmd/lang/ast/dummy/JavaCharStream.java"
token="class JavaCharStream"
value="class JavaCharStream implements CharStream" />
<move overwrite="true"
token="JavaCharStream"
value="JavaCharStreamBase"/>
<replace file="${target}/net/sourceforge/pmd/lang/ast/dummy/JavaCharStream.java"
token="class JavaCharStreamBase"
value="abstract class JavaCharStreamBase implements net.sourceforge.pmd.lang.ast.CharStream" />
<replace file="${target}/net/sourceforge/pmd/lang/ast/dummy/JavaCharStream.java"
token="char c;"
value="char c; beforeReadChar();" />
<replace file="${target}/net/sourceforge/pmd/lang/ast/dummy/JavaCharStream.java"
token="/** Read a character. */"
value="protected void beforeReadChar() { }" />
<move overwrite="true"
file="${target}/net/sourceforge/pmd/lang/ast/dummy/JavaCharStream.java"
tofile="${target}/net/sourceforge/pmd/lang/ast/JavaCharStream.java" />
tofile="${target}/net/sourceforge/pmd/lang/ast/impl/JavaCharStreamBase.java" />
<!-- Generate ASCII w/o Unicode Escapes CharStream implementation -->
<javacc usercharstream="false"

View File

@ -34,6 +34,16 @@ public class AntlrToken implements GenericToken {
return null;
}
/**
 * Returns the start index reported by the wrapped {@code token}.
 * NOTE(review): presumably a 0-based character index into the input — confirm
 * against the token API.
 */
@Override
public int getStartDocumentOffset() {
return token.getStartIndex();
}
/**
 * Returns the offset just past the last character of this token, so that
 * {@code [getStartDocumentOffset(), getEndDocumentOffset())} can be used
 * directly as the bounds of {@code CharSequence.subSequence(start, end)}.
 *
 * <p>The wrapped token's stop index designates the last character of the
 * token (inclusive), hence the {@code + 1}. For a zero-length token whose
 * stop index is {@code start - 1}, this yields {@code start}.
 * NOTE(review): assumes {@code token} is an ANTLR token with an inclusive
 * stop index — confirm.
 */
@Override
public int getEndDocumentOffset() {
    return token.getStopIndex() + 1;
}
@Override
public GenericToken getPreviousComment() {
return previousComment;

View File

@ -28,6 +28,13 @@ public interface GenericToken {
*/
String getImage();
// TODO implement everywhere by generalizing the use of JavaccToken
/**
 * Returns the offset at which this token starts.
 * NOTE(review): presumably a 0-based character offset into the text of
 * the document the token was lexed from — confirm.
 *
 * @return the start offset of this token
 */
int getStartDocumentOffset();
/**
 * Returns the offset at which this token ends.
 * NOTE(review): whether this bound is inclusive or exclusive is not
 * specified here — confirm and document it.
 *
 * @return the end offset of this token
 */
int getEndDocumentOffset();
/**
* Gets the line where the token's region begins
* @return a non-negative integer containing the begin line

View File

@ -0,0 +1,114 @@
/*
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.lang.ast.impl;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import org.apache.commons.io.IOUtils;
import net.sourceforge.pmd.lang.ast.internal.SharingCharSeq;
/**
 * This stream buffers the whole file in memory before parsing,
 * and shares the char array between all tokens.
 *
 * <p>In addition to what the superclass tracks, this class records, for
 * every position of the character buffer, the index in the full text that
 * was current when that position was about to be filled. Those indices
 * back {@link #getStartOffset()} and {@link #getEndOffset()}.
 *
 * @author Clément Fournier
 */
public class JavaCharStream extends JavaCharStreamBase {

    // full text with nothing escaped and all
    private final SharingCharSeq seq;

    // startOffsets[i] holds the value of (nextCharInd + 1) recorded by
    // beforeReadChar() for buffer position i
    private int[] startOffsets;

    /**
     * Builds a stream over the given text. Instances are obtained through
     * {@link #createStream(Reader)}.
     *
     * @param fulltext the complete text to read from
     */
    private JavaCharStream(String fulltext) {
        super(new StringReader(fulltext));
        this.seq = new SharingCharSeq(fulltext);
        this.startOffsets = new int[bufsize];
        maxNextCharInd = seq.length();
        nextCharBuf = null; // the char buf is emulated by the SharingCharSeq and isn't needed
    }

    /**
     * Grows {@code startOffsets} by 2048 entries and moves the entries
     * starting at {@code tokenBegin} to the front of the new array, then
     * delegates to the superclass.
     *
     * @param wrapAround if true, the first {@code bufpos} entries of the old
     *                   array are additionally appended after the moved ones
     */
    protected void ExpandBuff(boolean wrapAround) {
        int[] newStartOffsets = new int[bufsize + 2048];
        int tailLength = bufsize - tokenBegin;

        // The copy reads bufsize, tokenBegin and bufpos, so it is done before
        // delegating (the superclass presumably updates them — TODO confirm).
        System.arraycopy(startOffsets, tokenBegin, newStartOffsets, 0, tailLength);
        if (wrapAround) {
            System.arraycopy(startOffsets, 0, newStartOffsets, tailLength, bufpos);
        }
        startOffsets = newStartOffsets;

        super.ExpandBuff(wrapAround);
    }

    /**
     * Records the index of the next character of the full text for the
     * buffer position following {@code bufpos}.
     */
    @Override
    protected void beforeReadChar() {
        // NOTE(review): bufpos + 1 may equal startOffsets.length when bufpos is
        // the last buffer index — confirm the superclass wraps or expands the
        // buffer before this hook can observe that state.
        startOffsets[bufpos + 1] = nextCharInd + 1;
    }

    /**
     * Returns the offset recorded for the buffer position {@code tokenBegin}.
     */
    public int getStartOffset() {
        return startOffsets[tokenBegin];
    }

    /**
     * Returns one more than the offset recorded for the buffer position
     * {@code bufpos}, or the length of the full text if {@code bufpos} lies
     * outside the recorded offsets.
     */
    public int getEndOffset() {
        if (bufpos >= startOffsets.length) {
            return seq.length();
        } else {
            return startOffsets[bufpos] + 1;
        }
    }

    /**
     * Returns the full text this stream reads from.
     */
    public SharingCharSeq getFullText() {
        return seq;
    }

    /**
     * Returns the next character of the full text.
     *
     * @throws IOException when the end of the full text has been reached
     */
    @Override
    protected char ReadByte() throws IOException {
        ++nextCharInd;
        if (nextCharInd >= seq.length()) {
            // End of input. NOTE(review): this bookkeeping looks like what a
            // JavaCC-generated FillBuff does on EOF — confirm against the base class.
            if (bufpos != 0) {
                --bufpos;
                backup(0);
            } else {
                bufline[bufpos] = line;
                bufcolumn[bufpos] = column;
            }
            throw new IOException();
        }
        return seq.charAt(nextCharInd);
    }

    @Override
    public String GetImage() {
        // TODO GetImage should return a CharSequence based on the current shared sequence
        return super.GetImage();
    }

    /**
     * Always fails: the whole text is available from the start, so the
     * buffer is never expected to be refilled.
     *
     * @throws IllegalStateException always
     */
    @Override
    protected void FillBuff() {
        throw new IllegalStateException("Buffer shouldn't be refilled");
    }

    /**
     * Reads the given reader to its end and creates a stream over the
     * resulting text. The reader is closed in all cases, including when
     * reading fails.
     *
     * @param dstream the reader to consume
     * @return a new stream over the full contents of the reader
     * @throws RuntimeException wrapping the {@link IOException} if reading
     *                          or closing the reader fails
     */
    public static JavaCharStream createStream(Reader dstream) {
        String fullText;
        // try-with-resources guarantees the reader is closed even if reading throws
        try (Reader in = dstream) {
            fullText = IOUtils.toString(in);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return new JavaCharStream(fullText);
    }
}

View File

@ -0,0 +1,168 @@
/*
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.lang.ast.impl;
import net.sourceforge.pmd.lang.ast.GenericToken;
/**
 * A token carrying its kind, image, offsets and line/column coordinates,
 * linked to the following token and to preceding special tokens.
 *
 * @author Clément Fournier
 */
public class JavaccToken implements GenericToken, java.io.Serializable {

    /**
     * The version identifier for this Serializable class.
     * Increment only if the <i>serialized</i> form of the
     * class changes.
     */
    private static final long serialVersionUID = 1L;

    /**
     * An integer that describes the kind of this token. This numbering
     * system is determined by JavaCCParser, and a table of these numbers is
     * stored in the file ...Constants.java.
     */
    public int kind;

    /**
     * A reference to the next regular (non-special) token from the input
     * stream. If this is the last token from the input stream, or if the
     * token manager has not read tokens beyond this one, this field is
     * set to null. This is true only if this token is also a regular
     * token. Otherwise, see below for a description of the contents of
     * this field.
     */
    public JavaccToken next;

    /**
     * This field is used to access special tokens that occur prior to this
     * token, but after the immediately preceding regular (non-special) token.
     * If there are no such special tokens, this field is set to null.
     * When there are more than one such special token, this field refers
     * to the last of these special tokens, which in turn refers to the next
     * previous special token through its specialToken field, and so on
     * until the first special token (whose specialToken field is null).
     * The next fields of special tokens refer to other special tokens that
     * immediately follow it (without an intervening regular token). If there
     * is no such token, this field is null.
     */
    public JavaccToken specialToken;

    /**
     * The string image of the token. May be null for a token created with
     * the no-argument constructor until a value is assigned.
     *
     * @deprecated Make me private, currently public for some tests that create tokens themselves
     */
    @Deprecated
    public CharSequence image;

    /** The value returned by {@link #getStartDocumentOffset()}. */
    private int startOffset;
    /** The value returned by {@link #getEndDocumentOffset()}. */
    private int endOffset;
    /** The line number of the first character of this Token. */
    private int beginLine;
    /** The column number of the first character of this Token. */
    private int beginColumn;
    /** The line number of the last character of this Token. */
    private int endLine;
    /** The column number of the last character of this Token. */
    private int endColumn;

    /**
     * No-argument constructor. Leaves the image null and every numeric
     * field at zero.
     */
    public JavaccToken() {}

    /**
     * Constructs a new token for the specified Image and Kind.
     *
     * @param kind        the kind of the token
     * @param image       the image of the token
     * @param startOffset the start offset of the token
     * @param endOffset   the end offset of the token
     * @param beginColumn the column of the first character
     * @param endColumn   the column of the last character
     * @param beginLine   the line of the first character
     * @param endLine     the line of the last character
     */
    public JavaccToken(int kind,
                       CharSequence image,
                       int startOffset,
                       int endOffset,
                       int beginColumn,
                       int endColumn,
                       int beginLine,
                       int endLine) {
        this.kind = kind;
        this.image = image;
        this.startOffset = startOffset;
        this.endOffset = endOffset;
        this.beginColumn = beginColumn;
        this.endColumn = endColumn;
        this.beginLine = beginLine;
        this.endLine = endLine;
    }

    @Override
    public GenericToken getNext() {
        return next;
    }

    @Override
    public GenericToken getPreviousComment() {
        return specialToken;
    }

    /**
     * Returns the image as a String, or null if no image has been set
     * (e.g. on a token created with the no-argument constructor).
     */
    @Override
    public String getImage() {
        // the public field may legitimately still be null, don't dereference it blindly
        return image == null ? null : image.toString();
    }

    @Override
    public int getStartDocumentOffset() {
        return startOffset;
    }

    @Override
    public int getEndDocumentOffset() {
        return endOffset;
    }

    @Override
    public int getBeginLine() {
        return beginLine;
    }

    @Override
    public int getEndLine() {
        return endLine;
    }

    @Override
    public int getBeginColumn() {
        return beginColumn;
    }

    @Override
    public int getEndColumn() {
        return endColumn;
    }

    /**
     * An optional attribute value of the Token.
     * Tokens which are not used as syntactic sugar will often contain
     * meaningful values that will be used later on by the compiler or
     * interpreter. This attribute value is often different from the image.
     * Any subclass of Token that actually wants to return a non-null value can
     * override this method as appropriate.
     */
    public Object getValue() {
        return null;
    }

    /**
     * Returns the image, or the string {@code "null"} if no image has been
     * set. Never throws and never returns null.
     */
    @Override
    public String toString() {
        return String.valueOf(image);
    }
}

View File

@ -0,0 +1,85 @@
/*
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.lang.ast.internal;
import java.util.Objects;
import org.checkerframework.checker.nullness.qual.NonNull;
/**
 * A {@link CharSequence} implemented with the (value, offset, count) representation,
 * meaning it shares the underlying char array to allocate sub-sequences.
 * This is advantageous to represent a tokenized file, as we share the
 * char array for the whole file and only change the bounds for each token.
 *
 * <p>Equality and hashing are defined in terms of {@link #toString()}.
 *
 * @author Clément Fournier
 */
public final class SharingCharSeq implements CharSequence {

    private final char[] value;
    private final int offset;
    private final int count;

    // Cache for toString(): set eagerly by the String constructor, lazily
    // otherwise. Access is not synchronized.
    private String myStr = null;

    /**
     * Creates a sequence holding the characters of the given string.
     *
     * @param str the string to copy the characters from
     */
    public SharingCharSeq(String str) {
        this(str.toCharArray(), 0, str.length());
        myStr = str;
    }

    /**
     * Creates a view over {@code count} characters of {@code value},
     * starting at {@code offset}. The array is used as-is, not copied.
     *
     * @param value  the backing array
     * @param offset index in {@code value} of the first character of the view
     * @param count  number of characters in the view
     */
    public SharingCharSeq(char[] value, int offset, int count) {
        this.value = value;
        this.offset = offset;
        this.count = count;
    }

    @Override
    public int length() {
        return count;
    }

    /**
     * {@inheritDoc}
     *
     * @throws StringIndexOutOfBoundsException if {@code index} is negative
     *                                         or not less than {@link #length()}
     */
    @Override
    public char charAt(int index) {
        if (index < 0 || index >= count) {
            throw new StringIndexOutOfBoundsException("Index out of bounds: " + index + " not in [0," + count + "[");
        }
        return value[offset + index];
    }

    /**
     * Returns a view on the same backing array restricted to the given
     * range; no characters are copied.
     *
     * @throws StringIndexOutOfBoundsException if {@code start} or {@code end}
     *                                         is negative, if {@code end} is greater
     *                                         than {@link #length()}, or if
     *                                         {@code start} is greater than {@code end}
     */
    @Override
    public CharSequence subSequence(int start, int end) {
        // start >= 0 and start <= end together imply end >= 0
        if (start < 0 || end > count || start > end) {
            throw new StringIndexOutOfBoundsException(
                "Invalid range: [" + start + "," + end + "[ not in [0," + count + "]");
        }
        return new SharingCharSeq(value, offset + start, end - start);
    }

    @NonNull
    @Override
    public String toString() {
        String str = myStr;
        if (str == null) {
            str = new String(value, offset, count);
            myStr = str;
        }
        return str;
    }

    @Override
    public int hashCode() {
        return toString().hashCode();
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        SharingCharSeq that = (SharingCharSeq) o;
        return Objects.equals(toString(), that.toString());
    }
}

View File

@ -232,6 +232,7 @@ options {
STATIC = false;
USER_CHAR_STREAM = true;
JDK_VERSION = "1.5";
TOKEN_FACTORY = "JavaTokenUtils";
MULTI = true;
VISITOR = true;
@ -250,6 +251,8 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import net.sourceforge.pmd.lang.ast.CharStream;
import net.sourceforge.pmd.lang.ast.GenericToken;
import net.sourceforge.pmd.lang.ast.impl.JavaccToken;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
import net.sourceforge.pmd.lang.ast.Node;
@ -406,9 +409,9 @@ SPECIAL_TOKEN :
{
< SINGLE_LINE_COMMENT: "//"(~["\n","\r"])* ("\n"|"\r"|"\r\n")? >
{
int startOfNOPMD = matchedToken.image.indexOf(suppressMarker);
int startOfNOPMD = matchedToken.getImage().indexOf(suppressMarker);
if (startOfNOPMD != -1) {
suppressMap.put(matchedToken.beginLine, matchedToken.image.substring(startOfNOPMD + suppressMarker.length()));
suppressMap.put(matchedToken.getBeginLine(), matchedToken.getImage().substring(startOfNOPMD + suppressMarker.length()));
}
comments.add(new SingleLineComment(matchedToken));
}
@ -1520,20 +1523,8 @@ TOKEN :
/* >'s need special attention due to generics syntax. */
TOKEN :
{
< RUNSIGNEDSHIFT: ">>>" >
{
matchedToken.kind = GT;
((Token.GTToken)matchedToken).realKind = RUNSIGNEDSHIFT;
input_stream.backup(2);
matchedToken.image = ">";
}
| < RSIGNEDSHIFT: ">>" >
{
matchedToken.kind = GT;
((Token.GTToken)matchedToken).realKind = RSIGNEDSHIFT;
input_stream.backup(1);
matchedToken.image = ">";
}
< RUNSIGNEDSHIFT: ">>>" > { input_stream.backup(2); }
| < RSIGNEDSHIFT: ">>" > { input_stream.backup(1); }
| < GT: ">" >
}
@ -1560,6 +1551,7 @@ ASTCompilationUnit CompilationUnit() :
<EOF>
{
jjtThis.setComments(token_source.comments);
jjtThis.setFileText(((net.sourceforge.pmd.lang.ast.impl.JavaCharStream) token_source.input_stream).getFullText());
return jjtThis;
}
}
@ -1812,7 +1804,7 @@ void FormalParameter() :
void ConstructorDeclaration(int modifiers) :
{jjtThis.setModifiers(modifiers);
Token t;}
JavaccToken t;}
{
[ TypeParameters() ]
<IDENTIFIER> {jjtThis.setImage(getToken(0).getImage());} FormalParameters() [ ThrowsList() ]
@ -3008,7 +3000,7 @@ void RUNSIGNEDSHIFT() #void:
{}
{
( LOOKAHEAD({ getToken(1).kind == GT &&
((Token.GTToken)getToken(1)).realKind == RUNSIGNEDSHIFT} )
((JavaTokenUtils.GTToken)getToken(1)).realKind == RUNSIGNEDSHIFT} )
">" ">" ">"
)
}
@ -3017,7 +3009,7 @@ void RSIGNEDSHIFT() #void:
{}
{
( LOOKAHEAD({ getToken(1).kind == GT &&
((Token.GTToken)getToken(1)).realKind == RSIGNEDSHIFT} )
((JavaTokenUtils.GTToken)getToken(1)).realKind == RSIGNEDSHIFT} )
">" ">"
)
}

View File

@ -123,10 +123,58 @@
</replacevalue>
</replace>
<replace token=" Token " value=" net.sourceforge.pmd.lang.ast.impl.JavaccToken ">
<fileset dir="${target-package-dir}"/>
</replace>
<replace token="(Token " value="(net.sourceforge.pmd.lang.ast.impl.JavaccToken ">
<fileset dir="${target-package-dir}"/>
</replace>
<replace token=" Token(" value=" net.sourceforge.pmd.lang.ast.impl.JavaccToken(">
<fileset dir="${target-package-dir}"/>
</replace>
<replace token=";Token " value=";net.sourceforge.pmd.lang.ast.impl.JavaccToken ">
<fileset dir="${target-package-dir}"/>
</replace>
<replace file="${target-package-dir}/JavaParserTokenManager.java">
<replacetoken><![CDATA[t = JavaTokenUtils.newToken(jjmatchedKind, curTokenImage);
t.beginLine = beginLine;
t.endLine = endLine;
t.beginColumn = beginColumn;
t.endColumn = endColumn;]]></replacetoken>
<replacevalue>
<![CDATA[t = JavaTokenUtils.newToken(jjmatchedKind, curTokenImage, beginLine, endLine, beginColumn, endColumn);]]></replacevalue>
</replace>
<replaceregexp file="${target-package-dir}/JavaParserTokenManager.java" flags="s">
<regexp pattern="protected net.sourceforge.pmd.lang.ast.impl.JavaccToken jjFillToken.*?}" />
<substitution
expression="protected net.sourceforge.pmd.lang.ast.impl.JavaccToken jjFillToken() {return JavaTokenUtils.newToken(jjmatchedKind, input_stream);}"/>
</replaceregexp>
<replace token=".image" value=".getImage()">
<fileset dir="${target-package-dir}"/>
</replace>
<replace token=".beginLine" value=".getBeginLine()">
<fileset dir="${target-package-dir}"/>
</replace>
<replace token=".beginColumn" value=".getBeginColumn()">
<fileset dir="${target-package-dir}"/>
</replace>
<replace file="${target-package-dir}/JavaParserTokenManager.java"
token="class JavaParserTokenManager"
value="class JavaParserTokenManager extends net.sourceforge.pmd.lang.ast.AbstractTokenManager" />
token="public class JavaParserTokenManager"
value="import net.sourceforge.pmd.lang.ast.impl.JavaccToken; public class JavaParserTokenManager extends net.sourceforge.pmd.lang.ast.AbstractTokenManager" />
<replace file="${target-package-dir}/JavaParser.java"
token="throw new Error"
value="throw new RuntimeException" />
@ -176,81 +224,10 @@ public interface" />
public class]]></replacevalue>
</replace>
<replace file="${target-package-dir}/Token.java">
<replacetoken><![CDATA[ default : return new Token(ofKind, image);
}
}
]]></replacetoken>
<replacevalue><![CDATA[ case JavaParserConstants.RUNSIGNEDSHIFT :
case JavaParserConstants.RSIGNEDSHIFT :
case JavaParserConstants.GT:
return new GTToken(ofKind, image);
default : return new Token(ofKind, image);
}
}
public static final class GTToken extends Token {
public int realKind = JavaParserConstants.GT;
public GTToken(int ofKind, String image) {
super(ofKind, image);
}
}
]]></replacevalue>
</replace>
<replace file="${target-package-dir}/Token.java">
<replacetoken>public class Token implements java.io.Serializable</replacetoken>
<replacevalue><![CDATA[import net.sourceforge.pmd.lang.ast.GenericToken;
public class Token implements GenericToken, java.io.Serializable]]></replacevalue>
</replace>
<!--Add implementation methods of GenericToken-->
<replace file="${target-package-dir}/Token.java">
<replacetoken>public Token specialToken;</replacetoken>
<replacevalue><![CDATA[public Token specialToken;
@Override
public GenericToken getNext() {
return next;
}
@Override
public GenericToken getPreviousComment() {
return specialToken;
}
@Override
public String getImage() {
return image;
}
@Override
public int getBeginLine() {
return beginLine;
}
@Override
public int getEndLine() {
return endLine;
}
@Override
public int getBeginColumn() {
return beginColumn;
}
@Override
public int getEndColumn() {
return endColumn;
}
]]></replacevalue>
</replace>
<delete>
<fileset dir="${target-package-dir}">
<include name="AST*.java" />
<include name="Token.java"/>
</fileset>
</delete>
</target>

View File

@ -15,9 +15,9 @@ import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
import net.sourceforge.pmd.cpd.token.TokenFilter;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.GenericToken;
import net.sourceforge.pmd.lang.ast.impl.JavaccToken;
import net.sourceforge.pmd.lang.java.JavaTokenManager;
import net.sourceforge.pmd.lang.java.ast.JavaParserConstants;
import net.sourceforge.pmd.lang.java.ast.Token;
public class JavaTokenizer extends JavaCCTokenizer {
@ -56,7 +56,7 @@ public class JavaTokenizer extends JavaCCTokenizer {
@Override
protected TokenEntry processToken(Tokens tokenEntries, GenericToken currentToken, String fileName) {
String image = currentToken.getImage();
Token javaToken = (Token) currentToken;
JavaccToken javaToken = (JavaccToken) currentToken;
constructorDetector.restoreConstructorToken(tokenEntries, javaToken);
@ -113,17 +113,18 @@ public class JavaTokenizer extends JavaCCTokenizer {
@Override
protected void analyzeToken(final GenericToken currentToken) {
detectAnnotations((Token) currentToken);
JavaccToken token = (JavaccToken) currentToken;
detectAnnotations(token);
skipSemicolon((Token) currentToken);
skipPackageAndImport((Token) currentToken);
skipAnnotationSuppression((Token) currentToken);
skipSemicolon(token);
skipPackageAndImport(token);
skipAnnotationSuppression(token);
if (ignoreAnnotations) {
skipAnnotations();
}
}
private void skipPackageAndImport(final Token currentToken) {
private void skipPackageAndImport(final JavaccToken currentToken) {
if (currentToken.kind == JavaParserConstants.PACKAGE || currentToken.kind == JavaParserConstants.IMPORT) {
discardingKeywords = true;
} else if (discardingKeywords && currentToken.kind == JavaParserConstants.SEMICOLON) {
@ -131,22 +132,22 @@ public class JavaTokenizer extends JavaCCTokenizer {
}
}
private void skipSemicolon(final Token currentToken) {
private void skipSemicolon(final JavaccToken currentToken) {
if (currentToken.kind == JavaParserConstants.SEMICOLON) {
discardingSemicolon = true;
} else if (discardingSemicolon && currentToken.kind != JavaParserConstants.SEMICOLON) {
} else if (discardingSemicolon) {
discardingSemicolon = false;
}
}
private void skipAnnotationSuppression(final Token currentToken) {
private void skipAnnotationSuppression(final JavaccToken currentToken) {
// if processing an annotation, look for a CPD-START or CPD-END
if (isAnnotation) {
if (!discardingSuppressing && currentToken.kind == JavaParserConstants.STRING_LITERAL
&& CPD_START.equals(currentToken.image)) {
&& CPD_START.equals(currentToken.getImage())) {
discardingSuppressing = true;
} else if (discardingSuppressing && currentToken.kind == JavaParserConstants.STRING_LITERAL
&& CPD_END.equals(currentToken.image)) {
&& CPD_END.equals(currentToken.getImage())) {
discardingSuppressing = false;
}
}
@ -166,7 +167,7 @@ public class JavaTokenizer extends JavaCCTokenizer {
|| discardingSuppressing;
}
private void detectAnnotations(Token currentToken) {
private void detectAnnotations(JavaccToken currentToken) {
if (isAnnotation && nextTokenEndsAnnotation) {
isAnnotation = false;
nextTokenEndsAnnotation = false;
@ -211,24 +212,24 @@ public class JavaTokenizer extends JavaCCTokenizer {
classMembersIndentations = new LinkedList<>();
}
public void processToken(Token currentToken) {
public void processToken(JavaccToken currentToken) {
if (!ignoreIdentifiers) {
return;
}
switch (currentToken.kind) {
case JavaParserConstants.IDENTIFIER:
if ("enum".equals(currentToken.image)) {
if ("enum".equals(currentToken.getImage())) {
// If declaring an enum, add a new block nesting level at
// which constructors may exist
pushTypeDeclaration();
} else if (storeNextIdentifier) {
classMembersIndentations.peek().name = currentToken.image;
classMembersIndentations.peek().name = currentToken.getImage();
storeNextIdentifier = false;
}
// Store this token
prevIdentifier = currentToken.image;
prevIdentifier = currentToken.getImage();
break;
case JavaParserConstants.CLASS:
@ -271,7 +272,7 @@ public class JavaTokenizer extends JavaCCTokenizer {
storeNextIdentifier = true;
}
public void restoreConstructorToken(Tokens tokenEntries, Token currentToken) {
public void restoreConstructorToken(Tokens tokenEntries, JavaccToken currentToken) {
if (!ignoreIdentifiers) {
return;
}

View File

@ -7,7 +7,7 @@ package net.sourceforge.pmd.lang.java;
import java.io.Reader;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.JavaCharStream;
import net.sourceforge.pmd.lang.ast.impl.JavaCharStream;
import net.sourceforge.pmd.lang.java.ast.JavaParserTokenManager;
/**
@ -17,7 +17,7 @@ public class JavaTokenManager implements TokenManager {
private final JavaParserTokenManager tokenManager;
public JavaTokenManager(Reader source) {
tokenManager = new JavaParserTokenManager(new JavaCharStream(source));
tokenManager = new JavaParserTokenManager(JavaCharStream.createStream(source));
}
@Override

View File

@ -22,6 +22,7 @@ public final class ASTCompilationUnit extends AbstractJavaTypeNode implements Ro
private ClassTypeResolver classTypeResolver;
private List<Comment> comments;
private Map<Integer, String> noPmdComments = Collections.emptyMap();
private CharSequence fileText;
ASTCompilationUnit(int id) {
super(id);
@ -39,6 +40,16 @@ public final class ASTCompilationUnit extends AbstractJavaTypeNode implements Ro
this.comments = comments;
}
/**
 * Returns the text stored through {@link #setFileText(CharSequence)},
 * or null if none has been stored.
 */
@Override
public CharSequence getText() {
return fileText;
}
/**
 * Stores the text that {@link #getText()} returns for this compilation unit.
 *
 * @param fileText the text to store
 */
void setFileText(CharSequence fileText) {
this.fileText = fileText;
}
@Override
public Object jjtAccept(JavaParserVisitor visitor, Object data) {
return visitor.visit(this, data);

View File

@ -6,7 +6,6 @@ package net.sourceforge.pmd.lang.java.ast;
import org.apache.commons.lang3.StringEscapeUtils;
/**
* Represents a string literal. The image of this node can be the literal as it appeared
* in the source, but JavaCC performs its own unescaping and some escapes may be lost.
@ -27,39 +26,9 @@ public final class ASTStringLiteral extends AbstractLiteral implements ASTLitera
}
private String reconstructedImage = null;
@Override
public String getImage() {
if (reconstructedImage == null) {
reconstructedImage = isTextBlock ? super.getImage() : getEscapedStringLiteral(super.getImage());
}
return reconstructedImage;
}
/**
* Tries to reconstruct the original string literal. If the original length
* is greater than the parsed String literal, then probably some unicode
* escape sequences have been used.
*/
private String getEscapedStringLiteral(String javaccEscaped) {
int fullLength = getEndColumn() - getBeginColumn();
if (fullLength > javaccEscaped.length()) {
StringBuilder result = new StringBuilder(fullLength);
for (int i = 0; i < javaccEscaped.length(); i++) {
char c = javaccEscaped.charAt(i);
if (c < 0x20 || c > 0xff || javaccEscaped.length() == 1) {
String hex = "0000" + Integer.toHexString(c);
result.append("\\u").append(hex.substring(hex.length() - 4));
} else {
result.append(c);
}
}
return result.toString();
}
return javaccEscaped;
return getText().toString();
}
void setTextBlock() {
@ -87,7 +56,7 @@ public final class ASTStringLiteral extends AbstractLiteral implements ASTLitera
* Returns the value without delimiters and unescaped.
*/
public String getUnescapedValue() {
String image = getImage();
String image = super.getImage();
String woDelims = image.substring(1, image.length() - 1);
return StringEscapeUtils.unescapeJava(woDelims);
}

View File

@ -20,6 +20,7 @@ abstract class AbstractJavaNode extends AbstractNode implements JavaNode {
private JSymbolTable symbolTable;
private Comment comment;
private ASTCompilationUnit root;
private CharSequence text;
AbstractJavaNode(int id) {
super(id);
@ -99,11 +100,31 @@ abstract class AbstractJavaNode extends AbstractNode implements JavaNode {
@Override
public Scope getScope() {
if (scope == null) {
return ((JavaNode) parent).getScope();
return jjtGetParent().getScope();
}
return scope;
}
/**
 * Returns the region of the root node's text delimited by
 * {@link #getStartOffset()} and {@link #getEndOffset()}. The result is
 * computed on first access and cached in the {@code text} field.
 */
@Override
public CharSequence getText() {
    if (text != null) {
        // already computed by an earlier call
        return text;
    }
    CharSequence wholeFile = getRoot().getText();
    text = wholeFile.subSequence(getStartOffset(), getEndOffset());
    return text;
}
/**
 * Returns the start document offset of the first token of this node.
 */
@Override
public int getStartOffset() {
return jjtGetFirstToken().getStartDocumentOffset();
}
/**
 * Returns the end document offset of the last token of this node.
 */
@Override
public int getEndOffset() {
return jjtGetLastToken().getEndDocumentOffset();
}
/**
 * Assigns the given scope to the {@code scope} field of this node.
 *
 * @param scope the scope to store
 */
void setScope(Scope scope) {
this.scope = scope;
}

View File

@ -14,6 +14,7 @@ import org.apache.commons.lang3.StringUtils;
import net.sourceforge.pmd.PMD;
import net.sourceforge.pmd.lang.ast.AbstractNode;
import net.sourceforge.pmd.lang.ast.GenericToken;
public abstract class Comment extends AbstractNode {
// single regex, that captures: the start of a multi-line comment (/**|/*), the start of a single line comment (//)
@ -23,10 +24,10 @@ public abstract class Comment extends AbstractNode {
// Same as "\\R" - but \\R is only available with java8+
static final Pattern NEWLINES_PATTERN = Pattern.compile("\\u000D\\u000A|[\\u000A\\u000B\\u000C\\u000D\\u0085\\u2028\\u2029]");
protected Comment(Token t) {
super(-1, t.beginLine, t.endLine, t.beginColumn, t.endColumn);
protected Comment(GenericToken t) {
super(-1);
setImage(t.image);
setImage(t.getImage());
}
@Override

View File

@ -13,6 +13,7 @@ import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import net.sourceforge.pmd.lang.ast.impl.JavaccToken;
import net.sourceforge.pmd.lang.java.javadoc.JavadocTag;
/**
@ -147,7 +148,7 @@ public final class CommentUtil {
@Deprecated // will be removed with PMD 7.0.0
public static List<String> multiLinesIn(String comment) {
// temporary createa a Multiline Comment Node
Token t = new Token();
JavaccToken t = new JavaccToken();
t.image = comment;
MultiLineComment node = new MultiLineComment(t);
return Arrays.asList(Comment.NEWLINES_PATTERN.split(node.getFilteredComment()));

View File

@ -9,6 +9,7 @@ import java.util.Collection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sourceforge.pmd.lang.ast.GenericToken;
import net.sourceforge.pmd.lang.ast.Node;
import net.sourceforge.pmd.lang.java.javadoc.JavadocTag;
@ -16,7 +17,7 @@ public class FormalComment extends Comment {
private static final Pattern JAVADOC_TAG = Pattern.compile("@([A-Za-z0-9]+)");
public FormalComment(Token t) {
public FormalComment(GenericToken t) {
super(t);
findJavadocs();

View File

@ -86,6 +86,9 @@ public interface JavaNode extends ScopedNode {
JavaNode jjtGetParent();
ASTCompilationUnit getRoot();
GenericToken jjtGetFirstToken();
@ -109,6 +112,16 @@ public interface JavaNode extends ScopedNode {
}
/**
 * Returns the offset at which this node starts.
 * NOTE(review): presumably a 0-based offset into the text of the
 * enclosing compilation unit — confirm.
 */
int getStartOffset();
/**
 * Returns the offset at which this node ends.
 * NOTE(review): whether this bound is inclusive or exclusive is not
 * stated here — confirm and document it.
 */
int getEndOffset();
/**
 * Returns the text of this node.
 * NOTE(review): presumably the region of the source file covered by
 * this node — confirm.
 */
CharSequence getText();
/**
* FIXME figure that out
*/

View File

@ -0,0 +1,66 @@
/*
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.lang.java.ast;
import net.sourceforge.pmd.lang.ast.CharStream;
import net.sourceforge.pmd.lang.ast.impl.JavaCharStream;
import net.sourceforge.pmd.lang.ast.impl.JavaccToken;
/**
 * Static factory for the tokens of the Java grammar. Tokens are built from
 * the current state of the char stream, which must be a
 * {@link JavaCharStream}.
 */
final class JavaTokenUtils {

    private JavaTokenUtils() {
        // static members only
    }

    /**
     * Creates the token of the given kind from the positions currently
     * reported by the char stream.
     *
     * <p>The kinds {@code RUNSIGNEDSHIFT}, {@code RSIGNEDSHIFT} and {@code GT}
     * all produce a {@link GTToken} whose kind is {@code GT} and whose image
     * is {@code ">"}; the requested kind is kept in {@link GTToken#realKind}.
     * For any other kind, the image is the literal image registered for that
     * kind if there is one, otherwise the image reported by the stream.
     *
     * @param kind       the kind of token to create
     * @param charStream the stream to take image and positions from
     * @return the new token
     * @throws ClassCastException if {@code charStream} is not a {@link JavaCharStream}
     */
    public static JavaccToken newToken(int kind, CharStream charStream) {
        JavaCharStream stream = (JavaCharStream) charStream;
        String literalImage = JavaParserTokenManager.jjstrLiteralImages[kind];

        if (isGreaterThanLike(kind)) {
            return new GTToken(
                JavaParserConstants.GT,
                kind,
                ">",
                stream.getStartOffset(),
                stream.getEndOffset(),
                stream.getBeginColumn(),
                stream.getEndColumn(),
                stream.getBeginLine(),
                stream.getEndLine()
            );
        }

        // only ask the stream for the image when the kind has no fixed literal
        CharSequence tokenImage = literalImage;
        if (tokenImage == null) {
            tokenImage = charStream.GetImage();
        }
        return new JavaccToken(
            kind,
            tokenImage,
            stream.getStartOffset(),
            stream.getEndOffset(),
            stream.getBeginColumn(),
            stream.getEndColumn(),
            stream.getBeginLine(),
            stream.getEndLine()
        );
    }

    /** Tells whether the kind is one of those represented by a {@link GTToken}. */
    private static boolean isGreaterThanLike(int kind) {
        return kind == JavaParserConstants.RUNSIGNEDSHIFT
            || kind == JavaParserConstants.RSIGNEDSHIFT
            || kind == JavaParserConstants.GT;
    }

    /**
     * A token that additionally remembers a second kind in {@link #realKind}.
     */
    public static final class GTToken extends JavaccToken {

        /** The kind passed as {@code realKind} at construction. */
        public int realKind;

        /**
         * Constructs a new token for the specified Image and Kind.
         */
        public GTToken(int kind, int realKind, CharSequence image, int startOffset, int endOffset, int beginColumn, int endColumn, int beginLine, int endLine) {
            super(kind, image, startOffset, endOffset, beginColumn, endColumn, beginLine, endLine);
            this.realKind = realKind;
        }
    }
}

View File

@ -4,9 +4,11 @@
package net.sourceforge.pmd.lang.java.ast;
import net.sourceforge.pmd.lang.ast.GenericToken;
public class MultiLineComment extends Comment {
public MultiLineComment(Token t) {
public MultiLineComment(GenericToken t) {
super(t);
}

View File

@ -4,9 +4,11 @@
package net.sourceforge.pmd.lang.java.ast;
import net.sourceforge.pmd.lang.ast.GenericToken;
public class SingleLineComment extends Comment {
public SingleLineComment(Token t) {
public SingleLineComment(GenericToken t) {
super(t);
}

View File

@ -9,6 +9,7 @@ import org.junit.Assert;
import org.junit.Test;
import net.sourceforge.pmd.PMD;
import net.sourceforge.pmd.lang.ast.impl.JavaccToken;
public class CommentTest {
@Test
@ -97,7 +98,7 @@ public class CommentTest {
}
private String filter(String comment) {
Token t = new Token();
JavaccToken t = new JavaccToken();
t.image = comment;
Comment node = new Comment(t) {
@Override

View File

@ -7,6 +7,8 @@ package net.sourceforge.pmd.lang.java.ast;
import org.junit.Assert;
import org.junit.Test;
import net.sourceforge.pmd.lang.ast.impl.JavaccToken;
public class FormalCommentTest {
@Test
@ -21,7 +23,7 @@ public class FormalCommentTest {
+ " boolean supports(N node);\n"
+ "";
Token token = new Token();
JavaccToken token = new JavaccToken();
token.image = comment;
FormalComment commentNode = new FormalComment(token);

View File

@ -16,12 +16,12 @@ import org.junit.Test;
import net.sourceforge.pmd.lang.LanguageRegistry;
import net.sourceforge.pmd.lang.LanguageVersionHandler;
import net.sourceforge.pmd.lang.ast.Node;
import net.sourceforge.pmd.lang.ast.impl.JavaccToken;
import net.sourceforge.pmd.lang.java.JavaLanguageModule;
import net.sourceforge.pmd.lang.java.ast.ASTCompilationUnit;
import net.sourceforge.pmd.lang.java.ast.ASTMethodDeclaration;
import net.sourceforge.pmd.lang.java.ast.FormalComment;
import net.sourceforge.pmd.lang.java.ast.MultiLineComment;
import net.sourceforge.pmd.lang.java.ast.Token;
public class AbstractCommentRuleTest {
@ -33,7 +33,7 @@ public class AbstractCommentRuleTest {
*/
@Test
public void testFilteredCommentIn() {
Token token = new Token();
JavaccToken token = new JavaccToken();
token.image = "/* multi line comment with blank lines\n\n\n */";
String filtered = testSubject.filteredCommentIn(new MultiLineComment(token));