[java] Support String Templates for Java 21 Preview

JEP 430
This commit is contained in:
Andreas Dangel
2023-08-04 17:13:48 +02:00
parent c2710c9a91
commit 715d58fef3
9 changed files with 1237 additions and 2 deletions

View File

@ -1,4 +1,6 @@
/**
* Support "JEP 430: String Templates" for Java 21 Preview.
* New AST nodes: ASTTemplateExpression, ASTTemplate, ASTTemplateFragment
* Promote "JEP 441: Pattern Matching for switch" as permanent language feature for Java 21.
* Renamed SwitchGuard to Guard.
* Promote "JEP 440: Record Patterns" as permanent language feature for Java 21.
@ -584,6 +586,53 @@ PARSER_END(JavaParserImpl)
TOKEN_MGR_DECLS :
{
protected List<JavaComment> comments = new ArrayList<JavaComment>();
/**
 * Syntactic context in which a token starting with "}" is being read.
 * The value is derived by determineContext() from the parser methods on the call stack.
 */
enum TokenContext {
    /** The method "StringTemplate" is the closest known method on the stack. */
    STRING_TEMPLATE,
    /** The method "TextBlockTemplate" is the closest known method on the stack. */
    TEXT_BLOCK_TEMPLATE,
    /** A brace-delimited construct (e.g. "Block", "SwitchBlock") is the closest known method on the stack. */
    BLOCK
}
/**
 * Finds the real end of a text block template fragment inside an over-long match:
 * a leading "}" followed by text block content up to the first closing delimiter (three double quotes).
 * Content may contain single or double quotes (a quote that is not the start of three quotes)
 * and backslash escapes such as an escaped quote; neither terminates the fragment.
 */
private static final java.util.regex.Pattern TEXT_BLOCK_TEMPLATE_END_PATTERN =
    java.util.regex.Pattern.compile("^}(?:[^\"\\\\]|\\\\[\\s\\S]|\"(?!\"\"))*\"\"\"");
/**
 * Finds the real end of a string template fragment inside an over-long match:
 * a leading "}" followed by string characters or escape sequences, terminated either by
 * backslash + "{" (start of the next embedded expression, i.e. a MID fragment)
 * or by a double quote (END fragment). Group 1 holds the terminator exactly as written
 * in the source, so it is either the two characters backslash + "{" or a single double quote.
 */
private static final java.util.regex.Pattern STRING_TEMPLATE_MID_OR_END_PATTERN =
    java.util.regex.Pattern.compile("^}(?:[^\"\\\\\n\r]|\\\\(?:[ntbrfs\\\\'\"]|[0-7][0-7]?|[0-3][0-7][0-7]))*(\\\\\\{|\")");
/**
 * Determines in which syntactic context the token manager is currently asked for a token,
 * by looking at the names of the methods on the current call stack.
 * The stack is searched starting with the most recent call; the first known method name wins.
 *
 * @return the context of the closest known method on the stack,
 *         or {@code null} if none of the known methods is on the stack
 */
private TokenContext determineContext() {
    // The Throwable constructor already records the stack trace,
    // an additional fillInStackTrace() would only capture the same stack a second time.
    StackTraceElement[] stack = new Throwable().getStackTrace();
    for (StackTraceElement e : stack) {
        String method = e.getMethodName();
        if ("TextBlockTemplate".equals(method)) {
            return TokenContext.TEXT_BLOCK_TEMPLATE;
        } else if ("StringTemplate".equals(method)) {
            return TokenContext.STRING_TEMPLATE;
        } else if ("Block".equals(method)
                || "ClassOrInterfaceBody".equals(method)
                || "ArrayInitializer".equals(method)
                || "MemberValueArrayInitializer".equals(method)
                || "SwitchBlock".equals(method)) {
            // constructs that are closed by a plain "}"
            return TokenContext.BLOCK;
        }
    }
    // no known method on the stack: callers must be prepared for null
    return null;
}
/**
 * Discards the current match and re-reads the input as a token of another kind and length.
 * The stream is backed up by the length of the current match, then {@code length}
 * characters are read again and the token is created with the given kind.
 *
 * @param kind   the token kind to assign to the re-read token
 * @param length the number of characters the re-read token consists of
 * @return the token created by jjFillToken() after re-reading
 * @throws IllegalStateException if the end of input is reached while re-reading
 */
private net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken rereadTokenAs(int kind, int length) {
    // rewind to where the current match started
    input_stream.backup(lengthOfMatch);
    int remaining = length;
    try {
        while (remaining > 0) {
            input_stream.readChar();
            remaining--;
        }
    } catch (java.io.EOFException eofException) {
        throw new IllegalStateException(eofException);
    }
    jjmatchedKind = kind;
    return jjFillToken();
}
/**
 * Re-reads the current match as a single-character RBRACE token.
 * Used when a token that was matched as a template fragment is really a closing brace.
 *
 * @return the RBRACE token
 * @throws IllegalStateException if the re-read token image is not "}"
 */
private net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken handleBlock() {
    final net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken closingBrace =
        rereadTokenAs(JavaTokenKinds.RBRACE, 1);
    final boolean isBrace = "}".equals(input_stream.getTokenImage());
    if (isBrace) {
        return closingBrace;
    }
    throw new IllegalStateException("Expected '}'");
}
}
/* WHITE SPACE */
@ -754,7 +803,8 @@ TOKEN :
| < #EXPONENT_TAIL: (["+","-"])? <DIGIT_SEQ> >
| < CHARACTER_LITERAL: "'" ( ~["'", "\\","\n","\r"] | <STRING_ESCAPE> ) "'" >
| < STRING_LITERAL: "\"" ( ~["\"","\\","\n","\r"] | <STRING_ESCAPE> )* "\"" >
| < STRING_LITERAL: "\"" (<STRING_CHARACTER>)* "\"" >
| < #STRING_CHARACTER: ~["\"","\\","\n","\r"] | <STRING_ESCAPE> >
| < #STRING_ESCAPE:
"\\"
( ["n","t","b","r","f","s","\\","'","\""]
@ -763,9 +813,12 @@ TOKEN :
| ["0"-"3"] ["0"-"7"] ["0"-"7"]
)
>
| < #TEXT_BLOCK_CHARACTER: ~["\\"] | <STRING_ESCAPE> | ("\\")? <LINE_TERMINATOR> >
}
/* TEXT BLOCKS */
// Note: Text blocks need their own lexical state, so that we can reliably determine
// the end of the text block ("""), which is 3 characters long.
MORE :
{
< "\"\"\"" (<HORIZONTAL_WHITESPACE>)* <LINE_TERMINATOR> > : IN_TEXT_BLOCK_LITERAL
@ -780,7 +833,7 @@ TOKEN :
// While in the lexical state IN_TEXT_BLOCK_LITERAL, each match is appended to the
// token being built (MORE) instead of producing a token of its own.
<IN_TEXT_BLOCK_LITERAL>
MORE :
{
// NOTE(review): the following two lines describe the same expansion (TEXT_BLOCK_CHARACTER is
// declared with exactly the expansion spelled out in the first line). They look like the
// removed/added pair of a diff - confirm that only the TEXT_BLOCK_CHARACTER form is meant to remain.
< ~["\\"] | <STRING_ESCAPE> | ("\\")? <LINE_TERMINATOR> >
< <TEXT_BLOCK_CHARACTER> >
}
/* IDENTIFIERS */
@ -982,6 +1035,87 @@ TOKEN :
| < GT: ">" >
}
/* FRAGMENTS */
// Note: The fragments introduce ambiguity with other token productions, especially the separator token "}" (RBRACE).
// In order to produce the correct token sequence, the ambiguity needs to be resolved using the context.
// That means that a match for STRING_TEMPLATE_MID/END or TEXT_BLOCK_TEMPLATE_MID/END could actually be a closing brace ("}").
// Additionally, a match for STRING_TEMPLATE_MID could actually be a TEXT_BLOCK_TEMPLATE_MID and vice versa.
// See JLS 3.13 Fragments (Java 21 Preview)
TOKEN :
{
    < STRING_TEMPLATE_BEGIN: "\"" <STRING_FRAGMENT> "\\{" >
  | < STRING_TEMPLATE_MID: "}" <STRING_FRAGMENT> "\\{" >
    {
      {
        // The match might belong to a text block template or might just start with a closing brace,
        // depending on which parser method asked for the token.
        TokenContext ctx = determineContext();
        // determineContext() returns null if no known method is on the call stack. Switching on null
        // would throw a NullPointerException, so in that case the token is left unchanged,
        // which is what the *_END actions do as well.
        if (ctx != null) {
          switch (ctx) {
          case TEXT_BLOCK_TEMPLATE:
            jjmatchedKind = TEXT_BLOCK_TEMPLATE_MID;
            matchedToken = jjFillToken();
            break;
          case BLOCK:
            matchedToken = handleBlock();
            break;
          default:
            // STRING_TEMPLATE: the token is already of the correct kind
            break;
          }
        }
      }
    }
  | < STRING_TEMPLATE_END: "}" <STRING_FRAGMENT> "\"" >
    {
      {
        // Inside a brace-delimited construct, only the leading "}" is the token.
        TokenContext ctx = determineContext();
        if (ctx == TokenContext.BLOCK) {
          matchedToken = handleBlock();
        }
      }
    }
  | < #STRING_FRAGMENT: (<STRING_CHARACTER>)* >
  | < TEXT_BLOCK_TEMPLATE_BEGIN: "\"\"\"" (<HORIZONTAL_WHITESPACE>)* <LINE_TERMINATOR> <TEXT_BLOCK_FRAGMENT> "\\{" >
  | < TEXT_BLOCK_TEMPLATE_MID: "}" <TEXT_BLOCK_FRAGMENT> "\\{" >
    {
      {
        TokenContext ctx = determineContext();
        // See STRING_TEMPLATE_MID: a null context must not reach the switch, the token is then left unchanged.
        if (ctx != null) {
          switch (ctx) {
          case STRING_TEMPLATE: {
            // The match ran past the end of the string template fragment:
            // find the real end of the fragment and re-read only that part.
            java.util.regex.Matcher m = STRING_TEMPLATE_MID_OR_END_PATTERN.matcher(matchedToken.getImage());
            if (m.find()) {
              int kind = STRING_TEMPLATE_END;
              if ("\\{".equals(m.group(1))) {
                kind = STRING_TEMPLATE_MID;
              }
              matchedToken = rereadTokenAs(kind, m.end());
            }
            break;
          }
          case TEXT_BLOCK_TEMPLATE: {
            // Note: TEXT_BLOCK_FRAGMENT is not really correct and might match """ as part of TEXT_BLOCK_TEMPLATE_MID
            // instead of TEXT_BLOCK_TEMPLATE_END. In case this happens, this is corrected here.
            java.util.regex.Matcher m = TEXT_BLOCK_TEMPLATE_END_PATTERN.matcher(matchedToken.getImage());
            if (m.find()) {
              matchedToken = rereadTokenAs(TEXT_BLOCK_TEMPLATE_END, m.end());
            }
            break;
          }
          case BLOCK:
            matchedToken = handleBlock();
            break;
          default:
            break;
          }
        }
      }
    }
  | < TEXT_BLOCK_TEMPLATE_END: "}" <TEXT_BLOCK_FRAGMENT> "\"\"\"" >
    {
      {
        // Inside a brace-delimited construct, only the leading "}" is the token.
        TokenContext ctx = determineContext();
        if (ctx == TokenContext.BLOCK) {
          matchedToken = handleBlock();
        }
      }
    }
  | < #TEXT_BLOCK_FRAGMENT: (<TEXT_BLOCK_CHARACTER>)* >
}
/*****************************************
* THE JAVA LANGUAGE GRAMMAR STARTS HERE *
*****************************************/
@ -2031,6 +2165,7 @@ void PrimaryStep2() #void:
// "super" alone is not a valid expression
("." MemberSelector() | MethodReference())
| MemberSelector()
| {forceExprContext();} TemplateArgument() #TemplateExpression(2)
)
// catches the case where the ambig name is the start of an array type
| LOOKAHEAD("@" | "[" "]") {forceTypeContext();} Dims() #ArrayType(2) (MethodReference() | "." "class" #ClassLiteral(1))
@ -2135,6 +2270,45 @@ boolean LambdaParameterType() #void :
| FormalParamType() { return false; }
}
// The argument of a template expression: either a template with embedded expressions
// or a plain string literal. No node of its own is created (#void).
void TemplateArgument() #void :
{}
{
    (
        Template()
      | StringLiteral()
    )
}
// A template is either a string template or a text block template.
void Template() :
{}
{
    (
        StringTemplate()
      | TextBlockTemplate()
    )
}
// A string template: a BEGIN fragment, an embedded expression, then any number of
// (MID fragment, embedded expression) pairs, closed by an END fragment.
// The production itself creates no node (#void); each fragment token gets its own
// TemplateFragment node.
// NOTE(review): setLastTokenImage presumably stores the image of the token just consumed
// on the TemplateFragment node - confirm against its definition.
// The method name "StringTemplate" is looked up on the call stack by determineContext().
void StringTemplate() #void :
{}
{
<STRING_TEMPLATE_BEGIN> { setLastTokenImage(jjtThis); } #TemplateFragment
EmbeddedExpression()
( <STRING_TEMPLATE_MID> { setLastTokenImage(jjtThis); } #TemplateFragment EmbeddedExpression() )*
<STRING_TEMPLATE_END> { setLastTokenImage(jjtThis); } #TemplateFragment
}
// A text block template: a BEGIN fragment, an embedded expression, then any number of
// (MID fragment, embedded expression) pairs, closed by an END fragment.
// The production itself creates no node (#void); each fragment token gets its own
// TemplateFragment node.
// NOTE(review): setLastTokenImage presumably stores the image of the token just consumed
// on the TemplateFragment node - confirm against its definition.
// The method name "TextBlockTemplate" is looked up on the call stack by determineContext().
void TextBlockTemplate() #void :
{}
{
<TEXT_BLOCK_TEMPLATE_BEGIN> { setLastTokenImage(jjtThis); } #TemplateFragment
EmbeddedExpression()
( <TEXT_BLOCK_TEMPLATE_MID> { setLastTokenImage(jjtThis); } #TemplateFragment EmbeddedExpression() )*
<TEXT_BLOCK_TEMPLATE_END> { setLastTokenImage(jjtThis); } #TemplateFragment
}
// The expression between two template fragments. It is optional: nothing is consumed
// if no expression follows. No node of its own is created (#void).
void EmbeddedExpression() #void :
{}
{
    ( Expression() )?
}
void Literal() #void :
{}
{