Clément Fournier a11f45e511 Cleanup spec
2020-09-17 22:54:59 +02:00

1664 lines
38 KiB
Plaintext

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* NOTE : please see documentation at bottom of this file. (It was placed there because it's tiring
* to always have to page past it... :)
*/
/*
* Generator options for this grammar (JJTree + JavaCC).
*/
options
{
/* JJTree: generate one AST node class per node type (ASTTemplate, ASTWord, ...) */
MULTI = true;
/* JJTree: generate visitor support for the AST node classes */
VISITOR = true;
/* JavaCC: token literals ("if", "end", "true", ...) match regardless of case */
IGNORE_CASE = true;
/** for debugging purposes. Keep false */
DEBUG_PARSER=false;
DEBUG_TOKEN_MANAGER=false;
}
PARSER_BEGIN(VmParserImpl)
package net.sourceforge.pmd.lang.vm.ast;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
/**
* This class is responsible for parsing a Velocity
* template. This class was generated by JavaCC using
* the JJTree extension to produce an Abstract
* Syntax Tree (AST) of the template.
*
* Please look at the Parser.jjt file which is
* what controls the generation of this class.
*
* @author <a href="mailto:jvanzyl@apache.org">Jason van Zyl</a>
* @author <a href="mailto:geirm@optonline.net">Geir Magnusson Jr.</a>
* @author <a href="hps@intermeta.de">Henning P. Schmiedehausen</a>
* @version $Id$
*/
public class VmParserImpl
{
    /**
     * Semantic-lookahead helper used by the Directive production: peeks at
     * the raw character stream to find out whether the next character that
     * is not a blank, tab, CR or LF is a left parenthesis.
     *
     * Every character read while peeking is pushed back before returning,
     * so the token manager sees the input exactly as it was. This is done
     * in code instead of as a production for simplicity and efficiency.
     *
     * @return true if only whitespace separates the current position from
     *         a '(', false otherwise (including when reading fails)
     */
    private boolean isLeftParenthesis()
    {
        int consumed = 0;
        try
        {
            for (;;)
            {
                final char next = token_source.input_stream.readChar();
                consumed++;
                switch (next)
                {
                    case '(':
                        return true;
                    case ' ':
                    case '\n':
                    case '\r':
                    case '\t':
                        /* leading whitespace: keep scanning */
                        break;
                    default:
                        /* anything else means there is no argument list */
                        return false;
                }
            }
        }
        catch (IOException e)
        {
            /*
             * The stream could not deliver another character (presumably
             * end of input), so no parenthesis follows.
             */
            return false;
        }
        finally
        {
            /*
             * Backup the stream to the initial state
             */
            token_source.input_stream.backup(consumed);
        }
    }
}
PARSER_END(VmParserImpl)
TOKEN_MGR_DECLS:
{
    /* NOTE(review): not read or written anywhere in the visible grammar */
    private int fileDepth = 0;

    /* parenthesis bookkeeping for the lexical state we are currently in */
    private int lparen = 0;
    private int rparen = 0;

    /* saved states; the last element is the top of the stack */
    List<ParserState> stateStack = new ArrayList<ParserState>(50);

    /** when true, state transitions are traced on System.out */
    public boolean debugPrint = false;

    private boolean inReference;
    public boolean inDirective;
    private boolean inComment;
    public boolean inSet;

    /**
     * Pops a state off the 'state stack' and restores the paren counts
     * and the lexical state recorded in it.
     * Public because we need it in PD &amp; VM handling.
     *
     * If the stack is empty, lparen is reset to 0 and the lexer falls
     * back to the DEFAULT state.
     *
     * @return boolean : success. It can fail if the state machine
     *         gets messed up (so don't mess it up :)
     */
    public boolean stateStackPop()
    {
        if (stateStack.isEmpty())
        {
            // empty stack
            lparen = 0;
            SwitchTo(DEFAULT);
            return false;
        }

        final ParserState s = stateStack.remove(stateStack.size() - 1); // stack.pop

        if (debugPrint)
        {
            System.out.println(
                " stack pop (" + stateStack.size() + ") : lparen=" +
                s.lparen +
                " newstate=" + s.lexstate );
        }

        lparen = s.lparen;
        rparen = s.rparen;
        SwitchTo(s.lexstate);
        return true;
    }

    /**
     * Pushes the current state (paren counts and lexical state) onto the
     * 'state stack' and resets lparen to 0 for the state about to be
     * entered. rparen is saved but deliberately left untouched here.
     *
     * @return boolean : success of operation (always true)
     */
    public boolean stateStackPush()
    {
        if (debugPrint)
        {
            System.out.println(" (" + stateStack.size() + ") pushing cur state : " +
                curLexState );
        }

        final ParserState s = new ParserState();
        s.lparen = lparen;
        s.rparen = rparen;
        s.lexstate = curLexState;

        lparen = 0;
        stateStack.add(s); // stack.push
        return true;
    }

    /**
     * Clears all state variables, resets to
     * start values, clears stateStack. Call
     * before parsing.
     */
    public void clearStateVars()
    {
        stateStack.clear();
        lparen = 0;
        rparen = 0;
        inReference = false;
        inDirective = false;
        inComment = false;
        inSet = false;
    }

    /**
     * Holds the state of the parsing process: the paren counts and the
     * lexical state that were current when it was pushed.
     */
    private static class ParserState
    {
        int lparen;
        int rparen;
        int lexstate;
    }

    /**
     * handles the dropdown logic when encountering a RPAREN
     */
    private void RPARENHandler()
    {
        /*
         * Ultimately, we want to drop down to the state below
         * the one that has an open '(' (if we hit bottom (DEFAULT),
         * that's fine. It's just text schmoo.)
         */
        boolean closed = false;

        /* inside a comment a ')' never closes anything */
        if (inComment)
        {
            closed = true;
        }

        while (!closed)
        {
            /*
             * look at current state. If we haven't seen a lparen
             * in this state then we drop a state, because this
             * rparen clearly closes our state
             */
            if (lparen > 0)
            {
                /*
                 * if rparen + 1 == lparen, then this state is closed.
                 * Otherwise, increment and keep parsing
                 */
                if (lparen == rparen + 1)
                {
                    stateStackPop();
                }
                else
                {
                    rparen++;
                }
                closed = true;
            }
            else
            {
                /*
                 * now, drop a state; stop if there is nothing left to drop
                 */
                if (!stateStackPop())
                {
                    break;
                }
            }
        }
    }
}
/* ------------------------------------------------------------------------
*
* Tokens
*
* Note : we now have another state, REFMODIFIER. This is sort of a
* type of REFERENCE state, simply used so that we can use the DIRECTIVE
* token set when we are processing a $foo.bar() construct
*
* ------------------------------------------------------------------------- */
/*
* "[" directly after a reference or reference modifier opens an index
* expression ($foo[1]): the current state is pushed on the state stack
* and the lexer moves to REFINDEX.
*/
<REFERENCE, REFMODIFIER>
TOKEN:
{
<INDEX_LBRACKET: "[">
{
stateStackPush();
SwitchTo(REFINDEX);
}
}
/*
* "]" ends the index expression by popping the state stack, which
* restores the paren counts and lexical state saved on top of it.
*/
<REFINDEX>
TOKEN:
{
<INDEX_RBRACKET: "]">
{
stateStackPop();
}
}
/*
* Punctuation recognised only in the DIRECTIVE and REFMOD2 states.
* None of these tokens has a lexical action, so they never change state.
*/
<DIRECTIVE,REFMOD2>
TOKEN:
{
<LBRACKET: "[">
| <RBRACKET: "]">
| <COMMA:",">
}
/* range operator, consumed by IntegerRange() */
<DIRECTIVE, REFMOD2>
TOKEN:
{
<DOUBLEDOT : ".." >
}
/* "in" keyword, consumed by ForeachStatement() */
<DIRECTIVE, REFMOD2>
TOKEN:
{
<IN : "in" >
}
/* key/value separator, consumed by Map() */
<DIRECTIVE, REFMOD2>
TOKEN:
{
<COLON : ":" >
}
/* map delimiters; Map() accepts either RIGHT_CURLEY or RCURLY as the closer */
<DIRECTIVE, REFMOD2>
TOKEN :
{
<LEFT_CURLEY : "{" >
| <RIGHT_CURLEY : "}" >
}
/*
* "(" inside a directive or directly after a reference modifier.
* Outside of comments it increments lparen, the counter that
* RPARENHandler() later balances closing parentheses against.
*/
<DIRECTIVE,REFMODIFIER>
TOKEN:
{
<LPAREN: "(">
{
if (!inComment)
lparen++;
/*
* If in REFERENCE and we have seen the dot, then move
* to REFMOD2 -> Modifier()
*/
if (curLexState == REFMODIFIER )
SwitchTo( REFMOD2 );
}
}
/*
* we never will see a ')' in anything but DIRECTIVE and REFMOD2.
* Each have their own
*/
<DIRECTIVE>
TOKEN:
{
/*
* We will eat any whitespace upto and including a newline for directives.
* The lexical action hands over to RPARENHandler(), which decides whether
* this ')' closes the current state or only balances an inner '('.
*/
<RPAREN: ")" ( ( " " | "\t" )* ( "\n" | "\r" | "\r\n" ))?>
{
RPARENHandler();
}
}
/*
* ")" closing a method call's argument list ($foo.bar( ... )).
*/
<REFMOD2>
TOKEN:
{
/*
* in REFMOD2, we don't want to bind the whitespace and \n like we
* do when closing a directive.
*/
<REFMOD2_RPAREN: ")">
{
/*
* need to simply switch back to REFERENCE, not drop down the stack
* because we can (infinitely) chain, ala
* $foo.bar().blargh().woogie().doogie()
*/
SwitchTo( REFERENCE );
}
}
/*----------------------------------------------
*
* escape "\\" handling for the built-in directives
*
*--------------------------------------------- */
/* no state list: this token is only recognised in the DEFAULT state */
TOKEN:
{
/*
* We have to do this, because we want these to be a Text node, and
* whatever follows to be peer to this text in the tree.
*
* We need to touch the ASTs for these, because we want an even # of \'s
* to render properly in front of the block
*
* This is really simplistic. I actually would prefer to find them in
* grammatical context, but I am neither smart nor rested, a recipe
* for disaster, another long night with Mr. Parser, or both.
*
* Shape of the match: any number of "\\\\" pairs, then "\\#", then a
* directive name in either plain (WORD) or bracketed (BRACKETED_WORD) form.
*/
<ESCAPE_DIRECTIVE : (<DOUBLE_ESCAPE>)* "\\#" (<WORD> | <BRACKETED_WORD>) >
}
/*
* needed because #set is so wacky in its desired behavior. We want set
* to eat any preceding whitespace so it is invisible in formatting.
* (As it should be.) If this works well, I am going to chuck the whole MORE:
* token abomination.
*
* We added the lexical states REFERENCE, REFMODIFIER, REFMOD2 to
* address JIRA issue VELOCITY-631. With SET_DIRECTIVE only in the
* DEFAULT lexical state the following VTL fails "$a#set($b = 1)"
* because the Reference token uses LOOKAHEAD(2) combined with the
* fact that we explicitly set the lex state to REFERENCE with the $
* token, which means we would never evaluate this token during the
* look ahead. This general issue is discussed here:
*
* http://www.engr.mun.ca/~theo/JavaCC-FAQ/javacc-faq-ie.htm#tth_sEc3.12
*
* The token swallows leading blanks/tabs, the "#set" or "#{set}" keyword,
* optional spaces and the opening "(" in one go, which is why the lexical
* action also repeats the bookkeeping of the LPAREN token.
*/
<DEFAULT, REFERENCE, REFMODIFIER, REFMOD2>
TOKEN:
{
<SET_DIRECTIVE: (" "|"\t")* ("#set" | "#{set}") (" ")* "(">
{
if (! inComment)
{
/* save where we came from, flag directive/#set mode, enter DIRECTIVE */
inDirective = true;
if ( debugPrint )
System.out.print("#set : going to " + DIRECTIVE );
stateStackPush();
inSet = true;
SwitchTo(DIRECTIVE);
}
/*
* need the LPAREN action
*/
if (!inComment)
{
lparen++;
/*
* If in REFERENCE and we have seen the dot, then move
* to REFMOD2 -> Modifier()
*
* NOTE(review): the block above has already called
* SwitchTo(DIRECTIVE) under the same !inComment condition, so this
* test looks unreachable here -- confirm before relying on it.
*/
if (curLexState == REFMODIFIER )
SwitchTo( REFMOD2 );
}
}
}
/*
* Prefixes recognised in every lexical state. They are MORE rules: the
* matched text is kept and becomes the start of whatever token is matched
* next. Each lexical action is a no-op while inComment is set.
*/
<*>
MORE :
{
/*
* Note : DOLLARBANG is a duplicate of DOLLAR. They must be identical.
*/
<DOLLAR: ("\\")* "$">
{
if (! inComment)
{
/*
* if we find ourselves in REFERENCE, we need to pop down
* to end the previous ref
*/
if (curLexState == REFERENCE)
{
inReference = false;
stateStackPop();
}
/* start a new reference: save the surrounding state, enter REFERENCE */
inReference = true;
if ( debugPrint )
System.out.print( "$ : going to " + REFERENCE );
stateStackPush();
SwitchTo(REFERENCE);
}
}
/* quiet reference prefix "$!", with optional backslashes before "$" and before "!" */
| <DOLLARBANG: ("\\")* "$" ("\\")* "!">
{
if (! inComment)
{
/*
* if we find ourselves in REFERENCE, we need to pop down
* to end the previous ref
*/
if (curLexState == REFERENCE)
{
inReference = false;
stateStackPop();
}
inReference = true;
if ( debugPrint )
System.out.print( "$! : going to " + REFERENCE );
stateStackPush();
SwitchTo(REFERENCE);
}
}
/* "#[[" opens a text block; it reuses the inComment flag until "]]#" */
| "#[["
{
if (!inComment)
{
inComment = true;
stateStackPush();
SwitchTo( IN_TEXTBLOCK );
}
}
/*
* "#**" followed by anything but "#" opens a formal comment. The extra
* character is only there to tell it apart from "#**#"; it is pushed
* back so the comment state sees it.
*/
| <"#**" ~["#"]>
{
if (!inComment)
{
input_stream.backup(1);
inComment = true;
stateStackPush();
SwitchTo( IN_FORMAL_COMMENT);
}
}
/* "#*" opens a multi-line comment */
| "#*"
{
if (!inComment)
{
inComment=true;
stateStackPush();
SwitchTo( IN_MULTI_LINE_COMMENT );
}
}
/* a lone "#" may start a directive; PRE_DIRECTIVE works out which one */
| <HASH : "#" >
{
if (! inComment)
{
/*
* We can have the situation where #if($foo)$foo#end.
* We need to transition out of REFERENCE before going to DIRECTIVE.
* I don't really like this, but I can't think of a legal way
* you are going into DIRECTIVE while in REFERENCE. -gmj
*/
if (curLexState == REFERENCE || curLexState == REFMODIFIER )
{
inReference = false;
stateStackPop();
}
inDirective = true;
/* note: the trace prints DIRECTIVE, but the state entered is PRE_DIRECTIVE */
if ( debugPrint )
System.out.print("# : going to " + DIRECTIVE );
stateStackPush();
SwitchTo(PRE_DIRECTIVE);
}
}
}
// treat the single line comment case separately
// to avoid ##<EOF> errors
// (this is a real TOKEN rather than a MORE prefix, so "##" can stand
// alone; Comment() makes the terminating newline token optional)
<DEFAULT,PRE_DIRECTIVE,DIRECTIVE,REFERENCE>
TOKEN :
{
<SINGLE_LINE_COMMENT_START: "##">
{
if (!inComment)
{
/* a comment right after a reference ends that reference first */
if (curLexState == REFERENCE)
{
inReference = false;
stateStackPop();
}
inComment = true;
stateStackPush();
SwitchTo(IN_SINGLE_LINE_COMMENT);
}
}
}
/*
* Plain-text tokens of the DEFAULT state: an escaped backslash pair, a
* single backslash, and any run of characters that contains none of
* '$', '#' or '\'.
*/
TOKEN :
{
<DOUBLE_ESCAPE : "\\\\">
| <ESCAPE: "\\" >
| <TEXT: (~["$", "#", "\\"])+ >
}
/* -----------------------------------------------------------------------
*
* *_COMMENT Lexical tokens
*
*-----------------------------------------------------------------------*/
/*
* Terminators of the comment / text-block states. Each one clears
* inComment and pops the state stack to return to the state that was
* saved when the comment or text block was opened.
*/
/* a single line comment ends at the first line break */
<IN_SINGLE_LINE_COMMENT>
TOKEN :
{
<SINGLE_LINE_COMMENT: "\n" | "\r" | "\r\n">
{
inComment = false;
stateStackPop();
}
}
<IN_FORMAL_COMMENT>
TOKEN :
{
<FORMAL_COMMENT: "*#" >
{
inComment = false;
stateStackPop();
}
}
<IN_MULTI_LINE_COMMENT>
TOKEN :
{
<MULTI_LINE_COMMENT: "*#" >
{
inComment = false;
stateStackPop();
}
}
<IN_TEXTBLOCK>
TOKEN :
{
<TEXTBLOCK: "]]#" >
{
inComment = false;
stateStackPop();
}
}
/* every other character inside a comment is skipped */
<IN_SINGLE_LINE_COMMENT,IN_FORMAL_COMMENT,IN_MULTI_LINE_COMMENT>
SKIP :
{
< ~[] >
}
/* text block content is accumulated (MORE) so that it ends up in the TEXTBLOCK token */
<IN_TEXTBLOCK>
MORE :
{
< ~[] >
}
/* -----------------------------------------------------------------------
*
* DIRECTIVE Lexical State (some of it, anyway)
*
* ---------------------------------------------------------------------- */
/* one or more blanks, tabs or line-break characters inside an argument context */
<DIRECTIVE,REFMOD2,REFINDEX>
TOKEN:
{
<WHITESPACE : ([" ","\t", "\n", "\r"])+ >
}
/*
* String literals, double- or single-quoted. Inside either form the quote
* character can be written doubled ("" or ''), and a backslash followed
* by optional spaces and a newline is accepted. The double-quoted form
* additionally spells out backslash escapes (\n \t \b \r \f, octal, \uXXXX).
*/
<DIRECTIVE,REFMOD2,REFINDEX>
TOKEN :
{
// <STRING_LITERAL: ( "\"" ( ~["\"","\n","\r"] )* "\"" ) | ( "'" ( ~["'","\n","\r"] )* "'" ) >
< STRING_LITERAL:
("\""
( (~["\""])
| ("\\"
( ["n","t","b","r","f"]
| ["0"-"7"] ( ["0"-"7"] )?
| ["0"-"3"] ["0"-"7"] ["0"-"7"]
| "u" ["0"-"9", "a"-"f", "A"-"F"] ["0"-"9", "a"-"f", "A"-"F"] ["0"-"9", "a"-"f", "A"-"F"] ["0"-"9", "a"-"f", "A"-"F"]
)
)
| ("\"\"")
| ( "\\" (" ")* "\n")
)*
"\""
)
|
("\'"
( (~["\'"])
| ("''")
| ( "\\" (" ")* "\n")
)*
"\'"
)
>
{
/*
* - if we are in DIRECTIVE and haven't seen ( yet, then also drop out.
* don't forget to account for the beloved yet weird #set
* - finally, if we are in REFMOD2 (remember : $foo.bar( ) then " is ok!
*/
if( curLexState == DIRECTIVE && !inSet && lparen == 0)
stateStackPop();
}
}
/* boolean literals, recognised in every reference and directive argument state */
<REFERENCE,DIRECTIVE,REFMODIFIER,REFMOD2,REFINDEX>
TOKEN:
{
<TRUE: "true">
| <FALSE: "false">
}
/*
* A line break in DIRECTIVE state ends the directive: the state stack is
* popped and the inSet / inDirective flags are cleared.
*/
<DIRECTIVE>
TOKEN :
{
<NEWLINE: "\n" | "\r" | "\r\n" >
{
if ( debugPrint )
System.out.println(" NEWLINE :");
stateStackPop();
if (inSet)
inSet = false;
if (inDirective)
inDirective = false;
}
}
/*
* Arithmetic, logical and comparison operators, available only in
* DIRECTIVE state. Most logical/comparison operators have a symbolic and
* a word spelling (e.g. "&&" / "and", "<=" / "le").
*/
<DIRECTIVE>
TOKEN :
{
<MINUS: "-">
| <PLUS: "+">
| <MULTIPLY: "*">
| <DIVIDE: "/">
| <MODULUS: "%">
| <LOGICAL_AND: "&&" | "and" >
| <LOGICAL_OR: "||" | "or" >
| <LOGICAL_LT: "<" | "lt" >
| <LOGICAL_LE: "<=" | "le" >
| <LOGICAL_GT: ">" | "gt" >
| <LOGICAL_GE: ">=" | "ge" >
| <LOGICAL_EQUALS: "==" | "eq" >
| <LOGICAL_NOT_EQUALS: "!=" | "ne" >
| <LOGICAL_NOT: "!" | "not" >
| <EQUALS: "=" >
}
/*
* Names of the built-in directives, matched right after "#" (state
* PRE_DIRECTIVE), each in plain and {bracketed} form.
*
* - end / else take no arguments: they may swallow trailing blanks plus
* one line break, clear inDirective and pop back to the saved state.
* - foreach / if / elseif take arguments: they switch to DIRECTIVE.
*/
<PRE_DIRECTIVE>
TOKEN :
{
<END: ( "end" ( ( " " | "\t" )* ( "\n" | "\r" | "\r\n" ) )? )
| ("{end}" ( ( " " | "\t" )* ( "\n" | "\r" | "\r\n" ) )? ) >
{
inDirective = false;
stateStackPop();
}
| <FOREACH_DIRECTIVE: "foreach" | "{foreach}">
{
SwitchTo(DIRECTIVE);
}
| <IF_DIRECTIVE: "if" | "{if}">
{
SwitchTo(DIRECTIVE);
}
| <ELSEIF_DIRECTIVE: "elseif" | "{elseif}">
{
SwitchTo(DIRECTIVE);
}
| <ELSE_DIRECTIVE:
( "else" ( ( " " | "\t" )* ( "\n" | "\r" | "\r\n" ) )? )
| ( "{else}" ( ( " " | "\t" )* ( "\n" | "\r" | "\r\n" ) )? ) >
{
inDirective = false;
stateStackPop();
}
}
/*
* Numeric literals. DIGIT and EXPONENT are private (#) helper expressions
* and never produce tokens of their own.
*/
<PRE_DIRECTIVE,DIRECTIVE,REFMOD2,REFINDEX>
TOKEN:
{
<#DIGIT: [ "0"-"9" ] >
/*
* treat FLOATING_POINT_LITERAL and INTEGER_LITERAL differently as a range can only handle integers.
*/
/**
* Note -- we also define an integer as ending with a double period,
* in order to avoid 1..3 being defined as floating point (1.) then a period, then a integer
*/
| <INTEGER_LITERAL: ("-")? (<DIGIT>)+ ("..")? >
{
/*
* Remove the double period if it is there: push the two characters
* back and rebuild the token image from the shortened match
*/
if (matchedToken.getImage().endsWith("..")) {
input_stream.backup(2);
matchedToken = matchedToken.replaceImage(input_stream);
}
/*
* check to see if we are in set
* ex. #set $foo = $foo + 3
* because we want to handle the \n after
*/
if ( lparen == 0 && !inSet && curLexState != REFMOD2 && curLexState != REFINDEX)
{
stateStackPop();
}
}
| <FLOATING_POINT_LITERAL:
("-")? (<DIGIT>)+ "." (<DIGIT>)* (<EXPONENT>)?
| ("-")? "." (<DIGIT>)+ (<EXPONENT>)?
| ("-")? (<DIGIT>)+ <EXPONENT>
>
{
/*
* check to see if we are in set
* ex. #set $foo = $foo + 3
* because we want to handle the \n after
*
* NOTE(review): unlike INTEGER_LITERAL above, this condition does not
* exclude REFINDEX -- confirm whether that difference is intended.
*/
if ( lparen == 0 && !inSet && curLexState != REFMOD2)
{
stateStackPop();
}
}
|
<#EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ >
}
/*
* Directive / macro names. WORD may start with a letter, "_" or "@";
* BRACKETED_WORD is the {name} form and may start with a letter or "_".
* LETTER and DIRECTIVE_CHAR are private (#) helper expressions.
*/
<PRE_DIRECTIVE,DIRECTIVE>
TOKEN:
{
<#LETTER: [ "a"-"z", "A" - "Z" ] >
| <#DIRECTIVE_CHAR: [ "a"-"z", "A"-"Z", "0"-"9", "_", "@" ] >
| <WORD: ( <LETTER> | ["_"] | ["@"]) (<DIRECTIVE_CHAR>)* >
| <BRACKETED_WORD: "{" ( <LETTER> | ["_"]) (<DIRECTIVE_CHAR>)* "}" >
}
/* -----------------------------------------------------------------------
*
* REFERENCE Lexical States
*
* This is more than a single state, because of the structure of
* the VTL references. We use three states because the set of tokens
* for each state can be different.
*
* $foo.bar( "arg" )
* ^ ^ ^
* | | |
* ----------- > REFERENCE : state initiated by the '$' character. Continues
* | | until end of the reference, or the . character.
* |------ > REFMODIFIER : state switched to when the <DOT> is encountered.
* | note that this is a switch, not a push. See notes at bottom
* | re stateStack.
* |-- > REFMOD2 : state switch to when the LPAREN is encountered.
* again, this is a switch, not a push.
*
* During the REFERENCE or REFMODIFIER lex states we will switch to
* REFINDEX if a bracket is encountered '['. for example: $foo[1]
* or $foo.bar[1], $foo.bar( "arg" )[1]
* ---------------------------------------------------------------------------- */
/*
* Identifiers and the "." that joins them. ALPHA_CHAR, ALPHANUM_CHAR and
* IDENTIFIER_CHAR are private (#) helper expressions. DOT only matches
* when a letter follows, so a "." that is plain text is not taken as
* part of the reference.
*/
<REFERENCE,REFMODIFIER,REFMOD2>
TOKEN :
{
<#ALPHA_CHAR: ["a"-"z", "A"-"Z"] >
| <#ALPHANUM_CHAR: [ "a"-"z", "A"-"Z", "0"-"9" ] >
| <#IDENTIFIER_CHAR: [ "a"-"z", "A"-"Z", "0"-"9", "-", "_" ] >
| <IDENTIFIER: ( <ALPHA_CHAR> | ["_"]) (<IDENTIFIER_CHAR>)* >
| <DOT: "." <ALPHA_CHAR>>
{
/*
* push the alpha char back into the stream so the following identifier
* is complete
*/
input_stream.backup(1);
/*
* and munge the <DOT> so we just get a . when we have normal text that
* looks like a ref.ident
*/
matchedToken = matchedToken.replaceImage(input_stream);
if ( debugPrint )
System.out.print("DOT : switching to " + REFMODIFIER);
SwitchTo(REFMODIFIER);
}
}
/*
* Braces of the formal reference notation ${foo}. Only the closing brace
* has a lexical action: it pops the state stack, ending the reference.
*/
<REFERENCE,REFMODIFIER>
TOKEN :
{
<LCURLY: "{">
| <RCURLY: "}">
{
stateStackPop();
}
}
/*
* Catch-all that ends a reference: any character that no other token of
* the reference states accepts is pushed back, the reference is marked as
* finished and the state stack is popped, so the character is lexed again
* in the state that was active before the reference started.
*
* The rule has to cover all three reference states. The third one is
* REFMOD2 -- the state list used to name "REFMOD", which no other rule of
* this grammar refers to, so the terminator was never active inside a
* method call's argument list.
*/
<REFERENCE,REFMODIFIER,REFMOD2>
SPECIAL_TOKEN :
{
    <REFERENCE_TERMINATOR: ~[] >
    {
        /*
         * push every terminator character back into the stream
         */
        input_stream.backup(1);
        inReference = false;
        if ( debugPrint )
            System.out.print("REF_TERM :");
        stateStackPop();
    }
}
/*
* Catch-all for PRE_DIRECTIVE: a "#" that is not followed by anything
* directive-like. The offending character is pushed back, inDirective is
* cleared and the state stack is popped so the character is lexed again
* in the state that was saved when "#" was seen.
*/
<PRE_DIRECTIVE>
SPECIAL_TOKEN :
{
<DIRECTIVE_TERMINATOR: ~[] >
{
if ( debugPrint )
System.out.print("DIRECTIVE_TERM :");
input_stream.backup(1);
inDirective = false;
stateStackPop();
}
}
/**
* This method is what starts the whole parsing
* process. It matches any number of statements followed by end of
* input. After the parsing is complete and
* the template has been turned into an AST,
* this method returns the root of AST which
* can subsequently be traversed by a visitor
* (visitor support is generated because the VISITOR option is set).
*
* @return the ASTTemplate node that is the root of the tree
*/
ASTTemplate Template() : {}
{
( Statement() )* <EOF>
{ return jjtThis; }
}
/**
* These are the types of statements that
* are acceptable in Velocity templates.
*
* The production is marked #void, so it creates no node of its own;
* the alternative that matches contributes its node directly.
* Reference() is selected with two tokens of lookahead; Text() is the
* last alternative listed.
*/
void Statement() #void : {}
{
IfStatement()
| ForeachStatement()
| LOOKAHEAD(2) Reference()
| Comment()
| Textblock()
| SetDirective()
| EscapedDirective()
| Escape()
| Directive()
| Text()
}
/**
* used to separate the notion of a valid directive that has been
* escaped, versus something that looks like a directive and
* is just schmoo. This is important to do as a separate production
* that creates a node, because we want this, in either case, to stop
* the further parsing of the Directive() tree.
*
* Matches exactly one ESCAPE_DIRECTIVE token (backslashes, "\#" and the
* directive name).
*/
void EscapedDirective() : {}
{
<ESCAPE_DIRECTIVE>
}
/**
* Used to catch and process escape sequences in grammatical constructs
* as escapes outside of VTL are just characters. Right now we have both
* this and the EscapedDirective() construction because in the EscapedDirective()
* case, we want to suck in the #&lt;directive&gt; and here we don't. We just want
* the escapes to render correctly
*
* Consumes a run of one or more DOUBLE_ESCAPE tokens, counting them, and
* stores one backslash per token as the node's value.
*/
void Escape() :
{ int count = 0; }
{
( LOOKAHEAD(2) <DOUBLE_ESCAPE> { count++; } )+
{
jjtThis.setValue(StringUtils.repeat("\\", count));
}
}
/**
* A comment in any of its three forms: a single line comment ("##",
* whose terminating line break token is optional so that a comment at
* end of input is accepted), a multi-line comment or a formal comment.
*/
void Comment() : {}
{
<SINGLE_LINE_COMMENT_START> ( <SINGLE_LINE_COMMENT> ) ?
| <MULTI_LINE_COMMENT>
| <FORMAL_COMMENT>
}
/**
* A text block, i.e. a single TEXTBLOCK token (opened by "#[[" and
* closed by "]]#" in the token definitions).
*/
void Textblock() : {}
{
<TEXTBLOCK>
}
/**
* Node wrapping a single FLOATING_POINT_LITERAL token.
*/
void FloatingPointLiteral() : {}
{
<FLOATING_POINT_LITERAL>
}
/**
* Node wrapping a single INTEGER_LITERAL token.
*/
void IntegerLiteral() : {}
{
<INTEGER_LITERAL>
}
/**
* Node wrapping a single STRING_LITERAL token (single- or double-quoted).
*/
void StringLiteral() : {}
{
<STRING_LITERAL>
}
/**
* This method corresponds to the identifier part of variable
* references in Velocity templates.
* The following are examples of variable
* references that may be found in a
* template:
*
* $foo
* $bar
*
* It matches a single IDENTIFIER token; Reference() and Method() use it
* for the names that follow a dot.
*/
void Identifier() : {}
{
<IDENTIFIER>
}
/**
* Node wrapping a single WORD token; used as a bare-word directive
* argument (see DirectiveArg()).
*/
void Word() : {}
{
<WORD>
}
/**
* Supports the arguments for the Pluggable Directives.
*
* #void: the production adds no node itself, the matched alternative does.
* IntegerRange() is guarded by a syntactic lookahead for
* "[ (reference | integer) .." so that it is told apart from ObjectArray(),
* which also starts with LBRACKET.
*/
void DirectiveArg() #void : {}
{
Reference()
| Word()
| StringLiteral()
| IntegerLiteral()
/*
* Need to put this before the floating point expansion
*/
| LOOKAHEAD( <LBRACKET> [<WHITESPACE>] ( Reference() | IntegerLiteral()) [<WHITESPACE>] <DOUBLEDOT> ) IntegerRange()
| FloatingPointLiteral()
| Map()
| ObjectArray()
| True()
| False()
}
/**
* Supports the Pluggable Directives
* #foo( arg+ )
*
* Flow of the production:
* 1. match the directive name (WORD or BRACKETED_WORD), strip the leading
* "#" (and braces) from the token image and store it on the node;
* 2. if a "(" follows (checked by isLeftParenthesis()), parse the
* argument list and validate bare-word arguments; otherwise treat the
* text as a non-directive and return immediately;
* 3. unless the node reports isLine(), parse the body statements as a
* Block up to the closing END token.
*
* @return the directive node (jjtThis)
*/
VmNode Directive() :
{
JavaccToken t = null;
int argPos = 0;
boolean isVM = false;
boolean doItNow = false;
}
{
/*
* note that if we were escaped, that is now handled by
* EscapedDirective()
*/
((t = <WORD>) | (t = <BRACKETED_WORD>))
{
String directiveName;
/*
* the image still carries the "#" consumed as a MORE prefix:
* "#{name}" -> drop "#{" and "}", "#name" -> drop "#"
*/
if (t.kind == VmTokenKinds.BRACKETED_WORD)
{
directiveName = t.getImage().substring(2, t.getImage().length() - 1);
}
else
{
directiveName = t.getImage().substring(1);
}
/*
* Velocimacro support : if the directive is macro directive
* then set the flag so after the block parsing, we add the VM
* right then. (So available if used w/in the current template )
*/
if (directiveName.equals("macro"))
{
doItNow = true;
}
/*
* set the directive name from here. No reason for the thing to know
* about parser tokens
*/
jjtThis.setDirectiveName(directiveName);
/*
* now, switch us out of PRE_DIRECTIVE
*/
token_source.SwitchTo(DIRECTIVE);
argPos = 0;
}
/**
* Look for the pattern [WHITESPACE] <LPAREN>
*/
(LOOKAHEAD( { isLeftParenthesis() } )
/*
* if this is indeed a token, match the #foo ( arg ) pattern
*/
[<WHITESPACE>] <LPAREN> ( LOOKAHEAD(2) [<WHITESPACE>] [<COMMA> [<WHITESPACE>]]
DirectiveArg()
{
/* the node just pushed by DirectiveArg() tells us what kind of argument it was */
if (jjtree.peekNode() instanceof ASTWord)
{
if (doItNow && argPos == 0)
{
/* if #macro and it's the 0th arg, ok */
}
else if (!jjtThis.isDirective())
{
// not a real directive, but maybe a Velocimacro
throw new ParseException("Invalid arg #"
+ argPos + " in VM " + t.getImage(), t);
}
/* if #foreach and it's the 2nd arg, ok */
else if (jjtThis.isDirective() && (!"foreach".equals(jjtThis.getDirectiveName()) || argPos != 1))
{
throw new ParseException("Invalid arg #"
+ argPos + " in directive " + t.getImage(), t);
}
else
{
/* either schmoo or a late-defined macro,
* VelocimacroProxy will have to check for latter. */
}
}
else
{
if (doItNow && argPos == 0)
{
/* if a VM and it's the 0th arg, not ok */
throw new ParseException("Invalid first arg"
+ " in #macro() directive - must be a"
+ " word token (no ' or double quote surrounding)", t);
}
}
argPos++;
}
)* [<WHITESPACE>] <RPAREN>
{
/* presumably a line directive has no body/END -- confirm against VmNode.isLine() */
if (jjtThis.isLine())
{
return jjtThis;
}
}
|
{
if (doItNow) // doItNow is true if the directive is "macro"
{
// VELOCITY-667 We get here if we have a "#macro" construct
// without parenthesis which is a parse error
throw new ParseException("A macro declaration requires at least a name argument", t);
}
/**
* Not a directive: undo the state push done for "#" and stop here
*/
token_source.stateStackPop();
token_source.inDirective = false;
return jjtThis;
}
)
/*
* and the following block if the PD needs it
*/
( Statement() )* #Block
<END>
{ return jjtThis; }
}
/**
* for creating a map in a #set
*
* #set($foo = {$foo : $bar, $blargh : $thingy})
*
* Matches "{", then either a comma-separated list of key : value
* parameter pairs or an empty (optionally blank) body, then the
* closing brace.
*/
void Map() : {}
{
<LEFT_CURLEY>
(
LOOKAHEAD(2) Parameter() <COLON> Parameter() (<COMMA> Parameter() <COLON> Parameter() )*
|
[ <WHITESPACE> ]
)
/** note: need both tokens as they are generated in different states **/
( <RIGHT_CURLEY> | <RCURLY> )
}
/**
* An array literal: "[", an optional comma-separated list of
* parameters, "]".
*/
void ObjectArray() : {}
{
<LBRACKET> [ Parameter() ( <COMMA> Parameter() )* ] <RBRACKET>
}
/**
* supports the [n..m] vector generator for use in
* the #foreach() to generate measured ranges w/o
* needing explicit support from the app/servlet
*
* Each bound is either a reference or an integer literal; whitespace
* is allowed around the bounds and around "..".
*/
void IntegerRange() : {}
{
<LBRACKET> [<WHITESPACE>]
( Reference() | IntegerLiteral())
[<WHITESPACE>] <DOUBLEDOT> [<WHITESPACE>]
(Reference() | IntegerLiteral())
[<WHITESPACE>] <RBRACKET>
}
/**
* A Simplified parameter more suitable for an index position: $foo[$index]
*
* Accepts a string, integer, boolean or reference, optionally surrounded
* by whitespace. #void: the matched alternative supplies the node.
*/
void IndexParameter() #void: {}
{
[<WHITESPACE>]
(
StringLiteral()
| IntegerLiteral()
| True()
| False()
| Reference()
)
[ <WHITESPACE>]
}
/**
* A single argument of a method call, map or array literal, optionally
* surrounded by whitespace. #void: the matched alternative supplies
* the node.
*
* This method has yet to be fully implemented
* but will allow arbitrarily nested method
* calls
*/
void Parameter() #void: {}
{
[<WHITESPACE>]
(
StringLiteral()
| IntegerLiteral()
| LOOKAHEAD( <LBRACKET> [<WHITESPACE>] ( Reference() | IntegerLiteral()) [<WHITESPACE>] <DOUBLEDOT> ) IntegerRange()
| Map()
| ObjectArray()
| True()
| False()
| Reference()
| FloatingPointLiteral()
)
[ <WHITESPACE>]
}
/**
* A method call inside a reference: a name, "(", an optional
* comma-separated parameter list, and the REFMOD2 closing parenthesis.
*
* This method has yet to be fully implemented
* but will allow arbitrarily nested method
* calls
*/
void Method() : {}
{
Identifier() <LPAREN> [ Parameter() ( <COMMA> Parameter() )* ] <REFMOD2_RPAREN>
}
/**
* An index access on a reference, e.g. the [1] in $foo[1].
*/
void Index() : {}
{
<INDEX_LBRACKET> IndexParameter() <INDEX_RBRACKET>
}
/**
* A reference in shorthand ($foo) or formal (${foo}) notation: an
* identifier, any number of index accesses, then any number of
* ".method(...)" or ".property" segments, each of which may again be
* followed by index accesses. The two alternatives differ only in the
* surrounding braces.
*/
void Reference() : {}
{
/*
* A reference is either ${<FOO>} or $<FOO>
*/
(
<IDENTIFIER> (Index())*
(LOOKAHEAD(2) <DOT> (LOOKAHEAD(3) Method() | Identifier() ) (Index())* )*
)
|
(
<LCURLY>
<IDENTIFIER> (Index())*
(LOOKAHEAD(2) <DOT> (LOOKAHEAD(3) Method() | Identifier() ) (Index())* )*
<RCURLY>
)
}
/**
* Node for the boolean literal "true".
*/
void True() : {}
{
<TRUE>
}
/**
* Node for the boolean literal "false".
*/
void False() : {}
{
<FALSE>
}
/**
* This is somewhat of a kludge, the problem is that the parser picks
* up on '$[' , or '$![' as being a Reference, and does not dismiss it even though
* there is no <Identifier> between $ and [, This has something to do
* with the LOOKAHEAD in Reference, but I never found a way to resolve
* it in a more fashionable way..
*
* The token also covers "$." and is consumed as plain text by Text().
*/
<DEFAULT,REFERENCE>
TOKEN :
{
<EMPTY_INDEX : ("$[" | "$![" | "$\\![" | "$.")>
}
/**
* This method is responsible for allowing
* all non-grammar text to pass through
* unscathed.
*
* Besides TEXT itself it accepts the tokens that can be left over when
* something only looked like VTL (a stray dot, parenthesis, literal,
* backslash, brace or EMPTY_INDEX).
*/
void Text() : {}
{
<TEXT>
| <DOT>
| <RPAREN>
| <LPAREN>
| <INTEGER_LITERAL>
| <FLOATING_POINT_LITERAL>
| <STRING_LITERAL>
| <ESCAPE>
| <LCURLY>
| <RCURLY>
| <EMPTY_INDEX>
}
/* -----------------------------------------------------------------------
*
* Defined Directive Syntax
*
* ----------------------------------------------------------------------*/
/**
* #foreach( $item in expression ) ... #end
*
* The loop variable must be a reference; the iterated value is a primary
* expression. The body statements are collected under a Block node.
*/
void ForeachStatement() : {}
{
<FOREACH_DIRECTIVE> [<WHITESPACE>] <LPAREN> [<WHITESPACE>] Reference() [<WHITESPACE>] <IN> [<WHITESPACE>] PrimaryExpression() [<WHITESPACE>] <RPAREN>
( Statement() )* #Block
<END>
}
/**
* #if( expression ) ... [#elseif( expression ) ...]* [#else ...] #end
*
* The statements following the condition are collected under a Block
* node; any number of elseif branches and at most one else branch may
* follow before the closing END.
*/
void IfStatement() : {}
{
<IF_DIRECTIVE> [<WHITESPACE>] <LPAREN> Expression() <RPAREN>
( Statement() )* #Block
[ LOOKAHEAD(1) ( ElseIfStatement() )+ ]
[ LOOKAHEAD(1) ElseStatement() ]
<END>
}
/**
* The #else branch of an IfStatement(): the ELSE_DIRECTIVE token followed
* by its statements, collected under a Block node.
*/
void ElseStatement() : {}
{
<ELSE_DIRECTIVE>
( Statement() )* #Block
}
/**
* An #elseif( expression ) branch of an IfStatement(): the condition
* followed by its statements, collected under a Block node.
*/
void ElseIfStatement() : {}
{
<ELSEIF_DIRECTIVE> [<WHITESPACE>]
<LPAREN> Expression() <RPAREN>
( Statement() )* #Block
}
/**
* The #set directive:
* #set( $ref = expr )
*
* Only the parenthesised form is accepted here: the SET_DIRECTIVE token
* already includes the opening "(" and this production requires the
* closing RPAREN. A line break directly after the directive is consumed.
*/
void SetDirective() : {}
{
<SET_DIRECTIVE>([<WHITESPACE>] Reference() [<WHITESPACE>] <EQUALS> Expression() <RPAREN>
{
/*
* ensure that inSet is false. Leads to some amusing bugs...
*/
token_source.inSet = false;
}
[<NEWLINE>] )
}
/* -----------------------------------------------------------------------
*
* Expression Syntax
*
* ----------------------------------------------------------------------*/
/**
* Root of the expression grammar; currently just a conditional-or
* expression wrapped in its own node. The assignment alternative below
* is disabled.
*/
void Expression() : {}
{
// LOOKAHEAD( PrimaryExpression() <EQUALS> ) Assignment()
//|
ConditionalOrExpression()
}
/**
* primary = expression, built as an Assignment node with two children.
* NOTE(review): the only use visible in this grammar is the commented-out
* alternative in Expression().
*/
void Assignment() #Assignment(2) : {}
{
PrimaryExpression() <EQUALS> Expression()
}
/**
* Logical OR, lowest precedence level. Each operator occurrence wraps
* the two operands on the node stack in an OrNode, so chains associate
* to the left.
*/
void ConditionalOrExpression() #void : {}
{
ConditionalAndExpression()
( <LOGICAL_OR> ConditionalAndExpression() #OrNode(2) )*
}
/**
* Logical AND; binds tighter than OR. Each operator occurrence builds a
* two-child AndNode.
*/
void ConditionalAndExpression() #void : {}
{
EqualityExpression()
( <LOGICAL_AND> EqualityExpression() #AndNode(2) )*
}
/**
* Equality (==, eq) and inequality (!=, ne); each operator occurrence
* builds a two-child EQNode or NENode.
*/
void EqualityExpression() #void : {}
{
RelationalExpression()
(
<LOGICAL_EQUALS> RelationalExpression() #EQNode(2)
| <LOGICAL_NOT_EQUALS> RelationalExpression() #NENode(2)
)*
}
/**
* Ordering comparisons (<, >, <=, >= and their word forms); each
* operator occurrence builds a two-child LTNode, GTNode, LENode or GENode.
*/
void RelationalExpression() #void : {}
{
AdditiveExpression()
(
<LOGICAL_LT> AdditiveExpression() #LTNode(2)
| <LOGICAL_GT> AdditiveExpression() #GTNode(2)
| <LOGICAL_LE> AdditiveExpression() #LENode(2)
| <LOGICAL_GE> AdditiveExpression() #GENode(2)
)*
}
/**
* Addition and subtraction; each operator occurrence builds a two-child
* AddNode or SubtractNode.
*/
void AdditiveExpression() #void : {}
{
MultiplicativeExpression()
(
<PLUS> MultiplicativeExpression() #AddNode(2)
| <MINUS> MultiplicativeExpression() #SubtractNode(2)
)*
}
/**
* Multiplication, division and modulus; each operator occurrence builds
* a two-child MulNode, DivNode or ModNode.
*/
void MultiplicativeExpression() #void : {}
{
UnaryExpression()
(
<MULTIPLY> UnaryExpression() #MulNode(2)
| <DIVIDE> UnaryExpression() #DivNode(2)
| <MODULUS> UnaryExpression() #ModNode(2)
)*
}
/**
* Logical negation (! or "not", optionally preceded by whitespace),
* built as a one-child NotNode, or else a primary expression. Two tokens
* of lookahead are needed because both alternatives may start with
* whitespace.
*/
void UnaryExpression() #void : {}
{
LOOKAHEAD(2) [<WHITESPACE>] <LOGICAL_NOT> UnaryExpression() #NotNode(1)
| PrimaryExpression()
}
/**
* The operands of an expression: a literal, reference, range, map, array
* or a parenthesised sub-expression, optionally surrounded by whitespace.
* #void: the matched alternative supplies the node. IntegerRange() is
* guarded by a syntactic lookahead so that it is told apart from
* ObjectArray(), which also starts with LBRACKET.
*/
void PrimaryExpression() #void : {}
{
[<WHITESPACE>]
(
StringLiteral()
| Reference()
| IntegerLiteral()
| LOOKAHEAD( <LBRACKET> [<WHITESPACE>] ( Reference() | IntegerLiteral()) [<WHITESPACE>] <DOUBLEDOT> ) IntegerRange()
| FloatingPointLiteral()
| Map()
| ObjectArray()
| True()
| False()
| <LPAREN> Expression() <RPAREN>
)
[<WHITESPACE>]
}
/* ======================================================================
Notes
-----
template == the input stream for this parser, contains 'VTL'
mixed in with 'schmoo'
VTL == Velocity Template Language : the references, directives, etc
schmoo == the non-VTL component of a template
reference == VTL entity that represents data within the context. ex. $foo
directive == VTL entity that denotes 'action' (#set, #foreach, #if )
defined directive (DD) == VTL directive entity that is expressed
explicitly w/in this grammar
pluggable directive (PD) == VTL directive entity that is defined outside of the
grammar. PD's allow VTL to be easily expandable w/o parser modification.
The problem with parsing VTL is that an input stream consists generally of
little bits of VTL mixed in with 'other stuff', referred to as 'schmoo'.
Unlike other languages, like C or Java, where the parser can punt whenever
it encounters input that doesn't conform to the grammar, the VTL parser can't do
that. It must simply output the schmoo and keep going.
There are a few things that we do here :
- define a set of parser states (DEFAULT, DIRECTIVE, REFERENCE, etc)
- define for each parser state a set of tokens for each state
- define the VTL grammar, expressed (mostly) in the productions such as Text(),
SetDirective(), etc.
It is clear that this expression of the VTL grammar (the contents
of this .jjt file) is maturing and evolving as we learn more about
how to parse VTL ( and as I learn about parsing...), so in the event
this documentation is in disagreement w/ the source, the source
takes precedence. :)
Parser States
-------------
DEFAULT : This is the base or starting state, and strangely enough, the
default state.
PRE_DIRECTIVE : State immediately following '#' before we figure out which
defined or pluggable directive (or neither) we are working with.
DIRECTIVE : This state is triggered by the a match of a DD or a PD.
REFERENCE : Triggered by '$'. Analogous to PRE_DIRECTIVE.
REFMODIFIER : Triggered by .<alpha> when in REFERENCE.
REFMOD2 : Triggered by ( when in REFMODIFIER
(cont)
Escape Sequences
----------------
The escape processing in VTL is very simple. The '\' character acts
only as an escape when :
1) One or more touch a VTL element.
A VTL element is either :
1) It precedes a reference that is in the context.
2) It precedes a defined directive (#set, #if, #end, etc) or a valid
pluggable directive, such as #foreach
In all other cases the '\' is just another piece of text. The purpose of this
is to allow the non-VTL parts of a template (the 'schmoo') to not have to be
altered for processing by Velocity.
So if in the context $foo and $bar were defined and $woogie was not
\$foo \$bar \$woogie
would output
$foo $bar \$woogie
Further, you can stack them and they affect left to right, just like conventional
escape characters in other languages.
\$foo = $foo
\\$foo = \<foo>
\\\$foo = \$foo
What You Expect
---------------
The recent versions of the parser are trying to support precise output to
support general template use. The directives do not render trailing
whitespace and newlines if followed by a newline. They will render
preceding whitespace. The only exception is #set, which also eats
preceding whitespace.
So, with a template :
------
#set $foo="foo"
#if($foo)
\$foo = $foo
#end
------
it will render precisely :
------
$foo = foo
------
*/