pmd/pmd-vm/etc/grammar/VmParser.jjt

1976 lines
47 KiB
Plaintext
Raw Normal View History

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* NOTE : please see the documentation at the bottom of this file. (It was placed there
* because it is tiring to always have to page past it... :)
*/
options
{
/**
* The package the generated node classes are placed in.
* The value previously used is kept here for reference:
* NODE_PACKAGE="org.apache.velocity.runtime.parser";
*/
NODE_PACKAGE="net.sourceforge.pmd.lang.vm.ast";
/** The user-supplied base class for the generated tree nodes. */
NODE_CLASS = "net.sourceforge.pmd.lang.vm.ast.AbstractVmNode";
/** A source file will be generated for each non-terminal */
MULTI=true;
/**
* Each node will have access to the parser. I did this so
* some global information can be shared via the parser. I
* think this will come in handy for keeping track of
* context, and being able to push changes back into
* the context when nodes make modifications to the
* context by setting properties, variables and
* what not.
*/
NODE_USES_PARSER=true;
/**
* The parser must be non-static in order for the
* above option to work, otherwise the parser value
* is passed in as null, which isn't all that useful ;)
*/
STATIC=false;
/**
* Enables the use of a visitor that each of the nodes
* will accept. This way we can separate the logic
* of node processing into a visitor and out of the
* nodes themselves. If processing changes then
* nothing has to change in the node code.
*/
VISITOR=true;
/**
* Declare that we are accepting unicode input and
* that we are using a custom character stream class.
* Note that the char stream class is really a slightly
* modified ASCII_CharStream, as it appears we are safe
* because we only deal with pre-encoding-converted
* Readers rather than raw input streams.
*/
UNICODE_INPUT=true;
USER_CHAR_STREAM=true;
/**
* For debugging purposes. Keep false.
*/
DEBUG_PARSER=false;
DEBUG_TOKEN_MANAGER=false;
/**
* Copied from the JSP parser.
* NOTE(review): IGNORE_CASE applies to every token in the grammar, so
* directive keywords such as "if" or "foreach" would also match in
* upper case -- confirm this is intended for Velocity templates.
*/
FORCE_LA_CHECK = false;
IGNORE_CASE = true;
}
PARSER_BEGIN(VmParser)
package net.sourceforge.pmd.lang.vm.ast;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.HashMap;
import java.util.Map;
import net.sourceforge.pmd.lang.ast.CharStream;
2013-09-28 22:19:31 -04:00
import net.sourceforge.pmd.lang.vm.directive.Directive;
import net.sourceforge.pmd.lang.vm.util.VelocityCharStream;
2013-09-28 22:19:31 -04:00
import net.sourceforge.pmd.lang.vm.util.DirectiveMapper;
/**
* This class is responsible for parsing a Velocity
* template. This class was generated by JavaCC using
* the JJTree extension to produce an Abstract
* Syntax Tree (AST) of the template.
*
* Please look at the VmParser.jjt file, which is
* what controls the generation of this class.
*
* @author <a href="mailto:jvanzyl@apache.org">Jason van Zyl</a>
* @author <a href="mailto:geirm@optonline.net">Geir Magnusson Jr.</a>
* @author <a href="hps@intermeta.de">Henning P. Schmiedehausen</a>
* @version $Id$
*/
public class VmParser
{
/**
* Keep track of defined macros, used for escape processing
*/
private Map macroNames = new HashMap();
/**
* Name of current template we are parsing. Passed to us in parse()
*/
public String currentTemplateName = "";
/**
* Set to true if the property
* RuntimeConstants.RUNTIME_REFERENCES_STRICT_ESCAPE is set to true
*/
public boolean strictEscape = false;
/**
* This method gets a Directive from the directives Hashtable
*/
public Directive getDirective(String directive)
{
2013-09-28 22:19:31 -04:00
return DirectiveMapper.getDirective(directive);
}
/**
* This method finds out of the directive exists in the directives Map.
*/
public boolean isDirective(String directive)
{
2013-09-28 22:19:31 -04:00
return DirectiveMapper.getDirective(directive) != null;
}
/**
* Produces a processed output for an escaped control or
* pluggable directive
*/
private String escapedDirective( String strImage )
{
int iLast = strImage.lastIndexOf("\\");
String strDirective = strImage.substring(iLast + 1);
2013-09-28 22:19:31 -04:00
2013-09-30 22:23:59 -04:00
// return ( strImage.substring(0,iLast/2) + strDirective);
boolean bRecognizedDirective = false;
// we don't have to call substring method all the time in this method
String dirTag = strDirective.substring(1);
if (dirTag.charAt(0) == '{')
{
dirTag = dirTag.substring(1, dirTag.length() - 1);
}
2013-09-30 22:23:59 -04:00
/*
* If this is a predefined derective or if we detect
* a macro definition (this is aproximate at best) then
* we absorb the forward slash. If in strict reference
* mode then we always absord the forward slash regardless
* if the derective is defined or not.
*/
2013-09-30 22:23:59 -04:00
if (strictEscape
|| isDirective(dirTag)
|| macroNames.containsKey(dirTag)
2013-09-30 22:23:59 -04:00
|| !DirectiveMapper.isDirective(dirTag)) //rsvc.isVelocimacro(dirTag, currentTemplateName))
{
bRecognizedDirective = true;
}
else
{
if ( dirTag.equals("if")
|| dirTag.equals("end")
|| dirTag.equals("set")
|| dirTag.equals("else")
|| dirTag.equals("elseif")
)
{
bRecognizedDirective = true;
}
}
2013-09-30 22:23:59 -04:00
/*
* if so, make the proper prefix string (let the escapes do their thing..)
* otherwise, just return what it is..
*/
2013-09-30 22:23:59 -04:00
if (bRecognizedDirective)
return ( strImage.substring(0,iLast/2) + strDirective);
else
return ( strImage );
}
/**
* Check whether there is a left parenthesis with leading optional
* whitespaces. This method is used in the semantic look ahead of
* Directive method. This is done in code instead of as a production
* for simplicity and efficiency.
*/
private boolean isLeftParenthesis()
{
char c;
int no = 0;
try {
while(true)
{
/**
* Read a character
*/
2013-10-01 21:56:49 -04:00
c = token_source.input_stream.readChar();
no++;
if (c == '(')
{
return true;
}
/**
* if not a white space return
*/
else if (c != ' ' && c != '\n' && c != '\r' && c != '\t')
{
return false;
}
}
}
catch(IOException e)
{
}
finally
{
/**
* Backup the stream to the initial state
*/
2013-10-01 21:56:49 -04:00
token_source.input_stream.backup(no);
}
return false;
}
/**
* We use this method in a lookahead to determine if we are in a macro
* default value assignment. The standard lookahead is not smart enough.
* here we look for the equals after the reference.
*/
private boolean isAssignment()
{
// Basically if the last character read was not '$' then false
if (token_source.curLexState != REFERENCE) return false;
char c = ' ';
int backup = 0;
try
{
// Read through any white space
while(Character.isWhitespace(c))
{
2013-10-01 21:56:49 -04:00
c = token_source.input_stream.readChar();
backup++;
}
// This is what we are ultimately looking for
if (c != '=') return false;
}
catch (IOException e)
{
}
finally
{
2013-10-01 21:56:49 -04:00
token_source.input_stream.backup(backup);
}
return true;
}
}
PARSER_END(VmParser)
TOKEN_MGR_DECLS:
{
private int fileDepth = 0;
private int lparen = 0;
private int rparen = 0;
List stateStack = new ArrayList(50);
public boolean debugPrint = false;
private boolean inReference;
public boolean inDirective;
private boolean inComment;
public boolean inSet;
/**
* Pops a state off the 'state stack', restores the paren counts that
* were saved with it and switches back to its lexical state.
* Public because we need it in PD &amp; VM handling.
*
* @return boolean : success. It fails when the stack is empty (the
* state machine got messed up, so don't mess it up :) ); in that
* case lparen is reset to 0 and the lexer is switched to DEFAULT.
*/
public boolean stateStackPop()
{
    if (stateStack.isEmpty())
    {
        // empty stack
        lparen = 0;
        SwitchTo(DEFAULT);
        return false;
    }

    ParserState s = (ParserState) stateStack.remove(stateStack.size() - 1); // stack.pop

    if( debugPrint )
        System.out.println(
            " stack pop (" + stateStack.size() + ") : lparen=" +
            s.lparen +
            " newstate=" + s.lexstate );

    lparen = s.lparen;
    rparen = s.rparen;
    SwitchTo(s.lexstate);
    return true;
}
/**
* Pushes the current state onto the 'state stack': the current lparen
* and rparen counts and the current lexical state are saved in a new
* ParserState. Afterwards lparen is reset to 0 (rparen is left as is).
*
* @return boolean : success of operation (always true)
*/
public boolean stateStackPush()
{
if( debugPrint )
System.out.println(" (" + stateStack.size() + ") pushing cur state : " +
curLexState );
ParserState s = new ParserState();
s.lparen = lparen;
s.rparen = rparen;
s.lexstate = curLexState;
// the new state starts without any open parenthesis
lparen = 0;
stateStack.add(s); // stack.push
return true;
}
/**
* Resets the token manager's bookkeeping to its start values: all
* in-construct flags are cleared, both paren counters are zeroed and
* the stateStack is emptied. Call before parsing.
*/
public void clearStateVars()
{
    inReference = inDirective = inComment = inSet = false;
    lparen = rparen = 0;
    stateStack.clear();
}
/**
* Holds the state of the parsing process: a snapshot of the token
* manager's paren counters and lexical state, as stored on the
* stateStack by stateStackPush() and restored by stateStackPop().
*/
private static class ParserState
{
// number of left parentheses seen when the snapshot was taken
int lparen;
// number of right parentheses seen when the snapshot was taken
int rparen;
// lexical state (curLexState) that was active when the snapshot was taken
int lexstate;
}
/**
* Handles the drop-down logic when encountering a RPAREN.
*
* Ultimately, we want to drop down to the state below the one that
* has an open paren. If we hit bottom (DEFAULT), that's fine: it's
* just text schmoo.
*/
private void RPARENHandler()
{
    // a ')' inside a comment closes nothing
    if (inComment)
    {
        return;
    }

    /*
     * As long as the current state has not seen a '(' this ')' cannot
     * belong to it, so drop a state. Give up once the stack is exhausted.
     */
    while (lparen <= 0)
    {
        if (!stateStackPop())
        {
            return;
        }
    }

    /*
     * The current state has an open '('. If rparen + 1 == lparen this
     * ')' closes the state; otherwise count it and keep parsing.
     */
    if (lparen == rparen + 1)
    {
        stateStackPop();
    }
    else
    {
        rparen++;
    }
}
}
/* ------------------------------------------------------------------------
*
* Tokens
*
* Note : we now have another state, REFMODIFIER. This is sort of a
* type of REFERENCE state, simply use to use the DIRECTIVE token
* set when we are processing a $foo.bar() construct
*
* ------------------------------------------------------------------------- */
<REFERENCE, REFMODIFIER>
TOKEN:
{
<INDEX_LBRACKET: "[">
{
stateStackPush();
SwitchTo(REFINDEX);
}
}
<REFINDEX>
TOKEN:
{
<INDEX_RBRACKET: "]">
{
stateStackPop();
}
}
<DIRECTIVE,REFMOD2>
TOKEN:
{
<LBRACKET: "[">
| <RBRACKET: "]">
| <COMMA:",">
}
<DIRECTIVE, REFMOD2>
TOKEN:
{
<DOUBLEDOT : ".." >
}
2013-09-30 22:23:59 -04:00
<DIRECTIVE, REFMOD2>
TOKEN:
{
<IN : "in" >
}
<DIRECTIVE, REFMOD2>
TOKEN:
{
<COLON : ":" >
}
<DIRECTIVE, REFMOD2>
TOKEN :
{
<LEFT_CURLEY : "{" >
| <RIGHT_CURLEY : "}" >
}
<DIRECTIVE,REFMODIFIER>
TOKEN:
{
<LPAREN: "(">
{
if (!inComment)
lparen++;
/*
* If in REFERENCE and we have seen the dot, then move
* to REFMOD2 -> Modifier()
*/
if (curLexState == REFMODIFIER )
SwitchTo( REFMOD2 );
}
}
/*
* we never will see a ')' in anything but DIRECTIVE and REFMOD2.
* Each have their own
*/
<DIRECTIVE>
TOKEN:
{
/*
* We will eat any whitespace upto and including a newline for directives
*/
<RPAREN: ")" ( ( " " | "\t" )* ( "\n" | "\r" | "\r\n" ))?>
{
RPARENHandler();
}
}
<REFMOD2>
TOKEN:
{
/*
* in REFMOD2, we don't want to bind the whitespace and \n like we
* do when closing a directive.
*/
<REFMOD2_RPAREN: ")">
{
/*
* need to simply switch back to REFERENCE, not drop down the stack
* because we can (infinitely) chain, ala
* $foo.bar().blargh().woogie().doogie()
*/
SwitchTo( REFERENCE );
}
}
/*----------------------------------------------
*
* escape "\\" handling for the built-in directives
*
*--------------------------------------------- */
TOKEN:
{
/*
* We have to do this, because we want these to be a Text node, and
* whatever follows to be peer to this text in the tree.
*
* We need to touch the ASTs for these, because we want an even # of \'s
* to render properly in front of the block
*
* This is really simplistic. I actually would prefer to find them in
* grammatical context, but I am neither smart nor rested, a recipe
* for disaster, another long night with Mr. Parser, or both.
*
* The token matches any number of backslash pairs, then a backslash
* and '#', then a directive word or a {bracketed} word.
*/
<ESCAPE_DIRECTIVE : (<DOUBLE_ESCAPE>)* "\\#" (<WORD> | <BRACKETED_WORD>) >
}
/*
* Needed because #set is so wacky in its desired behavior. We want set
* to eat any preceding whitespace so it is invisible in formatting.
* (As it should be.) If this works well, I am going to chuck the whole MORE:
* token abomination.
*
* We added the lexical states REFERENCE, REFMODIFIER, REFMOD2 to
* address JIRA issue VELOCITY-631. With SET_DIRECTIVE only in the
* DEFAULT lexical state the following VTL fails "$a#set($b = 1)"
* because the Reference token uses LOOKAHEAD(2) combined with the
* fact that we explicitly set the lex state to REFERENCE with the $
* token, which means we would never evaluate this token during the
* look ahead. This general issue is discussed here:
*
* http://www.engr.mun.ca/~theo/JavaCC-FAQ/javacc-faq-ie.htm#tth_sEc3.12
*
*/
<DEFAULT, REFERENCE, REFMODIFIER, REFMOD2>
TOKEN:
{
<SET_DIRECTIVE: (" "|"\t")* ("#set" | "#{set}") (" ")* "(">
{
if (! inComment)
{
inDirective = true;
if ( debugPrint )
System.out.print("#set : going to " + DIRECTIVE );
// remember where we came from, then lex the body as a directive
stateStackPush();
inSet = true;
SwitchTo(DIRECTIVE);
}
/*
* need the LPAREN action, because the token image already
* contains the opening "("
*/
if (!inComment)
{
lparen++;
/*
* If in REFERENCE and we have seen the dot, then move
* to REFMOD2 -> Modifier()
*
* NOTE(review): this mirrors the LPAREN token action, but here
* SwitchTo(DIRECTIVE) has already run whenever inComment is
* false, so this check looks unreachable -- confirm.
*/
if (curLexState == REFMODIFIER )
SwitchTo( REFMOD2 );
}
}
}
<*>
MORE :
{
/*
* Note : DOLLARBANG is a duplicate of DOLLAR. They must be identical.
*/
<DOLLAR: ("\\")* "$">
{
if (! inComment)
{
/*
* if we find ourselves in REFERENCE, we need to pop down
* to end the previous ref
*/
if (curLexState == REFERENCE)
{
inReference = false;
stateStackPop();
}
inReference = true;
if ( debugPrint )
System.out.print( "$ : going to " + REFERENCE );
stateStackPush();
SwitchTo(REFERENCE);
}
}
| <DOLLARBANG: ("\\")* "$" ("\\")* "!">
{
if (! inComment)
{
/*
* if we find ourselves in REFERENCE, we need to pop down
* to end the previous ref
*/
if (curLexState == REFERENCE)
{
inReference = false;
stateStackPop();
}
inReference = true;
if ( debugPrint )
System.out.print( "$! : going to " + REFERENCE );
stateStackPush();
SwitchTo(REFERENCE);
}
}
| "#[["
{
if (!inComment)
{
inComment = true;
stateStackPush();
SwitchTo( IN_TEXTBLOCK );
}
}
| <"#**" ~["#"]>
{
if (!inComment)
{
input_stream.backup(1);
inComment = true;
stateStackPush();
SwitchTo( IN_FORMAL_COMMENT);
}
}
| "#*"
{
if (!inComment)
{
inComment=true;
stateStackPush();
SwitchTo( IN_MULTI_LINE_COMMENT );
}
}
| <HASH : "#" >
{
if (! inComment)
{
/*
* We can have the situation where #if($foo)$foo#end.
* We need to transition out of REFERENCE before going to DIRECTIVE.
* I don't really like this, but I can't think of a legal way
* you are going into DIRECTIVE while in REFERENCE. -gmj
*/
if (curLexState == REFERENCE || curLexState == REFMODIFIER )
{
inReference = false;
stateStackPop();
}
inDirective = true;
if ( debugPrint )
System.out.print("# : going to " + DIRECTIVE );
stateStackPush();
SwitchTo(PRE_DIRECTIVE);
}
}
}
// treat the single line comment case separately
// to avoid ##<EOF> errors
<DEFAULT,PRE_DIRECTIVE,DIRECTIVE,REFERENCE>
TOKEN :
{
<SINGLE_LINE_COMMENT_START: "##">
{
if (!inComment)
{
if (curLexState == REFERENCE)
{
inReference = false;
stateStackPop();
}
inComment = true;
stateStackPush();
SwitchTo(IN_SINGLE_LINE_COMMENT);
}
}
}
TOKEN :
{
<DOUBLE_ESCAPE : "\\\\">
| <ESCAPE: "\\" >
| <TEXT: (~["$", "#", "\\"])+ >
}
/* -----------------------------------------------------------------------
*
* *_COMMENT Lexical tokens
*
*-----------------------------------------------------------------------*/
<IN_SINGLE_LINE_COMMENT>
TOKEN :
{
<SINGLE_LINE_COMMENT: "\n" | "\r" | "\r\n">
{
inComment = false;
stateStackPop();
}
}
<IN_FORMAL_COMMENT>
TOKEN :
{
<FORMAL_COMMENT: "*#" >
{
inComment = false;
stateStackPop();
}
}
<IN_MULTI_LINE_COMMENT>
TOKEN :
{
<MULTI_LINE_COMMENT: "*#" >
{
inComment = false;
stateStackPop();
}
}
<IN_TEXTBLOCK>
TOKEN :
{
<TEXTBLOCK: "]]#" >
{
inComment = false;
stateStackPop();
}
}
<IN_SINGLE_LINE_COMMENT,IN_FORMAL_COMMENT,IN_MULTI_LINE_COMMENT>
SKIP :
{
< ~[] >
}
<IN_TEXTBLOCK>
MORE :
{
< ~[] >
}
/* -----------------------------------------------------------------------
*
* DIRECTIVE Lexical State (some of it, anyway)
*
* ---------------------------------------------------------------------- */
<DIRECTIVE,REFMOD2,REFINDEX>
TOKEN:
{
<WHITESPACE : ([" ","\t", "\n", "\r"])+ >
}
<DIRECTIVE,REFMOD2,REFINDEX>
TOKEN :
{
// <STRING_LITERAL: ( "\"" ( ~["\"","\n","\r"] )* "\"" ) | ( "'" ( ~["'","\n","\r"] )* "'" ) >
< STRING_LITERAL:
("\""
( (~["\""])
| ("\\"
( ["n","t","b","r","f"]
| ["0"-"7"] ( ["0"-"7"] )?
| ["0"-"3"] ["0"-"7"] ["0"-"7"]
| "u" ["0"-"9", "a"-"f", "A"-"F"] ["0"-"9", "a"-"f", "A"-"F"] ["0"-"9", "a"-"f", "A"-"F"] ["0"-"9", "a"-"f", "A"-"F"]
)
)
| ("\"\"")
| ( "\\" (" ")* "\n")
)*
"\""
)
|
("\'"
( (~["\'"])
| ("''")
| ( "\\" (" ")* "\n")
)*
"\'"
)
>
{
/*
* - if we are in DIRECTIVE and haven't seen ( yet, then also drop out.
* don't forget to account for the beloved yet wierd #set
* - finally, if we are in REFMOD2 (remember : $foo.bar( ) then " is ok!
*/
if( curLexState == DIRECTIVE && !inSet && lparen == 0)
stateStackPop();
}
}
<REFERENCE,DIRECTIVE,REFMODIFIER,REFMOD2,REFINDEX>
TOKEN:
{
<TRUE: "true">
| <FALSE: "false">
}
<DIRECTIVE>
TOKEN :
{
<NEWLINE: "\n" | "\r" | "\r\n" >
{
if ( debugPrint )
System.out.println(" NEWLINE :");
stateStackPop();
if (inSet)
inSet = false;
if (inDirective)
inDirective = false;
}
}
2013-09-30 22:23:59 -04:00
<DIRECTIVE>
TOKEN :
{
<MINUS: "-">
| <PLUS: "+">
| <MULTIPLY: "*">
| <DIVIDE: "/">
| <MODULUS: "%">
| <LOGICAL_AND: "&&" | "and" >
| <LOGICAL_OR: "||" | "or" >
| <LOGICAL_LT: "<" | "lt" >
| <LOGICAL_LE: "<=" | "le" >
| <LOGICAL_GT: ">" | "gt" >
| <LOGICAL_GE: ">=" | "ge" >
| <LOGICAL_EQUALS: "==" | "eq" >
| <LOGICAL_NOT_EQUALS: "!=" | "ne" >
| <LOGICAL_NOT: "!" | "not" >
| <EQUALS: "=" >
}
<PRE_DIRECTIVE>
TOKEN :
{
<END: ( "end" ( ( " " | "\t" )* ( "\n" | "\r" | "\r\n" ) )? )
| ("{end}" ( ( " " | "\t" )* ( "\n" | "\r" | "\r\n" ) )? ) >
{
inDirective = false;
stateStackPop();
}
2013-09-30 22:23:59 -04:00
| <FOREACH_DIRECTIVE: "foreach" | "{foreach}">
{
SwitchTo(DIRECTIVE);
}
| <IF_DIRECTIVE: "if" | "{if}">
{
SwitchTo(DIRECTIVE);
}
| <ELSEIF_DIRECTIVE: "elseif" | "{elseif}">
{
SwitchTo(DIRECTIVE);
}
| <ELSE_DIRECTIVE:
( "else" ( ( " " | "\t" )* ( "\n" | "\r" | "\r\n" ) )? )
| ( "{else}" ( ( " " | "\t" )* ( "\n" | "\r" | "\r\n" ) )? ) >
{
inDirective = false;
stateStackPop();
}
}
/*
* Numeric literal tokens, available in the PRE_DIRECTIVE, DIRECTIVE,
* REFMOD2 and REFINDEX lexical states.
*/
<PRE_DIRECTIVE,DIRECTIVE,REFMOD2,REFINDEX>
TOKEN:
{
<#DIGIT: [ "0"-"9" ] >
/*
* Treat FLOATING_POINT_LITERAL and INTEGER_LITERAL differently, as a range can only handle integers.
*/
/**
* Note -- we also define an integer as ending with a double period,
* in order to avoid 1..3 being defined as floating point (1.) then a period, then an integer
*/
| <INTEGER_LITERAL: ("-")? (<DIGIT>)+ ("..")? >
{
/*
* Remove the double period if it is there: push the two characters
* back onto the input and trim them from the token image
*/
if (matchedToken.image.endsWith("..")) {
input_stream.backup(2);
matchedToken.image = matchedToken.image.substring(0,matchedToken.image.length()-2);
}
/*
* check to see if we are in set
* ex. #set $foo = $foo + 3
* because we want to handle the \n after
*/
if ( lparen == 0 && !inSet && curLexState != REFMOD2 && curLexState != REFINDEX)
{
stateStackPop();
}
}
| <FLOATING_POINT_LITERAL:
("-")? (<DIGIT>)+ "." (<DIGIT>)* (<EXPONENT>)?
| ("-")? "." (<DIGIT>)+ (<EXPONENT>)?
| ("-")? (<DIGIT>)+ <EXPONENT>
>
{
/*
* check to see if we are in set
* ex. #set $foo = $foo + 3
* because we want to handle the \n after
*
* NOTE(review): unlike the INTEGER_LITERAL action above, this
* condition does not exclude REFINDEX, although the token is also
* declared for that state -- confirm whether that is intended.
*/
if ( lparen == 0 && !inSet && curLexState != REFMOD2)
{
stateStackPop();
}
}
|
<#EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ >
}
<PRE_DIRECTIVE,DIRECTIVE>
TOKEN:
{
<#LETTER: [ "a"-"z", "A" - "Z" ] >
2013-09-30 22:23:59 -04:00
| <#DIRECTIVE_CHAR: [ "a"-"z", "A"-"Z", "0"-"9", "_", "@" ] >
| <WORD: ( <LETTER> | ["_"] | ["@"]) (<DIRECTIVE_CHAR>)* >
| <BRACKETED_WORD: "{" ( <LETTER> | ["_"]) (<DIRECTIVE_CHAR>)* "}" >
}
/* -----------------------------------------------------------------------
*
* REFERENCE Lexical States
*
* This is more than a single state, because of the structure of
* the VTL references. We use three states because the set of tokens
* for each state can be different.
*
* $foo.bar( "arg" )
* ^ ^ ^
* | | |
* ----------- > REFERENCE : state initiated by the '$' character. Continues
* | | until end of the reference, or the . character.
* |------ > REFMODIFIER : state switched to when the <DOT> is encountered.
* | note that this is a switch, not a push. See notes at bottom
* | re stateStack.
* |-- > REFMOD2 : state switch to when the LPAREN is encountered.
* again, this is a switch, not a push.
*
* During the REFERENCE or REFMODIFIER lex states we will switch to
* REFINDEX if a bracket is encountered '['. for example: $foo[1]
* or $foo.bar[1], $foo.bar( "arg" )[1]
* ---------------------------------------------------------------------------- */
<REFERENCE,REFMODIFIER,REFMOD2>
TOKEN :
{
<#ALPHA_CHAR: ["a"-"z", "A"-"Z"] >
| <#ALPHANUM_CHAR: [ "a"-"z", "A"-"Z", "0"-"9" ] >
| <#IDENTIFIER_CHAR: [ "a"-"z", "A"-"Z", "0"-"9", "-", "_" ] >
| <IDENTIFIER: ( <ALPHA_CHAR> | ["_"]) (<IDENTIFIER_CHAR>)* >
| <DOT: "." <ALPHA_CHAR>>
{
/*
* push the alpha char back into the stream so the following identifier
* is complete
*/
input_stream.backup(1);
/*
* and munge the <DOT> so we just get a . when we have normal text that
* looks like a ref.ident
*/
matchedToken.image = ".";
if ( debugPrint )
System.out.print("DOT : switching to " + REFMODIFIER);
SwitchTo(REFMODIFIER);
}
}
<REFERENCE,REFMODIFIER>
TOKEN :
{
<LCURLY: "{">
| <RCURLY: "}">
{
stateStackPop();
}
}
/*
* Catch-all for the reference states: any single character that no
* other token of the state matches ends the current reference.
*
* NOTE(review): the state list names REFMOD, while the other token
* declarations in this part of the grammar use REFMOD2. This may be
* intentional (it makes REFMOD a separate lexical state) -- confirm
* before changing it.
*/
<REFERENCE,REFMODIFIER,REFMOD>
SPECIAL_TOKEN :
{
<REFERENCE_TERMINATOR: ~[] >
{
/*
* push every terminator character back into the stream, so it is
* lexed again in the state we drop back to
*/
input_stream.backup(1);
inReference = false;
if ( debugPrint )
System.out.print("REF_TERM :");
stateStackPop();
}
}
<PRE_DIRECTIVE>
SPECIAL_TOKEN :
{
<DIRECTIVE_TERMINATOR: ~[] >
{
if ( debugPrint )
System.out.print("DIRECTIVE_TERM :");
input_stream.backup(1);
inDirective = false;
stateStackPop();
}
}
/**
* This method is what starts the whole parsing
* process. After the parsing is complete and
* the template has been turned into an AST,
* this method returns the root of AST which
* can subsequently be traversed by a visitor
* which implements the ParserVisitor interface
* which is generated automatically by JavaCC
*/
ASTprocess process() : {}
{
( Statement() )* <EOF>
{ return jjtThis; }
}
/**
* These are the types of statements that
* are acceptable in Velocity templates.
*/
void Statement() #void : {}
{
IfStatement()
2013-09-30 22:23:59 -04:00
| ForeachStatement()
| LOOKAHEAD(2) Reference()
| Comment()
| Textblock()
| SetDirective()
| EscapedDirective()
| Escape()
| Directive()
| Text()
}
/**
* used to separate the notion of a valid directive that has been
* escaped, versus something that looks like a directive and
* is just schmoo. This is important to do as a separate production
* that creates a node, because we want this, in either case, to stop
* the further parsing of the Directive() tree.
*/
void EscapedDirective() : {}
{
{
Token t = null;
}
t = <ESCAPE_DIRECTIVE>
{
/*
* churn and burn..
*/
t.image = escapedDirective( t.image );
}
}
/**
* Used to catch and process escape sequences in grammatical constructs
* as escapes outside of VTL are just characters. Right now we have both
* this and the EscapedDirective() construction because in the EscapedDirective()
* case, we want to suck in the #&lt;directive&gt; and here we don't. We just want
* the escapes to render correctly.
*
* The node value is one backslash per matched DOUBLE_ESCAPE when the
* following token is a control directive or a recognized directive /
* macro name, and two backslashes per DOUBLE_ESCAPE otherwise.
*/
void Escape() : {}
{
    {
        Token t = null;
        int count = 0;
        boolean control = false;
    }
    ( LOOKAHEAD(2) t = <DOUBLE_ESCAPE>
        {
            count++;
        }
    )+
    {
        /*
         * first, check to see if we have a control directive
         */
        switch(t.next.kind ) {
            case FOREACH_DIRECTIVE :
            case IF_DIRECTIVE :
            case ELSE_DIRECTIVE :
            case ELSEIF_DIRECTIVE :
            case END :
                control = true;
                break;
            default :
                break;
        }
        /*
         * if that failed, lets lookahead to see if we matched a PD or a VM.
         * The first character of the next image is skipped (it is the '#'
         * for directive-like tokens). A token without any image, e.g. at
         * the end of the input, cannot name a directive, so it is not
         * looked up at all instead of failing in substring(1).
         */
        String nextImage = t.next.image;
        if (nextImage != null && nextImage.length() > 0)
        {
            String nTag = nextImage.substring(1);
            if (strictEscape
                || isDirective(nTag)
                || macroNames.containsKey(nTag)
                || !DirectiveMapper.isDirective(nTag)) // was: rsvc.isVelocimacro(nTag, currentTemplateName)
            {
                control = true;
            }
        }
        String rendered = control ? "\\" : "\\\\";
        StringBuilder val = new StringBuilder();
        for( int i = 0; i < count; i++)
        {
            val.append(rendered);
        }
        jjtThis.val = val.toString();
    }
}
void Comment() : {}
{
<SINGLE_LINE_COMMENT_START> ( <SINGLE_LINE_COMMENT> ) ?
| <MULTI_LINE_COMMENT>
| <FORMAL_COMMENT>
}
void Textblock() : {}
{
<TEXTBLOCK>
}
void FloatingPointLiteral() : {}
{
<FLOATING_POINT_LITERAL>
}
void IntegerLiteral() : {}
{
<INTEGER_LITERAL>
}
void StringLiteral() : {}
{
<STRING_LITERAL>
}
/**
* This method corresponds to variable
* references in Velocity templates.
* The following are examples of variable
* references that may be found in a
* template:
*
* $foo
* $bar
*
*/
void Identifier() : {}
{
<IDENTIFIER>
}
void Word() : {}
{
<WORD>
}
/**
* Supports the arguments for the Pluggable Directives
*/
int DirectiveArg() #void : {}
{
Reference()
{
return VmParserTreeConstants.JJTREFERENCE;
}
| Word()
{
return VmParserTreeConstants.JJTWORD;
}
| StringLiteral()
{
return VmParserTreeConstants.JJTSTRINGLITERAL;
}
| IntegerLiteral()
{
return VmParserTreeConstants.JJTINTEGERLITERAL;
}
/*
* Need to put this before the floating point expansion
*/
| LOOKAHEAD( <LBRACKET> [<WHITESPACE>] ( Reference() | IntegerLiteral()) [<WHITESPACE>] <DOUBLEDOT> ) IntegerRange()
{
return VmParserTreeConstants.JJTINTEGERRANGE;
}
| FloatingPointLiteral()
{
return VmParserTreeConstants.JJTFLOATINGPOINTLITERAL;
}
| Map()
{
return VmParserTreeConstants.JJTMAP;
}
| ObjectArray()
{
return VmParserTreeConstants.JJTOBJECTARRAY;
}
| True()
{
return VmParserTreeConstants.JJTTRUE;
}
| False()
{
return VmParserTreeConstants.JJTFALSE;
}
}
/**
* Supports the Pluggable Directives
* #foo( arg+ )
*/
VmNode Directive() :
{
Token t = null;
int argType;
int argPos = 0;
Directive d;
int directiveType;
boolean isVM = false;
2013-09-30 22:23:59 -04:00
boolean doItNow = false;
}
{
/*
* note that if we were escaped, that is now handled by
* EscapedDirective()
*/
((t = <WORD>) | (t = <BRACKETED_WORD>))
{
String directiveName;
if (t.kind == VmParserConstants.BRACKETED_WORD)
{
directiveName = t.image.substring(2, t.image.length() - 1);
}
else
{
directiveName = t.image.substring(1);
}
d = getDirective(directiveName);
/*
* Velocimacro support : if the directive is macro directive
* then set the flag so after the block parsing, we add the VM
* right then. (So available if used w/in the current template )
*/
if (directiveName.equals("macro"))
{
2013-09-30 22:23:59 -04:00
doItNow = true;
}
/*
* set the directive name from here. No reason for the thing to know
* about parser tokens
*/
jjtThis.setDirectiveName(directiveName);
if ( d == null)
{
if( directiveName.startsWith("@") )
{
// block macro call of type: #@foobar($arg1 $arg2) astBody #end
directiveType = Directive.BLOCK;
}
else
{
/*
* if null, then not a real directive, but maybe a Velocimacro
*/
2013-09-30 22:23:59 -04:00
isVM = !DirectiveMapper.isDirective(directiveName); //rsvc.isVelocimacro(directiveName, currentTemplateName);
directiveType = Directive.LINE;
}
}
else
{
directiveType = d.getType();
}
/*
* now, switch us out of PRE_DIRECTIVE
*/
token_source.SwitchTo(DIRECTIVE);
argPos = 0;
}
/**
* Look for the pattern [WHITESPACE] <LPAREN>
*/
(LOOKAHEAD( { isLeftParenthesis() } )
/*
* if this is indeed a token, match the #foo ( arg ) pattern
*/
2013-09-30 22:23:59 -04:00
(([<WHITESPACE>] <LPAREN>) ( LOOKAHEAD(2) [<WHITESPACE>] [<COMMA> [<WHITESPACE>]]
argType = DirectiveArg()
{
2013-09-30 22:23:59 -04:00
if (argType == VmParserTreeConstants.JJTWORD)
{
if (doItNow && argPos == 0)
{
/* if #macro and it's the 0th arg, ok */
}
else if (isVM)
{
throw new MacroParseException("Invalid arg #"
+ argPos + " in VM " + t.image, currentTemplateName, t);
}
/* if #foreach and it's the 2nd arg, ok */
else if (d != null && (!directiveName.equals("foreach") || argPos != 1))
{
throw new MacroParseException("Invalid arg #"
+ argPos + " in directive " + t.image, currentTemplateName, t);
}
else
{
/* either schmoo or a late-defined macro,
* VelocimacroProxy will have to check for latter. */
}
}
else
{
2013-09-30 22:23:59 -04:00
if (doItNow && argPos == 0)
{
2013-09-30 22:23:59 -04:00
/* if a VM and it's the 0th arg, not ok */
throw new MacroParseException("Invalid first arg"
+ " in #macro() directive - must be a"
+ " word token (no \' or \" surrounding)", currentTemplateName, t);
}
}
argPos++;
}
)* [<WHITESPACE>] <RPAREN>)
{
if (directiveType == Directive.LINE)
{
return jjtThis;
}
}
2013-09-30 22:23:59 -04:00
|
{
if (doItNow) // doItNow is true if the directive is "macro"
{
// VELOCITY-667 We get here if we have a "#macro" construct
// without parenthesis which is a parse error
throw new MacroParseException("A macro declaration requires at least a name argument"
, currentTemplateName, t);
}
/**
* Not a directive
*/
token_source.stateStackPop();
token_source.inDirective = false;
return jjtThis;
}
)
/*
* and the following block if the PD needs it
*/
( Statement() )* #Block
<END>
{
/*
* VM : if we are processing a #macro directive, we need to
* process the block. In truth, I can just register the name
* and do the work later when init-ing. That would work
* as long as things were always defined before use. This way
* we don't have to worry about forward references and such...
*/
2013-09-30 22:23:59 -04:00
if (doItNow)
{
// Add the macro name so that we can peform escape processing
// on defined macros
String macroName = ((AbstractVmNode)jjtThis.jjtGetChild(0)).getFirstToken().image;
macroNames.put(macroName, macroName);
}
/*
if (d != null)
{
d.checkArgs(argtypes, t, currentTemplateName);
}
*/
/*
* VM : end
*/
return jjtThis;
}
}
/**
* for creating a map in a #set
*
* #set($foo = {$foo : $bar, $blargh : $thingy})
*/
void Map() : {}
{
<LEFT_CURLEY>
(
LOOKAHEAD(2) Parameter() <COLON> Parameter() (<COMMA> Parameter() <COLON> Parameter() )*
|
[ <WHITESPACE> ]
)
/** note: need both tokens as they are generated in different states **/
( <RIGHT_CURLEY> | <RCURLY> )
}
void ObjectArray() : {}
{
<LBRACKET> [ Parameter() ( <COMMA> Parameter() )* ] <RBRACKET>
}
/**
* supports the [n..m] vector generator for use in
* the #foreach() to generate measured ranges w/o
* needing explicit support from the app/servlet
*/
void IntegerRange() : {}
{
<LBRACKET> [<WHITESPACE>]
( Reference() | IntegerLiteral())
[<WHITESPACE>] <DOUBLEDOT> [<WHITESPACE>]
(Reference() | IntegerLiteral())
[<WHITESPACE>] <RBRACKET>
}
/**
* A Simplified parameter more suitable for an index position: $foo[$index]
*/
void IndexParameter() #void: {}
{
[<WHITESPACE>]
(
StringLiteral()
| IntegerLiteral()
| True()
| False()
| Reference()
)
[ <WHITESPACE>]
}
/**
* This method has yet to be fully implemented
* but will allow arbitrarily nested method
* calls
*/
void Parameter() #void: {}
{
[<WHITESPACE>]
(
StringLiteral()
| IntegerLiteral()
| LOOKAHEAD( <LBRACKET> [<WHITESPACE>] ( Reference() | IntegerLiteral()) [<WHITESPACE>] <DOUBLEDOT> ) IntegerRange()
| Map()
| ObjectArray()
| True()
| False()
| Reference()
| FloatingPointLiteral()
)
[ <WHITESPACE>]
}
/**
* This method has yet to be fully implemented
* but will allow arbitrarily nested method
* calls
*/
void Method() : {}
{
2013-09-30 22:23:59 -04:00
Identifier() <LPAREN> [ Parameter() ( <COMMA> Parameter() )* ] <REFMOD2_RPAREN>
}
void Index() : {}
{
<INDEX_LBRACKET> IndexParameter() <INDEX_RBRACKET>
}
void Reference() : {}
{
/*
* A reference is either ${<FOO>} or $<FOO>
*/
(
<IDENTIFIER> (Index())*
(LOOKAHEAD(2) <DOT> (LOOKAHEAD(3) Method() | Identifier() ) (Index())* )*
)
|
(
<LCURLY>
<IDENTIFIER> (Index())*
(LOOKAHEAD(2) <DOT> (LOOKAHEAD(3) Method() | Identifier() ) (Index())* )*
<RCURLY>
)
}
/**
* The boolean literal matched by the <TRUE> token.
*/
void True() : {}
{
<TRUE>
}
/**
* The boolean literal matched by the <FALSE> token.
*/
void False() : {}
{
<FALSE>
}
/**
* This is somewhat of a kludge, the problem is that the parser picks
* up on '$[' , or '$![' as being a Reference, and does not dismiss it even though
* there is no <Identifier> between $ and [. This has something to do
* with the LOOKAHEAD in Reference, but I never found a way to resolve
* it in a more fashionable way.
*
* The token is defined in the DEFAULT and REFERENCE lexical states and
* matches one of four literals: "$[", "$![", "$\![" (written "$\\![" below
* because the backslash is escaped) and "$.". Text() accepts it as an
* alternative.
*/
<DEFAULT,REFERENCE>
TOKEN :
{
<EMPTY_INDEX : ("$[" | "$![" | "$\\![" | "$.")>
}
/**
* This method is responsible for allowing
* all non-grammar text to pass through
* unscathed.
*
* Matches exactly one token from the list below and produces a Text
* node. Besides plain <TEXT>, the list includes punctuation and literal
* tokens so that they are accepted as text at this position.
*/
void Text() : {}
{
<TEXT>
| <DOT>
| <RPAREN>
| <LPAREN>
| <INTEGER_LITERAL>
| <FLOATING_POINT_LITERAL>
| <STRING_LITERAL>
| <ESCAPE>
| <LCURLY>
| <RCURLY>
/* see the EMPTY_INDEX token definition */
| <EMPTY_INDEX>
}
/* -----------------------------------------------------------------------
*
* Defined Directive Syntax
*
* ----------------------------------------------------------------------*/
/**
* The foreach directive: the directive token, a parenthesised header, a
* body of zero or more statements, and the closing <END>.
*
* The header is a Reference() (the loop variable), the <IN> token, and a
* PrimaryExpression() (the value iterated over); optional whitespace is
* accepted around each part. The body statements are gathered under a
* Block node.
*/
void ForeachStatement() : {}
{
<FOREACH_DIRECTIVE> [<WHITESPACE>] <LPAREN> [<WHITESPACE>] Reference() [<WHITESPACE>] <IN> [<WHITESPACE>] PrimaryExpression() [<WHITESPACE>] <RPAREN>
( Statement() )* #Block
<END>
}
/**
* The if directive: a parenthesised condition, a body of zero or more
* statements (gathered under a Block node), optionally one or more
* ElseIfStatement()s, optionally one ElseStatement(), and the closing <END>.
*/
void IfStatement() : {}
{
<IF_DIRECTIVE> [<WHITESPACE>] <LPAREN> Expression() <RPAREN>
( Statement() )* #Block
/*
 * LOOKAHEAD(1) is the same depth JavaCC uses by default.
 * NOTE(review): presumably spelled out to suppress a choice-conflict warning - confirm.
 */
[ LOOKAHEAD(1) ( ElseIfStatement() )+ ]
[ LOOKAHEAD(1) ElseStatement() ]
<END>
}
/**
* The else branch of an IfStatement(): the directive token followed by
* zero or more statements gathered under a Block node. The closing <END>
* is consumed by IfStatement(), not here.
*/
void ElseStatement() : {}
{
<ELSE_DIRECTIVE>
( Statement() )* #Block
}
/**
* An elseif branch of an IfStatement(): the directive token, a
* parenthesised condition, and zero or more statements gathered under a
* Block node. The closing <END> is consumed by IfStatement(), not here.
*/
void ElseIfStatement() : {}
{
<ELSEIF_DIRECTIVE> [<WHITESPACE>]
<LPAREN> Expression() <RPAREN>
( Statement() )* #Block
}
/**
* The set directive: #set( <reference> = <expression> )
*
* NOTE(review): an earlier version of this comment said both
* "#set( expr )" and "#set expr" are supported, but this production
* always requires a closing <RPAREN>. No <LPAREN> is matched here, so the
* opening parenthesis is presumably part of the <SET_DIRECTIVE> token -
* confirm against the token definition.
*
* An optional <NEWLINE> directly after the closing parenthesis is
* consumed as part of the directive.
*/
void SetDirective() : {}
{
<SET_DIRECTIVE>([<WHITESPACE>] Reference() [<WHITESPACE>] <EQUALS> Expression() <RPAREN>
{
/*
* ensure that inSet is false. Leads to some amusing bugs...
* (resets the flag on the token manager once the directive is closed)
*/
token_source.inSet = false;
}
[<NEWLINE>] )
}
/* -----------------------------------------------------------------------
*
* Expression Syntax
*
* ----------------------------------------------------------------------*/
/**
* Entry point of the expression grammar. Produces an Expression node
* whose content is parsed by ConditionalOrExpression(), the
* lowest-precedence level.
*/
void Expression() : {}
{
/* disabled alternative: assignment inside an expression (see Assignment()) */
// LOOKAHEAD( PrimaryExpression() <EQUALS> ) Assignment()
//|
ConditionalOrExpression()
}
/**
* An assignment "target = value", built as an Assignment node with
* exactly two children (the primary expression and the expression).
*
* NOTE(review): the only use visible in this part of the file is the
* commented-out alternative in Expression(); confirm whether it is
* referenced elsewhere.
*/
void Assignment() #Assignment(2) : {}
{
PrimaryExpression() <EQUALS> Expression()
}
/**
* Logical OR level. Declared #void: no node is created unless an
* operator is present. Each <LOGICAL_OR> combines the node built so far
* with the next operand into an OrNode with two children, so chains
* nest to the left.
*/
void ConditionalOrExpression() #void : {}
{
ConditionalAndExpression()
( <LOGICAL_OR> ConditionalAndExpression() #OrNode(2) )*
}
/**
* Logical AND level; binds tighter than OR because it is the operand of
* ConditionalOrExpression(). Declared #void: each <LOGICAL_AND> combines
* the node built so far with the next operand into an AndNode with two
* children.
*/
void ConditionalAndExpression() #void : {}
{
EqualityExpression()
( <LOGICAL_AND> EqualityExpression() #AndNode(2) )*
}
/**
* Equality level. Declared #void: each operator combines the node built
* so far with the next operand into a two-child EQNode (equals) or
* NENode (not equals).
*/
void EqualityExpression() #void : {}
{
RelationalExpression()
(
<LOGICAL_EQUALS> RelationalExpression() #EQNode(2)
| <LOGICAL_NOT_EQUALS> RelationalExpression() #NENode(2)
)*
}
/**
* Relational level. Declared #void: each operator combines the node
* built so far with the next operand into a two-child LTNode, GTNode,
* LENode or GENode.
*/
void RelationalExpression() #void : {}
{
AdditiveExpression()
(
<LOGICAL_LT> AdditiveExpression() #LTNode(2)
| <LOGICAL_GT> AdditiveExpression() #GTNode(2)
| <LOGICAL_LE> AdditiveExpression() #LENode(2)
| <LOGICAL_GE> AdditiveExpression() #GENode(2)
)*
}
/**
* Additive level. Declared #void: each <PLUS> or <MINUS> combines the
* node built so far with the next operand into a two-child AddNode or
* SubtractNode.
*/
void AdditiveExpression() #void : {}
{
MultiplicativeExpression()
(
<PLUS> MultiplicativeExpression() #AddNode(2)
| <MINUS> MultiplicativeExpression() #SubtractNode(2)
)*
}
/**
* Multiplicative level. Declared #void: each <MULTIPLY>, <DIVIDE> or
* <MODULUS> combines the node built so far with the next operand into a
* two-child MulNode, DivNode or ModNode.
*/
void MultiplicativeExpression() #void : {}
{
UnaryExpression()
(
<MULTIPLY> UnaryExpression() #MulNode(2)
| <DIVIDE> UnaryExpression() #DivNode(2)
| <MODULUS> UnaryExpression() #ModNode(2)
)*
}
/**
* Unary level: either a logical NOT applied to another unary expression
* (built as a NotNode with one child) or a PrimaryExpression().
* Declared #void, so no node is created for this production itself.
*/
void UnaryExpression() #void : {}
{
/*
 * Both alternatives may begin with optional <WHITESPACE> (PrimaryExpression()
 * also starts with it), so two tokens of lookahead are used to spot the
 * <LOGICAL_NOT> behind a leading whitespace token.
 */
LOOKAHEAD(2) [<WHITESPACE>] <LOGICAL_NOT> UnaryExpression() #NotNode(1)
| PrimaryExpression()
}
/**
* The operands of the expression grammar: a literal (string, integer,
* floating point, boolean), a reference, an integer range, a map, an
* array, or a parenthesised Expression(). Optional whitespace is accepted
* on either side. Declared #void, so no node is created for this
* production itself.
*/
void PrimaryExpression() #void : {}
{
[<WHITESPACE>]
(
StringLiteral()
| Reference()
| IntegerLiteral()
/*
 * IntegerRange() and ObjectArray() both start with <LBRACKET>; the syntactic
 * lookahead scans up to the <DOUBLEDOT> to commit to a range, otherwise the
 * bracket falls through to ObjectArray() below.
 */
| LOOKAHEAD( <LBRACKET> [<WHITESPACE>] ( Reference() | IntegerLiteral()) [<WHITESPACE>] <DOUBLEDOT> ) IntegerRange()
| FloatingPointLiteral()
| Map()
| ObjectArray()
| True()
| False()
/* parenthesised sub-expression */
| <LPAREN> Expression() <RPAREN>
)
[<WHITESPACE>]
}
/* ======================================================================
Notes
-----
template == the input stream for this parser, contains 'VTL'
mixed in with 'schmoo'
VTL == Velocity Template Language : the references, directives, etc
schmoo == the non-VTL component of a template
reference == VTL entity that represents data within the context. ex. $foo
directive == VTL entity that denotes 'action' (#set, #foreach, #if )
defined directive (DD) == VTL directive entity that is expressed
explicitly w/in this grammar
pluggable directive (PD) == VTL directive entity that is defined outside of the
grammar. PD's allow VTL to be easily expandable w/o parser modification.
The problem with parsing VTL is that an input stream consists generally of
little bits of VTL mixed in with 'other stuff', referred to as 'schmoo'.
Unlike other languages, like C or Java, where the parser can punt whenever
it encounters input that doesn't conform to the grammar, the VTL parser can't do
that. It must simply output the schmoo and keep going.
There are a few things that we do here :
- define a set of parser states (DEFAULT, DIRECTIVE, REFERENCE, etc)
- define for each parser state a set of tokens for each state
- define the VTL grammar, expressed (mostly) in the productions such as Text(),
SetDirective(), etc.
It is clear that this expression of the VTL grammar (the contents
of this .jjt file) is maturing and evolving as we learn more about
how to parse VTL ( and as I learn about parsing...), so in the event
this documentation is in disagreement w/ the source, the source
takes precedence. :)
Parser States
-------------
DEFAULT : This is the base or starting state, and strangely enough, the
default state.
PRE_DIRECTIVE : State immediately following '#' before we figure out which
defined or pluggable directive (or neither) we are working with.
DIRECTIVE : This state is triggered by a match of a DD or a PD.
REFERENCE : Triggered by '$'. Analogous to PRE_DIRECTIVE.
REFMODIFIER : Triggered by .<alpha> when in REFERENCE.
REFMOD2 : Triggered by ( when in REFMODIFIER
(cont)
Escape Sequences
----------------
The escape processing in VTL is very simple. The '\' character acts
only as an escape when :
1) One or more touch a VTL element.
A VTL element is either :
1) A reference that is in the context.
2) A defined directive (#set, #if, #end, etc) or a valid
pluggable directive, such as #foreach
In all other cases the '\' is just another piece of text. The purpose of this
is to allow the non-VTL parts of a template (the 'schmoo') to not have to be
altered for processing by Velocity.
So if in the context $foo and $bar were defined and $woogie was not
\$foo \$bar \$woogie
would output
$foo $bar \$woogie
Further, you can stack them and they affect left to right, just like conventional
escape characters in other languages.
\$foo = $foo
\\$foo = \<foo>
\\\$foo = \$foo
What You Expect
---------------
The recent versions of the parser are trying to support precise output to
support general template use. The directives do not render trailing
whitespace and newlines if followed by a newline. They will render
preceding whitespace. The only exception is #set, which also eats
preceding whitespace.
So, with a template :
------
#set $foo="foo"
#if($foo)
\$foo = $foo
#end
------
it will render precisely :
------
$foo = foo
------
*/