/* JSP Parser for PMD.
* It supports more-or-less well-written JSP files.
* The JSP Document style is supported, except for inline DTD.
* The JSP Page style (<% ... %>) is supported.
* Java code is not parsed.
* Script code inside is not parsed.
*/
// Generator options for JavaCC (parser / token manager) and JJTree (AST node classes).
options {
USER_CHAR_STREAM = true; // token manager reads from a user-supplied CharStream implementation
NODE_USES_PARSER=true; // JJTree: node construction routines also receive the parser object
UNICODE_INPUT=true; // input is treated as Unicode characters
FORCE_LA_CHECK = false; // no forced lookahead-ambiguity checking (this is the JavaCC default)
IGNORE_CASE = true; // token matching is case-insensitive
STATIC = false; // generated parser members are instance members, not static
MULTI=true; // JJTree: one node class per node type instead of a single generic node
VISITOR=true; // JJTree: generate a visitor interface and accept methods on the nodes
}
PARSER_BEGIN(JspParser)
package net.sourceforge.pmd.lang.jsp.ast;
import net.sourceforge.pmd.lang.ast.CharStream;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
/**
* JSP Parser for PMD.
* @author Pieter, Application Engineers NV/SA, http://www.ae.be
*/
public class JspParser {

    /**
     * Register of opened tags; the Element production reports every opened
     * and every closed tag to it so unclosed tags can be tracked.
     */
    private OpenTagRegister tagRegister = new OpenTagRegister();

    /**
     * Strips the surrounding quote characters from a quoted string.
     *
     * @param quote String - starting and ending with " or '
     * @return the part of quote between its first and its last character
     */
    private static String quoteContent(String quote) {
        final int lastIndex = quote.length() - 1;
        return quote.substring(1, lastIndex);
    }

    /**
     * Strips the delimiters from an EL expression or a value binding expression.
     *
     * @param expression String - starting with ${ or #{ and ending with }
     * @return expression without its first two characters and its last
     *         character, with leading and trailing whitespace trimmed
     */
    private static String expressionContent(String expression) {
        final int closingBraceIndex = expression.length() - 1;
        final String inner = expression.substring(2, closingBraceIndex);
        return inner.trim();
    }
}
PARSER_END(JspParser)
/** ******************************************************************** */
/** ************************* JSP LEXICON **************************** */
/** ******************************************************************** */
/* This JavaCC lexicon has the following states:
* - StartTagState : this is the state entered after the "<" of a tag, until a
* non-whitespace is found.
* This is only for tags, not for xml-comments, declarations, etc.
* - AfterTagState : this is the state entered after the closing ">" of a tag,
* or xml-comment or declaration, until some non-whitespace is found
* - CommentState : the state between "<!--" and "-->"
* - DeclarationState : the state between "<?" and "?>" or "<!" and ">"
* - CDataState : the state between "<![CDATA[" and "]]>"
* - InTagState : the state when inside a tag
* - AttrValueState : the state when starting an attribute value, before the starting single or double quote
* - DocTypeState : the state when inside a doctype declaration
* - ElExpressionState : the state when inside a ElExpression
* - DocTypeState : inside a document type declaration
* - DocTypeExternalIdState : inside an "external id" part of a dtd
* - AttrValueBetweenSingleQuotesState : inside an attribute that is surrounded by single quotes (')
* - AttrValueBetweenDoubleQuotesState : inside an attribute that is surrounded by double quotes (")
* - JspDirectiveState : inside a JSP directive not yet reaching the attributes of the directive
* - JspDirectiveAttributesState : inside the attributes part of a directive
* - JspScriptletState : inside a scriptlet <% ... %>
* - JspExpressionState : inside an expression <%= ... %>
* - JspDeclarationState : inside a declaration <%! ... %>
* - JspCommentState : inside a comment <%-- ... --%>
* - HtmlScriptContentState : inside an HTML script
*/
/*
 * Private (#) regular-expression fragments, declared for every lexical state (<*>).
 * They never produce tokens on their own; they are building blocks for the
 * token definitions of the individual states.
 *
 * The references inside ALPHANUM_CHAR, IDENTIFIER_CHAR, IDENTIFIER and XMLNAME
 * were missing (empty alternation/sequence operands) and have been restored
 * from the fragment names - verify against the upstream grammar.
 */
<*> TOKEN :
{
// '$', A-Z, '_', a-z and the Latin-1 code points C0-FF except D7 and F7
<#ALPHA_CHAR: [
"\u0024",
"\u0041"-"\u005a",
"\u005f",
"\u0061"-"\u007a",
"\u00c0"-"\u00d6",
"\u00d8"-"\u00f6",
"\u00f8"-"\u00ff"
] >
// ASCII digits 0-9
| <#NUM_CHAR: [
"\u0030"-"\u0039"
] >
| <#ALPHANUM_CHAR: ( <ALPHA_CHAR> | <NUM_CHAR> ) >
| <#IDENTIFIER_CHAR: ( <ALPHANUM_CHAR> | [ "_", "-", ".", ":" ] ) >
| <#IDENTIFIER: <ALPHA_CHAR> (<IDENTIFIER_CHAR>)* >
| <#XMLNAME: (<ALPHA_CHAR> | "_" | ":") (<IDENTIFIER_CHAR>)* >
// quoted string that may not contain a line break
| <#QUOTED_STRING_NO_BREAKS: ( "'" ( ~["'", "\r", "\n"] )* "'" )
| ( "\"" ( ~["\"", "\r", "\n"] )* "\"" ) >
// quoted string that may span lines
| <#QUOTED_STRING: ( "'" ( ~["'"] )* "'" ) | ( "\"" ( ~["\""] )* "\"" ) >
| <#WHITESPACE: ( " " | "\t" | "\n" | "\r" ) >
| <#NEWLINE: ( "\r\n" | "\r" | "\n" ) >
| <#QUOTE: ( "'" | "\"" )>
| <#NO_WHITESPACE_OR_LT_OR_DOLLAR: (~[" ", "\t", "\n", "\r", "<", "$", "#"])>
| <#DOLLAR_OR_HASH: ("$" | "#")>
| <#NO_OPENBRACE: (~["{"]) >
| <#NO_LT_OR_DOLLAR_OR_HASH: (~["<","$","#"])>
// two characters: anything but '<' followed by anything but '/'
| <#NO_ENDTAG_START: (~["<"]~["/"]) >
| <#TEXT_IN_EL: (~["}", "'", "\""])+ >
// backslash-escaped start of an EL expression or value binding
| <#EL_ESCAPE: ("\\${" | "\\#{") >
// anything but --%>
| <#NO_JSP_COMMENT_END: (~["-"] | "-" ~["-"] | "--" ~["%"] | "--%" ~[">"])+ >
// anything but %>
| <#NO_JSP_TAG_END: ( ~["%"] | ("%" ~[">"]) )+ >
}
/*
 * Token declarations of the individual lexical states (whitespace handling,
 * tag / JSP start tokens, text and EL expressions, directive, scriptlet,
 * doctype, tag and attribute-value tokens).
 *
 * NOTE(review): in this copy the lexical-state lists in front of SKIP,
 * SPECIAL_TOKEN and TOKEN, the token names, and every angle-bracketed
 * reference to another token appear to have been stripped. What is left are
 * literal fragments and the ": NextState" transitions, so this section is not
 * valid JavaCC as it stands - restore it from the upstream grammar before
 * regenerating the parser.
 *
 * Still visible below:
 * - transitions into StartTagState, CommentState, DocTypeState, CDataState,
 *   JspCommentState, JspDeclarationState, JspExpressionState,
 *   JspScriptletState, JspDirectiveState and InTagState from the first
 *   TOKEN declaration;
 * - several declarations whose terminating token switches to AfterTagState;
 * - the attribute-value start declaration, which switches to
 *   AttrValueBetweenSingleQuotesState or AttrValueBetweenDoubleQuotesState,
 *   or calls input_stream.backup(1) (pushing one character back) before
 *   switching to AttrValueNoQuotesState, or returns to InTagState for an
 *   empty attribute.
 */
SKIP :
{
< ()+ >
}
SPECIAL_TOKEN :
{
< ()+ >
}
TOKEN :
{
: StartTagState
| : StartTagState
| : CommentState
| : StartTagState
| : DocTypeState
| : CDataState
| : JspCommentState
| : JspDeclarationState
| : JspExpressionState
| : JspScriptletState
| : JspDirectiveState
| : InTagState
}
TOKEN :
{
| )* "}")
|
("#{" ( | )* "}")
>
| |
|
)+ >
}
TOKEN :
{
> : JspDirectiveAttributesState
}
TOKEN :
{
>
|
| >
| " > : AfterTagState
}
TOKEN :
{
" > : AfterTagState
| >
}
TOKEN :
{
" > : AfterTagState
| >
}
TOKEN :
{
" > : AfterTagState
| >
}
TOKEN :
{
" > : AfterTagState
| >
}
TOKEN :
{
)+ >
}
TOKEN:
{
) > : DocTypeExternalIdState
}
TOKEN:
{
|
| " > : AfterTagState
| ) >
}
TOKEN :
{
| ") > : AfterTagState
}
TOKEN :
{
> : InTagState
| : DEFAULT
}
TOKEN :
{
>
| " > : AfterTagState
| " | "!>") > : AfterTagState
| " > : AfterTagState
| : AttrValueState
|
}
TOKEN :
{
: AttrValueBetweenSingleQuotesState
| : AttrValueBetweenDoubleQuotesState
| { input_stream.backup(1);} : AttrValueNoQuotesState
| : InTagState //support for empty attributes
}
TOKEN:
{
| )* "}" >
| | )* "}" >
| "%>" >
}
TOKEN :
{
: InTagState
| )+ >
}
TOKEN :
{
: InTagState
| )+ >
| : InTagState
}
TOKEN :
{
: InTagState
| )+ >
| : InTagState
}
// Tokens of an XML comment body.
// NOTE(review): no lexical-state list is visible in front of TOKEN here; presumably
// this declaration belongs to CommentState - confirm against the upstream grammar.
TOKEN :
{
// End of the comment: two dashes, optional spaces, then a closing angle bracket,
// or the shorter dash + closing angle bracket form; the lexer then switches to AfterTagState.
< COMMENT_END: ("--" (" ")* ">" | "->") > : AfterTagState
// Any single character; the CommentTag production appends token images one by one.
| < COMMENT_TEXT: (~[]) >
}
// NOTE(review): the token definitions of this declaration are missing in this copy;
// only a transition to AfterTagState is still visible - restore from the upstream grammar.
TOKEN :
{
| : AfterTagState
}
/** ******************************************************************** */
/** ************************* JSP GRAMMAR **************************** */
/** ******************************************************************** */
/**
 * Root production: a JSP is a prolog followed by content.
 *
 * @return the root node of the AST
 */
ASTCompilationUnit CompilationUnit() :
{}
{
    Prolog()
    Content()
    {
        return jjtThis;
    }
}
/**
 * The optional prolog of a JSP: an optional (xml) declaration followed by an
 * optional doctype declaration. Each of the two may be preceded by any number
 * of XML comments and JSP comments; the syntactic lookahead makes sure those
 * comments are only consumed here when the declaration really follows.
 * The production creates no node of its own (#void).
 */
void Prolog() #void :
{}
{
    ( LOOKAHEAD( ( CommentTag() | JspComment() )* Declaration() )
      ( CommentTag() | JspComment() )* Declaration()
    )?
    ( LOOKAHEAD( ( CommentTag() | JspComment() )* DoctypeDeclaration() )
      ( CommentTag() | JspComment() )* DoctypeDeclaration()
    )?
}
/**
 * Everything between a start-tag and the corresponding end-tag of an element
 * (if an end tag exists): any sequence, possibly empty, of Text nodes and
 * content elements.
 */
void Content() :
{}
{
    (
        Text()
      | ContentElement()
    )*
}
/**
 * Exactly one non-text element that can occur between a start-tag and an
 * end-tag of an element. The production creates no node of its own (#void);
 * the chosen alternative supplies the node.
 */
void ContentElement() #void :
{}
{
    CommentTag()
  | Element()
  | CData()
  | JspComment()
  | JspDeclaration()
  | JspExpression()
  | JspScriptlet()
  | JspDirective()
  | HtmlScript()
}
/**
 * A JSP directive. The node name is set to the image of a token, after which
 * zero or more JspDirectiveAttribute children are parsed.
 * NOTE(review): the token references (e.g. after "t =") are missing in this
 * copy of the grammar - restore them from the upstream grammar.
 */
void JspDirective() :
{ Token t; }
{
t = { jjtThis.setName(t.image); }
(
JspDirectiveAttribute()
)*
}
/**
 * A single attribute of a JSP directive. The name is the image of the first
 * token; the value is the image of the second token without its first and
 * last character (quoteContent strips the surrounding quotes).
 * NOTE(review): the token references after both "t =" are missing in this
 * copy of the grammar - restore them from the upstream grammar.
 */
void JspDirectiveAttribute() :
{ Token t; }
{
t = { jjtThis.setName(t.image); }
t = { jjtThis.setValue(quoteContent(t.image)); }
}
/**
 * A JSP scriptlet. The node image is the trimmed image of the matched token.
 * NOTE(review): the token references are missing in this copy of the grammar -
 * restore them from the upstream grammar.
 */
void JspScriptlet() :
{ Token t; }
{
t = { jjtThis.setImage(t.image.trim()); }
}
/**
 * A JSP expression. The node image is the trimmed image of the matched token.
 * NOTE(review): the token references are missing in this copy of the grammar -
 * restore them from the upstream grammar.
 */
void JspExpression() :
{ Token t; }
{
t = { jjtThis.setImage(t.image.trim()); }
}
/**
 * A JSP declaration. The node image is the trimmed image of the matched token.
 * NOTE(review): the token references are missing in this copy of the grammar -
 * restore them from the upstream grammar.
 */
void JspDeclaration() :
{ Token t; }
{
t = { jjtThis.setImage(t.image.trim()); }
}
/**
 * A JSP comment. The node image is the trimmed image of the matched token.
 * NOTE(review): the token references are missing in this copy of the grammar -
 * restore them from the upstream grammar.
 */
void JspComment() :
{ Token t; }
{
t = { jjtThis.setImage(t.image.trim()); }
}
/**
 * This production groups all characters between two tags, where
 * tag is an xml-tag "<...>" or a jsp-page-tag "<%...%>" or CDATA "<![CDATA[...]]>".
 * Text consists of unparsed text and/or Expression Language expressions.
 * The node image is the concatenation, in document order, of the strings
 * returned by the UnparsedText and ElExpression children.
 */
void Text() :
{
    // Purely local accumulator: StringBuilder avoids the needless
    // synchronization of StringBuffer.
    StringBuilder content = new StringBuilder();
    String tmp;
}
{
    (
        tmp = UnparsedText() { content.append(tmp); }
      | tmp = ElExpression() { content.append(tmp); }
    )+
    { jjtThis.setImage(content.toString()); }
}
/**
 * A piece of text that is not parsed any further. The token image becomes the
 * node image and is also returned to the caller.
 * NOTE(review): the token reference after "t =" is missing in this copy of
 * the grammar - restore it from the upstream grammar.
 */
String UnparsedText() :
{ Token t; }
{
t =
{
jjtThis.setImage(t.image);
return t.image;
}
}
/**
 * Unparsed text as used for unquoted attribute values; builds an UnparsedText
 * node. The token image becomes the node image and is also returned.
 * NOTE(review): the token reference after "t =" is missing in this copy of
 * the grammar - restore it from the upstream grammar.
 */
String UnparsedTextNoWhitespace() #UnparsedText :
{ Token t;}
{
(
t =
)
{
jjtThis.setImage(t.image);
return t.image;
}
}
/**
 * Text that contains no single quotes, and that does not contain the start
 * of an EL expression or value binding. Builds an UnparsedText node; the token
 * image becomes the node image and is also returned.
 * NOTE(review): the token reference after "t =" is missing in this copy of
 * the grammar - restore it from the upstream grammar.
 */
String UnparsedTextNoSingleQuotes() #UnparsedText :
{ Token t; }
{
t =
{
jjtThis.setImage(t.image);
return t.image;
}
}
/**
 * Text that contains no double quotes, and that does not contain the start
 * of an EL expression or value binding. Builds an UnparsedText node; the token
 * image becomes the node image and is also returned.
 * NOTE(review): the token reference after "t =" is missing in this copy of
 * the grammar - restore it from the upstream grammar.
 */
String UnparsedTextNoDoubleQuotes() #UnparsedText :
{ Token t; }
{
t =
{
jjtThis.setImage(t.image);
return t.image;
}
}
/**
 * An EL expression, not within an attribute value.
 * The node image is the expression without its delimiters (expressionContent
 * drops the first two characters and the last one, then trims); the value
 * returned to the caller is the complete token image, delimiters included.
 * NOTE(review): the token reference after "t =" is missing in this copy of
 * the grammar - restore it from the upstream grammar.
 */
String ElExpression() :
{ Token t; }
{
t =
{
jjtThis.setImage(expressionContent(t.image));
return t.image;
}
}
/**
 * A value binding inside an attribute value; builds a ValueBinding node.
 * The node image is the expression without its delimiters (see
 * expressionContent); the complete token image is returned to the caller.
 * NOTE(review): the token reference after "t =" is missing in this copy of
 * the grammar - restore it from the upstream grammar.
 */
String ValueBindingInAttribute() #ValueBinding :
{ Token t; }
{
t =
{
jjtThis.setImage(expressionContent(t.image));
return t.image;
}
}
/**
 * An EL expression inside an attribute value; builds an ElExpression node.
 * The node image is the expression without its delimiters (see
 * expressionContent); the complete token image is returned to the caller.
 * NOTE(review): the token reference after "t =" is missing in this copy of
 * the grammar - restore it from the upstream grammar.
 */
String ElExpressionInAttribute() #ElExpression :
{ Token t; }
{
t =
{
jjtThis.setImage(expressionContent(t.image));
return t.image;
}
}
/**
 * A CDATA section. The images of zero or more tokens are concatenated,
 * untrimmed, into the node image.
 * NOTE(review): the token references (the one after "t =" and presumably the
 * section's start and end tokens) are missing in this copy of the grammar -
 * restore them from the upstream grammar.
 */
void CData() :
{
StringBuffer content = new StringBuffer();
Token t;
}
{
( t = { content.append(t.image); } )*
{
jjtThis.setImage(content.toString());
}
}
/**
 * A XML element, either with a single empty tag, or with a starting and closing tag
 * with optional contained content.
 * The element is registered with tagRegister when its start tag is read, and
 * the end tag name - when an end tag is present - is reported to tagRegister.
 * NOTE(review): the token references (after "startTag =" and "endTag =", and
 * presumably the tag delimiters) are missing in this copy of the grammar -
 * restore them from the upstream grammar.
 */
void Element() :
{
Token startTag;
Token endTag;
String tagName;
}
{
(
(
// start tag: its image names the node, and the node is registered as open
startTag = { tagName = startTag.image;
jjtThis.setName(tagName);
tagRegister.openTag(jjtThis);
}
)
(Attribute())*
(
(
// element with content: the end tag is optional, so it may stay unclosed
{ jjtThis.setEmpty(false);}
(Content())
(
endTag = {tagRegister.closeTag(endTag.image);}
)?
)
|
// empty element: marked as empty and as not unclosed
( { jjtThis.setEmpty(true);
jjtThis.setUnclosed(false);
}
)
)
)
}
/**
 * An attribute of an element: the node name is the image of a token, and the
 * value is parsed as an AttributeValue child.
 * NOTE(review): the token references are missing in this copy of the grammar -
 * restore them from the upstream grammar.
 */
void Attribute() :
{ Token t; }
{
t = { jjtThis.setName(t.image); }
(
AttributeValue()
)
}
/**
 * The value of an attribute of an element.
 * EL expressions, JSF value bindings, and JSP expressions
 * are parsed as sub-nodes of the AttributeValue node.
 * The node image is the concatenation of the strings returned by those
 * sub-productions and by the unparsed-text productions.
 * NOTE(review): the token references (quote delimiters and the tokens after
 * "t =") are missing in this copy of the grammar - restore them from the
 * upstream grammar.
 */
void AttributeValue() :
{
StringBuffer content = new StringBuffer();
String tmp;
Token t = null ;
}
{
(
(
// first alternative: text without double quotes (presumably a double-quoted value)
( ( tmp = UnparsedTextNoDoubleQuotes()
| tmp = QuoteIndependentAttributeValueContent()
) { content.append(tmp); } )*
(
// only the first character of the closing token is kept
// (presumably a "$" or "#" directly in front of the closing quote - confirm)
| t = { content.append(t.image.substring(0, 1)); }
)
)
|
(
// second alternative: text without single quotes (presumably a single-quoted value)
( ( tmp = UnparsedTextNoSingleQuotes() | tmp = QuoteIndependentAttributeValueContent() )
{ content.append(tmp); } )*
(
// same handling of the closing token as in the first alternative
| t = { content.append(t.image.substring(0, 1)); }
)
)
|
(
// third alternative: text without whitespace (presumably an unquoted value)
( ( tmp = UnparsedTextNoWhitespace() | tmp = QuoteIndependentAttributeValueContent() )
{ content.append(tmp); }
)*
( )
)
|
)
{ jjtThis.setImage( content.toString() );
}
}
/**
 * Partial content of an attribute value that can contain all quotes:
 * exactly one EL expression, value binding, or JSP expression.
 * The production creates no node of its own (#void).
 *
 * @return the string returned by the chosen sub-production
 */
String QuoteIndependentAttributeValueContent() #void :
{
    String image;
}
{
    (
        image = ElExpressionInAttribute()
      | image = ValueBindingInAttribute()
      | image = JspExpressionInAttribute()
    )
    {
        return image;
    }
}
/**
 * A JSP expression inside an attribute value. The node image is the trimmed
 * token image without its delimiters; the complete token image is returned.
 * NOTE(review): the token reference after "t =" is missing in this copy of
 * the grammar - restore it from the upstream grammar.
 */
String JspExpressionInAttribute() :
{ Token t; }
{
t =
{
jjtThis.setImage(t.image.substring(3, t.image.length()-2).trim()); // drops the first 3 and last 2 characters (presumably <%= and %>)
return t.image;
}
}
/**
 * An XML comment. The images of zero or more tokens are concatenated and the
 * trimmed result becomes the node image.
 * NOTE(review): the token references (the one after "t =" and presumably the
 * comment start and end tokens) are missing in this copy of the grammar -
 * restore them from the upstream grammar.
 */
void CommentTag() :
{
StringBuffer content = new StringBuffer();
Token t;
}
{
( t = { content.append(t.image); } )*
{
jjtThis.setImage(content.toString().trim());
}
}
/**
 * A declaration in the prolog: the node name is the image of a token, and
 * zero or more Attribute children follow.
 * NOTE(review): the token references are missing in this copy of the grammar -
 * restore them from the upstream grammar.
 */
void Declaration() :
{ Token t; }
{
t = { jjtThis.setName(t.image); }
(Attribute())*
}
/**
 * A doctype declaration: the node name is the image of a token, optionally
 * followed by a DoctypeExternalId child.
 * NOTE(review): the token references (including the contents of the empty
 * "()?" groups) are missing in this copy of the grammar - restore them from
 * the upstream grammar.
 */
void DoctypeDeclaration() :
{ Token t; }
{
t = { jjtThis.setName(t.image); }
()?
(DoctypeExternalId() ()?)?
}
/**
 * The external id of a doctype declaration, in one of two forms: a system
 * literal only (stored as the URI), or a public id literal followed by a
 * system literal (stored as the public id and the URI). quoteContent strips
 * the first and last character of each literal.
 * NOTE(review): the token references after "systemLiteral =" and
 * "pubIdLiteral =" are missing in this copy of the grammar - restore them
 * from the upstream grammar.
 */
void DoctypeExternalId() :
{
Token systemLiteral;
Token pubIdLiteral;
}
{
(
// first form: URI only
systemLiteral =
{ jjtThis.setUri(quoteContent(systemLiteral.image)); }
)
|
(
// second form: public id followed by URI
pubIdLiteral =
{ jjtThis.setPublicId(quoteContent(pubIdLiteral.image)); }
systemLiteral =
{ jjtThis.setUri(quoteContent(systemLiteral.image)); }
)
}
/**
 * An HTML script element. After the attributes, the token manager is switched
 * explicitly to HtmlScriptContentState; the images of the tokens read there
 * are concatenated and the trimmed result becomes the node image. The second
 * alternative carries no action.
 * NOTE(review): the token references are missing in this copy of the grammar -
 * restore them from the upstream grammar. The local tagName is declared but
 * not used in the visible code, and both "{}" actions are empty.
 */
void HtmlScript() :
{
StringBuffer content = new StringBuffer();
String tagName;
Token t;
}
{
{}
(Attribute() )* {}
(
(
// script with a body: the lexical state is switched by hand, not by a token transition
{token_source.SwitchTo(HtmlScriptContentState);}
(t = { content.append(t.image); })*
{ jjtThis.setImage(content.toString().trim());}
)
|
(
)
)
}