/*
 * Added capability for Tracking Tokens.
 *
 * Amit Kumar Prasad 10/2015
 *====================================================================
 * JSP Parser for PMD.
 * It supports more-or-less well written JSP files.
 * The JSP Document style is supported, except for inline DTD.
 * The JSP Page style (<% ... %>) is supported.
 * Java code is not parsed.
 * Script code inside HTML script elements is not parsed.
 */
options {
    USER_CHAR_STREAM = true;
    NODE_USES_PARSER=true;
    UNICODE_INPUT=true;
    FORCE_LA_CHECK = false;
    IGNORE_CASE = true;
    STATIC = false;

    MULTI=true;
    VISITOR=true;
    TRACK_TOKENS = true;
}

PARSER_BEGIN(JspParser)
package net.sourceforge.pmd.lang.jsp.ast;

import net.sourceforge.pmd.lang.ast.CharStream;
import net.sourceforge.pmd.lang.ast.TokenMgrError;

/**
 * JSP Parser for PMD.
 * @author Pieter, Application Engineers NV/SA, http://www.ae.be
 */
public class JspParser {

    /**
     * Register used to keep track of unclosed tags.
     */
    private OpenTagRegister tagRegister = new OpenTagRegister();

    /**
     * Strips the delimiters from a quoted string.
     *
     * @param quote a string starting and ending with " or '
     * @return quote without its first and last character
     */
    private static String quoteContent(String quote) {
        final int closingQuoteIndex = quote.length() - 1;
        return quote.substring(1, closingQuoteIndex);
    }

    /**
     * Strips the delimiters from an EL expression or a value binding expression.
     *
     * @param expression a string starting with ${ or #{ and ending with }
     * @return expression without its first two characters and its last
     *         character, with leading and trailing whitespace trimmed
     */
    private static String expressionContent(String expression) {
        final int closingBraceIndex = expression.length() - 1;
        final String body = expression.substring(2, closingBraceIndex);
        return body.trim();
    }
}
PARSER_END(JspParser)


/** ******************************************************************** */
/** *************************  JSP LEXICON  **************************** */
/** ******************************************************************** */


/* This JavaCC lexicon has the following states:
 * - StartTagState : this is the state entered after the "<" of a tag, until a
 *                   non-whitespace is found.
 *                   This is only for tags, not for xml-comments, declarations, etc.
 * - AfterTagState : this is the state entered after the closing ">" of a tag,
 *                   or xml-comment or declaration, until some non-whitespace is found
 * - CommentState : the state between "<!--" and "-->"
 * - DeclarationState : the state between "<?" and "?>"
 * - CDataState : the state between "<![CDATA[" and "]]>"
 * - InTagState : the state when inside a tag
 * - AttrValueStatue : the state when starting an attribute value, before the starting single or double quote
 * - DocTypeState : the state when inside a doctype declaration
 * - ElExpressionState : the state when inside a ElExpression
 * - DocTypeState : inside a document type declaration
 * - DocTypeExternalIdState : inside an "external id" part of a dtd
 * - AttrValueBetweenSingleQuotesState : inside an attribute that is surrounded by single quotes (')
 * - AttrValueBetweenDoubleQuotesState : inside an attribute that is surrounded by double quotes (")
 * - JspDirectiveState : inside a JSP directive not yet reaching the attributes of the directive
 * - JspDirectiveAttributesState : inside the attributes part of a directive
 * - JspScriptletState : inside a scriptlet <% ... %>
 * - JspExpressionState : inside an expression <%= ... %>
 * - JspDeclarationState : inside a declaration <%! ... %>
 * - JspCommentState : inside a comment <%-- ...
--%> * - HtmlScriptContentState : inside an HTML script */ /* Private (#) regular-expression fragments, declared for every lexical state (<*>); they only serve as building blocks for other regular expressions and are never matched as tokens themselves. NOTE(review): several definitions below have empty operands, e.g. ( | ) or an empty ( ) group - the angle-bracketed token references seem to be missing from this copy; confirm against the canonical grammar. NOTE(review): on this physical line the end-of-line comment (anything but ...) further down also comments out everything that follows it; confirm the original line breaks. */ <*> TOKEN : { <#ALPHA_CHAR: [ "\u0024", "\u0041"-"\u005a", "\u005f", "\u0061"-"\u007a", "\u00c0"-"\u00d6", "\u00d8"-"\u00f6", "\u00f8"-"\u00ff" ] > /* $, A-Z, _, a-z and U+00C0..U+00FF without U+00D7 and U+00F7 */ | <#NUM_CHAR: [ "\u0030"-"\u0039" ] > /* the digits 0-9 */ | <#ALPHANUM_CHAR: ( | ) > | <#IDENTIFIER_CHAR: ( | [ "_", "-", ".", ":" ] ) > | <#IDENTIFIER: ()* > | <#XMLNAME: ( | "_" | ":") ()* > | <#QUOTED_STRING_NO_BREAKS: ( "'" ( ~["'", "\r", "\n"] )* "'" ) | ( "\"" ( ~["\"", "\r", "\n"] )* "\"" ) > | <#QUOTED_STRING: ( "'" ( ~["'"] )* "'" ) | ( "\"" ( ~["\""] )* "\"" ) > | <#WHITESPACE: ( " " | "\t" | "\n" | "\r" ) > | <#NEWLINE: ( "\r\n" | "\r" | "\n" ) > | <#QUOTE: ( "'" | "\"" )> | <#NO_WHITESPACE_OR_LT_OR_DOLLAR: (~[" ", "\t", "\n", "\r", "<", "$", "#"])> | <#DOLLAR_OR_HASH: ("$" | "#")> | <#NO_OPENBRACE: (~["{"]) > | <#NO_LT_OR_DOLLAR_OR_HASH: (~["<","$","#"])> | <#NO_ENDTAG_START: (~["<"]~["/"]) > | <#TEXT_IN_EL: (~["}", "'", "\""])+ > | <#EL_ESCAPE: ("\\${" | "\\#{") > // anything but --%> | <#NO_JSP_COMMENT_END: (~["-"] | "-" ~["-"] | "--" ~["%"] | "--%" ~[">"])+ > | <#NO_JSP_TAG_END: ( ~["%"] | ("%" ~[">"]) )+ > } SKIP : { < ()+ > } SPECIAL_TOKEN : { < ()+ > } TOKEN : { : StartTagState | : StartTagState | : CommentState | : StartTagState | : DocTypeState | : CDataState | : JspCommentState | : JspDeclarationState | : JspScriptletState | : JspDirectiveState | : InTagState } TOKEN : { | )* "}") | ("#{" ( | )* "}") > | | | )+ > } TOKEN : { > : JspDirectiveAttributesState } TOKEN : { > | " > : AfterTagState } TOKEN : { " > : AfterTagState | > } TOKEN : { " > : AfterTagState | > } TOKEN : { " > : AfterTagState | > } TOKEN : { " > : AfterTagState | > } TOKEN : { )+ > } TOKEN: { ) > : DocTypeExternalIdState } TOKEN: { | | " > : AfterTagState | ) > } TOKEN : { | ") > : AfterTagState } TOKEN : { > : InTagState | : DEFAULT } TOKEN : { > | " > : AfterTagState | " | "!>") > : AfterTagState | " > : AfterTagState | : AttrValueBetweenSingleQuotesState | : 
AttrValueBetweenDoubleQuotesState | { input_stream.backup(1);} /* backs the character stream up by one character before the lexer switches to AttrValueNoQuotesState */ : AttrValueNoQuotesState | : InTagState /* NOTE(review): on this physical line the end-of-line comment that follows (support for empty attributes) also comments out the rest of the line, and several token definitions have lost their names; confirm against the canonical grammar. */ //support for empty attributes } TOKEN: { | )* "}" > | | )* "}" > | " > } TOKEN : { : InTagState | )+ > } TOKEN : { : InTagState | )+ > | : InTagState } TOKEN : { : InTagState | )+ > | : InTagState } TOKEN : { < COMMENT_END: ("--" (" ")* ">" | "->") > : AfterTagState | < COMMENT_TEXT: (~[]) > } TOKEN : { | : AfterTagState } /** ******************************************************************** */ /** ************************* JSP GRAMMAR **************************** */ /** ******************************************************************** */ /** * The root of the AST of a JSP. */ ASTCompilationUnit CompilationUnit() : {} { Prolog() Content() { return jjtThis; } } /** * The optional prolog of a JSP, including (xml) declarations and DTD. * Both the declaration and the doctype declaration are optional, and each may be preceded by any number of XML or JSP comments. * The syntactic LOOKAHEADs scan past those comments first, so the comments are only consumed here when the declaration or doctype declaration really follows them. */ void Prolog() #void : {} { ( LOOKAHEAD( ( CommentTag() | JspComment() )* Declaration() ) ( CommentTag() | JspComment() )* Declaration() )? ( LOOKAHEAD( ( CommentTag() | JspComment() )* DoctypeDeclaration() ) ( CommentTag() | JspComment() )* DoctypeDeclaration() )? } /** * Everything between a start-tag and the corresponding end-tag of an element (if an end tag exists). */ void Content() : {} { ( Text() | ContentElement() )* } /** * A single (non-text) element that can occur between a start-tag and end-tag of an element. 
* */
/*
 * NOTE(review): in several productions below an assignment such as "t =" is
 * followed directly by its action block; the token reference that belongs
 * between them appears to be missing from this copy of the grammar. Confirm
 * against the canonical grammar before regenerating the parser.
 */
void ContentElement() #void :
{}
{
    (
        CommentTag()
      | Element()
      | CData()
      | JspComment()
      | JspDeclaration()
      | JspExpression()
      | JspScriptlet()
      | JspDirective()
      | HtmlScript()
    )
}

/**
 * A JSP directive. The node name is the image of the token assigned to t;
 * any number of JspDirectiveAttribute productions may follow.
 */
void JspDirective() :
{ Token t; }
{
    t = { jjtThis.setName(t.image); }
    (
        JspDirectiveAttribute()
    )*
}

/**
 * A single attribute of a JSP directive. The first token image becomes the
 * node name, the second one (with its surrounding quotes removed by
 * quoteContent) becomes the node value.
 */
void JspDirectiveAttribute() :
{ Token t; }
{
    t = { jjtThis.setName(t.image); }
    t = { jjtThis.setValue(quoteContent(t.image)); }
}

/** A JSP scriptlet; the node image is the trimmed token image. */
void JspScriptlet() :
{ Token t; }
{
    t = { jjtThis.setImage(t.image.trim()); }
}

/** A JSP expression; the node image is the trimmed token image. */
void JspExpression() :
{ Token t; }
{
    t = { jjtThis.setImage(t.image.trim()); }
}

/** A JSP declaration; the node image is the trimmed token image. */
void JspDeclaration() :
{ Token t; }
{
    t = { jjtThis.setImage(t.image.trim()); }
}

/** A JSP comment; the node image is the trimmed token image. */
void JspComment() :
{ Token t; }
{
    t = { jjtThis.setImage(t.image.trim()); }
}

/**
 * This production groups all characters between two tags, where
 * tag is an xml-tag "<...>" or a jsp-page-tag "<%...%>" or CDATA "<![CDATA[...]]>".
 * Text consists of unparsed text and/or Expression Language expressions.
 * The node image is the concatenation of the strings returned by the
 * UnparsedText and ElExpression sub-productions, in input order.
 */
void Text() :
{
    /* Purely local accumulator, so the unsynchronized StringBuilder is sufficient. */
    StringBuilder content = new StringBuilder();
    String tmp;
}
{
    (
        tmp = UnparsedText() { content.append(tmp); }
      | tmp = ElExpression() { content.append(tmp); }
    )+
    {jjtThis.setImage(content.toString());}
}

/** Unparsed text; sets the token image as node image and returns it. */
String UnparsedText() :
{ Token t; }
{
    t =
    {
        jjtThis.setImage(t.image);
        return t.image;
    }
}

/**
 * Variant of UnparsedText that builds an UnparsedText node as well;
 * sets the token image as node image and returns it.
 */
String UnparsedTextNoWhitespace() #UnparsedText :
{ Token t;}
{
    (
        t =
    )
    {
        jjtThis.setImage(t.image);
        return t.image;
    }
}

/**
 * Text that contains no single quotes, and that does not contain the start
 * of a EL expression or value binding.
 */
String UnparsedTextNoSingleQuotes() #UnparsedText :
{ Token t; }
{
    t =
    {
        jjtThis.setImage(t.image);
        return t.image;
    }
}

/**
 * Text that contains no double quotes, and that does not contain the start
 * of a EL expression or value binding.
 */
String UnparsedTextNoDoubleQuotes() #UnparsedText :
{ Token t; }
{
    t =
    {
        jjtThis.setImage(t.image);
        return t.image;
    }
}

/**
 * An EL expression, not within an attribute value.
*/
/*
 * NOTE(review): in several productions below an assignment such as "t =" is
 * followed directly by its action block, and some groups are empty; the token
 * references that belong there appear to be missing from this copy of the
 * grammar. Confirm against the canonical grammar before regenerating the parser.
 *
 * For the three expression productions the node image is the token image
 * without its delimiters (see expressionContent), while the value returned
 * to the caller is the complete token image.
 */
String ElExpression() :
{ Token t; }
{
    t =
    {
        jjtThis.setImage(expressionContent(t.image));
        return t.image;
    }
}

String ValueBindingInAttribute() #ValueBinding :
{ Token t; }
{
    t =
    {
        jjtThis.setImage(expressionContent(t.image));
        return t.image;
    }
}

String ElExpressionInAttribute() #ElExpression :
{ Token t; }
{
    t =
    {
        jjtThis.setImage(expressionContent(t.image));
        return t.image;
    }
}

/**
 * A CDATA section. The node image is the concatenation of the images of
 * all tokens matched inside the loop.
 */
void CData() :
{
    /* Purely local accumulator, so the unsynchronized StringBuilder is sufficient. */
    StringBuilder content = new StringBuilder();
    Token t;
}
{
    ( t = { content.append(t.image); } )*
    {
        jjtThis.setImage(content.toString());
    }
}

/**
 * A XML element, either with a single empty tag, or with a starting and closing tag
 * with optional contained content.
 * The start tag is reported to tagRegister when it is read; the end tag is
 * optional in the grammar and is reported to tagRegister only when present.
 */
void Element() :
{
    Token startTag;
    Token endTag;
    String tagName;
}
{
    (
        (
            startTag =
            {
                tagName = startTag.image;
                jjtThis.setName(tagName);
                tagRegister.openTag(jjtThis);
            }
        )
        (Attribute())*
        (
            (
                { jjtThis.setEmpty(false);}
                (Content())
                ( endTag = {tagRegister.closeTag(endTag.image);} )?
            )
          |
            (
                {
                    jjtThis.setEmpty(true);
                    jjtThis.setUnclosed(false);
                }
            )
        )
    )
}

/** An attribute of an element: a name followed by an AttributeValue. */
void Attribute() :
{ Token t; }
{
    t = { jjtThis.setName(t.image); }
    (
        AttributeValue()
    )
}

/**
 * The value of an attribute of an element.
 * EL expressions, JSF value bindings, and JSP expressions
 * are parsed as sub-nodes of the AttributeValue node.
 * The node image is the concatenation of the strings returned by the
 * sub-productions; where a token is assigned to t at the end of a quoted
 * value, only the first character of its image is appended.
 */
void AttributeValue() :
{
    /* Purely local accumulator, so the unsynchronized StringBuilder is sufficient. */
    StringBuilder content = new StringBuilder();
    String tmp;
    Token t = null;
}
{
    (
        (
            ( ( tmp = UnparsedTextNoDoubleQuotes()
              | tmp = QuoteIndependentAttributeValueContent()
              ) { content.append(tmp); } )*
            (
              | t =
                { content.append(t.image.substring(0, 1)); }
            )
        )
      |
        (
            ( ( tmp = UnparsedTextNoSingleQuotes()
              | tmp = QuoteIndependentAttributeValueContent()
              ) { content.append(tmp); } )*
            (
              | t =
                { content.append(t.image.substring(0, 1)); }
            )
        )
      |
        (
            ( ( tmp = UnparsedTextNoWhitespace()
              | tmp = QuoteIndependentAttributeValueContent()
              ) { content.append(tmp); } )*
            ( )
        )
      |
    )
    { jjtThis.setImage( content.toString() ); }
}

/**
 * Partial content of an attribute value that can contain all quotes.
* This groups EL expressions, value bindings, and JSP expressions. */
String QuoteIndependentAttributeValueContent() #void :
{ String tmp; }
{
    (
        tmp = ElExpressionInAttribute()
      | tmp = ValueBindingInAttribute()
      | tmp = JspExpressionInAttribute()
    )
    { return tmp; }
}

/*
 * NOTE(review): in several productions below an assignment such as "t =" is
 * followed directly by its action block, and some groups are empty; the token
 * references that belong there appear to be missing from this copy of the
 * grammar. Confirm against the canonical grammar before regenerating the parser.
 */

/**
 * A JSP expression used inside an attribute value.
 * The node image is the expression text without its delimiters; the
 * production returns the complete token image, delimiters included.
 */
String JspExpressionInAttribute() :
{ Token t; }
{
    t =
    {
        /* Skip the first three characters and the last two, i.e. the
           <%= and %> delimiters of a JSP expression. Written as a block
           comment so that it can never comment out the return statement. */
        jjtThis.setImage(t.image.substring(3, t.image.length()-2).trim());
        return t.image;
    }
}

/**
 * A comment tag. The node image is the trimmed concatenation of the images
 * of all tokens matched inside the loop.
 */
void CommentTag() :
{
    /* Purely local accumulator, so the unsynchronized StringBuilder is sufficient. */
    StringBuilder content = new StringBuilder();
    Token t;
}
{
    ( t = { content.append(t.image); } )*
    {
        jjtThis.setImage(content.toString().trim());
    }
}

/** A declaration: a name followed by any number of attributes. */
void Declaration() :
{ Token t; }
{
    t = { jjtThis.setName(t.image); }
    (Attribute())*
}

/** A doctype declaration: a name, optionally followed by an external id. */
void DoctypeDeclaration() :
{ Token t; }
{
    t = { jjtThis.setName(t.image); }
    ()?
    (DoctypeExternalId() ()?)?
}

/**
 * The external id of a doctype declaration: either a system literal alone,
 * or a public id literal followed by a system literal. The surrounding
 * quotes are removed (quoteContent) before the values are stored.
 */
void DoctypeExternalId() :
{
    Token systemLiteral;
    Token pubIdLiteral;
}
{
    (
        systemLiteral =
        { jjtThis.setUri(quoteContent(systemLiteral.image)); }
    )
  |
    (
        pubIdLiteral =
        { jjtThis.setPublicId(quoteContent(pubIdLiteral.image)); }
        systemLiteral =
        { jjtThis.setUri(quoteContent(systemLiteral.image)); }
    )
}

/**
 * An HTML script element. After the attributes, the token manager is switched
 * to HtmlScriptContentState explicitly; the node image is the trimmed
 * concatenation of the images of all tokens matched in the content loop.
 */
void HtmlScript() :
{
    /* Purely local accumulator, so the unsynchronized StringBuilder is sufficient. */
    StringBuilder content = new StringBuilder();
    Token t;
}
{
    {}
    (Attribute() )*
    {}
    (
        (
            {token_source.SwitchTo(HtmlScriptContentState);}
            (t = { content.append(t.image); })*
            { jjtThis.setImage(content.toString().trim());}
        )
      |
        (
        )
    )
}