Clément Fournier a11f45e511 Cleanup spec
2020-09-17 22:54:59 +02:00

163 lines
4.4 KiB
Plaintext

/**
* This Matlab grammar is derived from MParser ANTLR grammar. (http://www.mit.edu/~wingated/code/mparser_0.1.tar.gz)
*
* The ' character is ambiguous, because it is the transpose operator but can also denote the start of a string.
* (see https://www.mathworks.com/matlabcentral/newsreader/view_thread/25108)
*
* Rule of thumb:
*
* A single quotation character is a transpose operator if it directly follows a right bracket ("]"), right parenthesis (")"),
* right brace ("}"), letter, digit, underscore ("_"), period ("."), or another single quote character ("'").
*
* To implement this, an extra lexical state 'TRANSPOSE' was introduced. In this state, the single quote character "'" will always be parsed as the TRANSPOSE operator.
*/
options {
BUILD_PARSER=false;
CACHE_TOKENS=true;
UNICODE_INPUT = true;
}
PARSER_BEGIN(MatlabParserImpl)
package net.sourceforge.pmd.lang.matlab.ast;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.TokenMgrError;
public class MatlabParserImpl {
}
PARSER_END(MatlabParserImpl)
<DEFAULT, TRANSPOSE> SKIP :
{
" " : DEFAULT
|
"\t" : DEFAULT
|
"\r\n" : DEFAULT
|
"\n" : DEFAULT
}
<DEFAULT, TRANSPOSE> MORE:
{ "%{": IN_COMMENT }
<DEFAULT, TRANSPOSE> SPECIAL_TOKEN:
{ <SINGLE_LINE_COMMENT: "%"~["{"](~["\n","\r"])* ("\n"|"\r"|"\r\n")?> }
<IN_COMMENT> SPECIAL_TOKEN:
{ <MULTI_LINE_COMMENT: "%}">: DEFAULT }
<IN_COMMENT> MORE:
{ < ~[] > }
<DEFAULT, TRANSPOSE> TOKEN : /* SEPARATORS AND OTHER USEFULL LANGUAGE CONSTRUCTS*/
{
< SEMI: ";" > : DEFAULT
| < LPAREN: "(" > : DEFAULT
| < RPAREN: ")" > : TRANSPOSE
| < LBRACE: "{" > : DEFAULT
| < RBRACE: "}" > : TRANSPOSE
| < LSBRACE: "[" > : DEFAULT
| < RSBRACE: "]" > : TRANSPOSE
| < AT: "@" > : DEFAULT
| < DOT: "." > : TRANSPOSE
| < COMMA: "," > : DEFAULT
| < QUESTIONMARK: "?" > : DEFAULT
}
<DEFAULT, TRANSPOSE> TOKEN : /* OPERATORS AND ASSIGNMENTS */
{
< DOUBLE_EQ: "==" > : DEFAULT
| < LOG_OR: "||" > : DEFAULT
| < LOG_AND: "&&" > : DEFAULT
| < LSTE: "<=" > : DEFAULT
| < GRTE: ">=" > : DEFAULT
| < NEQ: "~=" > : DEFAULT
| < EL_TIMES: ".*" > : DEFAULT
| < EL_LEFTDIV: "./" > : DEFAULT
| < EL_RIGHTDIV: ".\\" > : DEFAULT
| < EL_EXP: ".^" > : DEFAULT
| < EL_CCT: ".'" > : DEFAULT
| < EQ: "=" > : DEFAULT
| < BIN_OR: "|" > : DEFAULT
| < BIN_AND: "&" > : DEFAULT
| < LST: "<" > : DEFAULT
| < GRT: ">" > : DEFAULT
| < COLON: ":" > : DEFAULT
| < PLUS: "+" > : DEFAULT
| < MINUS: "-" > : DEFAULT
| < NEG: "~" > : DEFAULT
| < TIMES: "*" > : DEFAULT
| < LEFTDIV: "/" > : DEFAULT
| < RIGHTDIV: "\\" > : DEFAULT
| < EXP: "^" > : DEFAULT
}
<DEFAULT, TRANSPOSE> TOKEN : /* KEYWORDS */
{
< BREAK: "break" > : DEFAULT
| < CASE: "case" > : DEFAULT
| < CATCH: "catch" > : DEFAULT
| < CONTINUE: "continue" > : DEFAULT
| < ELSE: "else" > : DEFAULT
| < ELSEIF: "elseif" > : DEFAULT
| < END: "end" > : DEFAULT
| < FOR: "for" > : DEFAULT
| < FUNCTION: "function" > : DEFAULT
| < GLOBAL: "global" > : DEFAULT
| < IF: "if" > : DEFAULT
| < OTHERWISE: "otherwise" > : DEFAULT
| < PERSISTENT: "persistent" > : DEFAULT
| < RETURN: "return" > : DEFAULT
| < SWITCH: "switch" > : DEFAULT
| < TRY: "try" > : DEFAULT
| < VARARGIN: "varargin" > : DEFAULT
| < WHILE: "while" > : DEFAULT
| < CLEAR: "clear" > : DEFAULT
}
<DEFAULT, TRANSPOSE> TOKEN : /* Matlab identifiers */
{
< ID: <LETTER> ( <LETTER> | <DIGIT> | "_" )* > : TRANSPOSE
| < #LETTER: ["a"-"z", "A"-"Z"] >
}
<DEFAULT, TRANSPOSE> TOKEN :
{
< INT: ( <DIGIT> )+ > : DEFAULT
| < FLOAT:
<INT> "." ( <INT> )* ( <EXPONENT> )?
| "." ( <INT> )+ ( <EXPONENT> )?
| ( <INT> )+ <EXPONENT>
> : DEFAULT
| < #EXPONENT: ( "e" | "E" ) ( "+" | "-" )? <INT> >
| < #DIGIT: ["0"-"9"] >
}
<DEFAULT> TOKEN :
{
< STRING: "'" ( <ESC_SEQ> | "'" "'" | ~["\\","'","\n"] )* "'" >
| < DSTRING: "\"" ( "\\" | "\"" "\"" | ~["\\","\"","\n"] )* "\"" >
| < #ESC_SEQ:
"\\" ( "b" | "t" | "n" | "f" | "r" | "\"" | "'" | "\\" )
| <UNICODE_ESC>
| <OCTAL_ESC>
>
| < #UNICODE_ESC: "\\" "u" <HEX_DIGIT> <HEX_DIGIT> <HEX_DIGIT> <HEX_DIGIT> >
| < #OCTAL_ESC:
"\\" ["0" - "3"] <OCTAL_DIGIT> <OCTAL_DIGIT>
| "\\" <OCTAL_DIGIT> <OCTAL_DIGIT>
| "\\" <OCTAL_DIGIT>
>
| < #HEX_DIGIT: ["0"-"9", "a"-"f", "A"-"F"] >
| < #OCTAL_DIGIT: ["0"-"7"] >
}
<DEFAULT, TRANSPOSE> TOKEN :
{
< TR : "'" > : TRANSPOSE
}