Improved Dart tokenizer to handle strings correctly.

This is based on the grammar at https://github.com/chalin/dart-spec-and-grammar/blob/master/doc/grammar-AUTOGENERATED-DO-NOT-EDIT.txt.

The string handling is now much closer to this grammar, resulting in more files being tokenized correctly.
This commit is contained in:
Maikel Steneker
2019-05-21 14:33:43 +02:00
parent bcf0de6880
commit 806b130e4a

View File

@@ -340,58 +340,49 @@ booleanLiteral
| 'false'
;
//stringLiteral: (MultilineString | SingleLineString)+;
stringLiteral: SingleLineString;
//stringLiteral: SingleLineString;
stringLiteral: (MultiLineString | SingleLineString)+;
SingleLineString
: '"' (~[\\"] | '\\\\' | ESCAPE_SEQUENCE | '\\"')* '"'
| '\'' (~[\\'] | '\\\\' | ESCAPE_SEQUENCE | '\\\'')* '\''
// | 'r\'' (~('\'' | NEWLINE))* '\'' // TODO
// | 'r"' (~('\'' | NEWLINE))* '"'
: '"' StringContentDQ* '"'
| '\'' StringContentSQ* '\''
| 'r\'' (~('\'' | '\n' | '\r'))* '\''
| 'r"' (~('"' | '\n' | '\r'))* '"'
;
//MultilineString
// : '"""' StringContentTDQ* '"""'
// | '\'\'\'' StringContentTDQ* '\'\'\''
// | 'r"""' (~'"""')* '"""' // TODO
// | 'r\'\'\'' (~'\'\'\'')* '\'\'\''
// ;
//StringContentSQ: .;// TODO
//StringContentTDQ: .;// TODO
fragment
ESCAPE_SEQUENCE
: '\\n'
| '\\r'
| '\\f'
| '\\b'
| '\\t'
| '\\v'
| '\\x' HEX_DIGIT HEX_DIGIT
| '\\u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
| '\\u{' HEX_DIGIT_SEQUENCE '}'
| '\\$'
// One unit of content inside a single-line "..." string (used by SingleLineString).
StringContentDQ
// Any single character except backslash, double quote, LF or CR.
// The '$' exclusion is commented out, so '$' is matched as an ordinary character here.
: ~('\\' | '"' /*| '$'*/ | '\n' | '\r')
// Escape: a backslash followed by any one character that is not LF or CR.
| '\\' ~('\n' | '\r')
// String interpolation is not modelled as a separate alternative.
//| stringInterpolation
;
fragment
HEX_DIGIT_SEQUENCE
: HEX_DIGIT HEX_DIGIT? HEX_DIGIT?
HEX_DIGIT? HEX_DIGIT? HEX_DIGIT?
// One unit of content inside a single-line '...' string (used by SingleLineString).
StringContentSQ
// Any single character except backslash, single quote, LF or CR.
// The '$' exclusion is commented out, so '$' is matched as an ordinary character here.
: ~('\\' | '\'' /*| '$'*/ | '\n' | '\r')
// Escape: a backslash followed by any one character that is not LF or CR.
| '\\' ~('\n' | '\r')
// String interpolation is not modelled as a separate alternative.
//| stringInterpolation
;
/*TODO
<stringContentDQ> ::= \~{}( `\\' | `"' | `$' | <NEWLINE> )
\alt `\\' \~{}( <NEWLINE> )
\alt <stringInterpolation>
<stringContentSQ> ::= \~{}( `\\' | `\'' | `$' | <NEWLINE> )
\alt `\\' \~{}( <NEWLINE> )
\alt <stringInterpolation>
<stringContentTDQ> ::= \~{}( `\\' | `"""' | `$')
\alt `\\' \~{}( <NEWLINE> )
\alt <stringInterpolation>
// Multi-line (triple-quoted) string token. Four alternatives, in this order:
// """...""", '''...''', raw r"""...""" and raw r'''...'''.
MultiLineString
// Non-raw forms delegate their body to StringContentTDQ / StringContentTSQ.
: '"""' StringContentTDQ* '"""'
| '\'\'\'' StringContentTSQ* '\'\'\''
// Raw forms: each body unit is a non-quote character, or one or two quote
// characters followed by a non-quote character. A run of three quotes is
// therefore never consumed by the body and is left for the closing delimiter.
| 'r"""' (~'"' | '"' ~'"' | '""' ~'"')* '"""'
| 'r\'\'\'' (~'\'' | '\'' ~'\'' | '\'\'' ~'\'')* '\'\'\''
;
// One unit of content inside a non-raw """...""" string (used by MultiLineString).
fragment
StringContentTDQ
// Any single character except backslash and double quote (line breaks are allowed).
// The '$' exclusion is commented out, so '$' is matched as an ordinary character.
: ~('\\' | '"' /*| '$'*/)
// Escape: a backslash followed by any one character that is not LF or CR, in
// the same form as StringContentDQ. Without this alternative a backslash could
// only be consumed as the character following a quote, so a string containing
// an ordinary escape such as \n or \" could not be tokenized at all.
| '\\' ~('\n' | '\r')
// One or two double quotes that are followed by a non-quote character; three
// quotes in a row are left for the closing delimiter.
| '"' ~'"' | '""' ~'"'
// String interpolation is not modelled as a separate alternative.
//| stringInterpolation
;
// One unit of content inside a non-raw '''...''' string (used by MultiLineString).
fragment StringContentTSQ
// Any single character except backslash and single quote (line breaks are allowed).
// The '$' exclusion is commented out, so '$' is matched as an ordinary character.
: ~('\\' | '\'' /*| '$'*/)
// Escape: a backslash followed by any one character that is not LF or CR, in
// the same form as StringContentSQ. Without this alternative a backslash could
// only be consumed as the character following a quote, so a string containing
// an ordinary escape such as \n or \' could not be tokenized at all.
| '\\' ~('\n' | '\r')
// One or two single quotes that are followed by a non-quote character; three
// quotes in a row are left for the closing delimiter.
| '\'' ~'\'' | '\'\'' ~'\''
// String interpolation is not modelled as a separate alternative.
//| stringInterpolation
;
<stringContentTSQ> ::= \~{}( `\\' | `\'\'\'' | `$')
\alt `\\' \~{}( <NEWLINE> )
\alt <stringInterpolation>
*/
NEWLINE
: '\n'
| '\r'