From 806b130e4a3c834280d695806274740bab7afb6c Mon Sep 17 00:00:00 2001 From: Maikel Steneker <maikel.steneker@tiobe.com> Date: Tue, 21 May 2019 14:33:43 +0200 Subject: [PATCH] Improved Dart tokenizer to handle strings correctly. This is based on the grammar at https://github.com/chalin/dart-spec-and-grammar/blob/master/doc/grammar-AUTOGENERATED-DO-NOT-EDIT.txt. The string handling is now much closer to this grammar, resulting in more files being tokenized correctly. --- .../sourceforge/pmd/lang/dart/antlr4/Dart2.g4 | 77 ++++++++----------- 1 file changed, 34 insertions(+), 43 deletions(-) diff --git a/pmd-dart/src/main/antlr4/net/sourceforge/pmd/lang/dart/antlr4/Dart2.g4 b/pmd-dart/src/main/antlr4/net/sourceforge/pmd/lang/dart/antlr4/Dart2.g4 index fcc42ae013..7d8793edab 100644 --- a/pmd-dart/src/main/antlr4/net/sourceforge/pmd/lang/dart/antlr4/Dart2.g4 +++ b/pmd-dart/src/main/antlr4/net/sourceforge/pmd/lang/dart/antlr4/Dart2.g4 @@ -340,58 +340,49 @@ booleanLiteral | 'false' ; -//stringLiteral: (MultilineString | SingleLineString)+; -stringLiteral: SingleLineString; -//stringLiteral: SingleLineString; +stringLiteral: (MultiLineString | SingleLineString)+; + SingleLineString - : '"' (~[\\"] | '\\\\' | ESCAPE_SEQUENCE | '\\"')* '"' - | '\'' (~[\\'] | '\\\\' | ESCAPE_SEQUENCE | '\\\'')* '\'' -// | 'r\'' (~('\'' | NEWLINE))* '\'' // TODO -// | 'r"' (~('\'' | NEWLINE))* '"' + : '"' StringContentDQ* '"' + | '\'' StringContentSQ* '\'' + | 'r\'' (~('\'' | '\n' | '\r'))* '\'' + | 'r"' (~('"' | '\n' | '\r'))* '"' ; -//MultilineString -// : '"""' StringContentTDQ* '"""' -// | '\'\'\'' StringContentTDQ* '\'\'\'' -// | 'r"""' (~'"""')* '"""' // TODO -// | 'r\'\'\'' (~'\'\'\'')* '\'\'\'' -// ; -//StringContentSQ: .;// TODO -//StringContentTDQ: .;// TODO fragment -ESCAPE_SEQUENCE - : '\\n' - | '\\r' - | '\\f' - | '\\b' - | '\\t' - | '\\v' - | '\\x' HEX_DIGIT HEX_DIGIT - | '\\u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT - | '\\u{' HEX_DIGIT_SEQUENCE '}' - | '\\$' +StringContentDQ + 
: ~('\\' | '"' /*| '$'*/ | '\n' | '\r') + | '\\' ~('\n' | '\r') + //| stringInterpolation ; + fragment -HEX_DIGIT_SEQUENCE - : HEX_DIGIT HEX_DIGIT? HEX_DIGIT? - HEX_DIGIT? HEX_DIGIT? HEX_DIGIT? +StringContentSQ + : ~('\\' | '\'' /*| '$'*/ | '\n' | '\r') + | '\\' ~('\n' | '\r') + //| stringInterpolation ; -/*TODO -<stringContentDQ> ::= \~{}( `\\' | `"' | `$' | <NEWLINE> ) - \alt `\\' \~{}( <NEWLINE> ) - \alt <stringInterpolation> -<stringContentSQ> ::= \~{}( `\\' | `\'' | `$' | <NEWLINE> ) - \alt `\\' \~{}( <NEWLINE> ) - \alt <stringInterpolation> -<stringContentTDQ> ::= \~{}( `\\' | `"""' | `$') - \alt `\\' \~{}( <NEWLINE> ) - \alt <stringInterpolation> +MultiLineString + : '"""' StringContentTDQ* '"""' + | '\'\'\'' StringContentTSQ* '\'\'\'' + | 'r"""' (~'"' | '"' ~'"' | '""' ~'"')* '"""' + | 'r\'\'\'' (~'\'' | '\'' ~'\'' | '\'\'' ~'\'')* '\'\'\'' + ; + +fragment +StringContentTDQ + : ~('\\' | '"' /*| '$'*/) + | '\\' ~('\n' | '\r') /* escape: backslash + any non-newline char, as in StringContentDQ */ | '"' ~'"' | '""' ~'"' + //| stringInterpolation + ; + +fragment StringContentTSQ + : ~('\\' | '\'' /*| '$'*/) + | '\\' ~('\n' | '\r') /* escape: backslash + any non-newline char, as in StringContentSQ */ | '\'' ~'\'' | '\'\'' ~'\'' + //| stringInterpolation + ; + -<stringContentTSQ> ::= \~{}( `\\' | `\'\'\'' | `$') - \alt `\\' \~{}( <NEWLINE> ) - \alt <stringInterpolation> -*/ NEWLINE : '\n' | '\r'