Fix wrong grammar for digit separators
This commit is contained in:
@ -284,20 +284,21 @@ TOKEN :
|
||||
|
||||
TOKEN [IGNORE_CASE] :
|
||||
{
|
||||
< OCTALINT : "0" (["'", "0"-"7"])* >
|
||||
< #DECIMALDIGIT: ["0"-"9"] >
|
||||
| < #OCTALDIGIT: ["0"-"7"] >
|
||||
| < #HEXDIGIT: ["a"-"f", "A"-"F", "0"-"9"] >
|
||||
|
||||
| < OCTALINT : "0" ("'" <OCTALDIGIT> | <OCTALDIGIT>)* >
|
||||
| < OCTALLONG : <OCTALINT> "l" >
|
||||
| < UNSIGNED_OCTALINT : <OCTALINT> "u" >
|
||||
| < UNSIGNED_OCTALLONG : <OCTALINT> ("ul" | "lu") >
|
||||
|
||||
| < #DECIMALDIGIT : ["'", "0"-"9"] >
|
||||
|
||||
| < DECIMALINT : ["1"-"9"] (<DECIMALDIGIT>)* >
|
||||
| < DECIMALINT : ["1"-"9"] ("'" <DECIMALDIGIT> | <DECIMALDIGIT>)* >
|
||||
| < DECIMALLONG : <DECIMALINT> ["u","l"] >
|
||||
| < UNSIGNED_DECIMALINT : <DECIMALINT> "u" >
|
||||
| < UNSIGNED_DECIMALLONG : <DECIMALINT> ("ul" | "lu") >
|
||||
|
||||
|
||||
| < HEXADECIMALINT : "0x" (<HEXDIGIT>)+ >
|
||||
// Hex literal: "0x", one mandatory hex digit, then any number of further digits,
// each optionally preceded by a single "'" digit separator (so "0x'1", "0x1''2" and "0x1'" do not match).
// The trailing repetition is `*`, not `+`: with `+` a single-digit literal such as 0x0 or 0xF
// would need a second digit and would no longer be lexed as a hex literal.
| < HEXADECIMALINT : "0x" <HEXDIGIT> ("'" <HEXDIGIT> | <HEXDIGIT>)* >
|
||||
| < HEXADECIMALLONG : <HEXADECIMALINT> (["u","l"])? >
|
||||
| < UNSIGNED_HEXADECIMALINT : <HEXADECIMALINT> "u" >
|
||||
| < UNSIGNED_HEXADECIMALLONG : <HEXADECIMALINT> ("ul" | "lu") >
|
||||
@ -384,20 +385,16 @@ TOKEN :
|
||||
{
|
||||
// the standard says ids are only CHAR_NON_DIGIT CHAR*
|
||||
// where CHAR is at least [a-zA-Z_0-9],
|
||||
// but can also be \ uhhhh or \ Uhhhhhh (unicode escapes)
|
||||
// but can also be \ uhhhh or \ Uhhhhhhhh (unicode escapes)
|
||||
// *and* other characters may be allowed by implementations, e.g. a raw (unescaped) unicode character or a dollar sign
|
||||
|
||||
// this grammar doesn't allow unicode escapes, maybe it should?
|
||||
|
||||
// ref: https://en.cppreference.com/w/cpp/language/identifiers#Unicode_characters_in_identifiers
|
||||
|
||||
< ID : <ID_START_CHAR> (<ID_CHAR>)* >
|
||||
|
||||
| < #UNICODE_ESCAPE: "\\" ( "u" <HEXDIGIT> <HEXDIGIT> <HEXDIGIT> <HEXDIGIT>
|
||||
| "U" <HEXDIGIT> <HEXDIGIT> <HEXDIGIT> <HEXDIGIT> <HEXDIGIT> <HEXDIGIT>
|
||||
) >
|
||||
| < #HEXDIGIT: ["a"-"f", "A"-"F", "0"-"9"] >
|
||||
| < #ID_CHAR: [
|
||||
| < #UNICODE_ESCAPE: "\\" ( "u" <HEXDIGIT_4> | "U" <HEXDIGIT_4> <HEXDIGIT_4> ) >
|
||||
| < #HEXDIGIT_4: <HEXDIGIT> <HEXDIGIT> <HEXDIGIT> <HEXDIGIT> >
|
||||
| < #ID_CHAR: <UNICODE_ESCAPE> | [
|
||||
"a"-"z", "A"-"Z", "0"-"9", "_", "$",
|
||||
"\u00a8", "\u00aa", "\u00ad", "\u00af",
|
||||
"\u00b2"-"\u00b5", "\u00b7"-"\u00ba",
|
||||
@ -418,7 +415,7 @@ TOKEN :
|
||||
] >
|
||||
// this production is the same as the above,
|
||||
// with some ranges subtracted
|
||||
| < #ID_START_CHAR: [
|
||||
| < #ID_START_CHAR: <UNICODE_ESCAPE> | [
|
||||
"a"-"z", "A"-"Z", "_", "$",
|
||||
"\u00a8", "\u00aa", "\u00ad", "\u00af",
|
||||
"\u00b2"-"\u00b5", "\u00b7"-"\u00ba",
|
||||
|
@ -73,6 +73,12 @@ public class CPPTokenizerTest extends CpdTextComparisonTest {
|
||||
doTest("specialComments");
|
||||
}
|
||||
|
||||
// Tokenizes a small C++ snippet whose variable name contains a unicode escape sequence
// and checks the number of tokens produced.
// NOTE(review): the snippet has 9 visible tokens when the escaped name is lexed as a single
// identifier; the expected count of 10 presumably includes a trailing end-of-file token — confirm.
@Test
|
||||
public void testUnicodeEscapeInIdentifier() {
|
||||
Tokens tokens = parse(" void main() { int a\\u0048; }");
|
||||
assertEquals(10, tokens.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultiLineMacros() {
|
||||
doTest("multilineMacros");
|
||||
|
Reference in New Issue
Block a user