Relaxed C++ tokenizer so a '\' (backslash) can be used to escape any character inside a string or character literal.
The following two code snippets could not be tokenized: (1) if (*pbuf == '\0x05'), where the problem is the '\0' in the character literal '\0x05'; (2) szPath = m_sdcacheDir + _T("\ oMedia");, where the problem is the '\ ' in the string literal "\ oMedia". I relaxed the lexical grammar so that a '\' (backslash) can escape any character inside a string or character literal. We can relax the grammar because CPD only needs the tokens, so it is no problem to accept string / character literals that are 'invalid' according to the ANSI C standard. Failing fast because the tokenizer is too strict is annoying, because then we can't check the files for duplicated code. Both snippets were taken from existing projects and compiled successfully, so for some C / C++ compilers this is valid code.
This commit is contained in:
@ -318,35 +318,9 @@ TOKEN [IGNORE_CASE] :
|
||||
TOKEN :
|
||||
{
|
||||
|
||||
< CHARACTER : ("L")? "'"
|
||||
( (~["'","\\","\n","\r"])*
|
||||
| ("\\" (
|
||||
["n","t","v","b","r","f","a","\\","?","'","\""]
|
||||
|
|
||||
"0" (["0"-"7"])*
|
||||
|
|
||||
["1"-"9"] (["0"-"9"])*
|
||||
|
|
||||
("x" | "X") (["0"-"9","a"-"f","A"-"F"])+
|
||||
)
|
||||
)
|
||||
)
|
||||
"'" >
|
||||
< CHARACTER : ("L")? "'" ( ( ~["'","\\","\r","\n"] ) | ( "\\" ( ~["\n","\r"] ) ) )* "'" >
|
||||
|
||||
| < STRING : ("L")? "\""
|
||||
( ( ~["\"","\\","\n","\r"])
|
||||
| ("\\" (
|
||||
["n","t","v","b","r","f","a","\\","?","'","\"","\n"]
|
||||
|
|
||||
"0" (["0"-"7"])*
|
||||
|
|
||||
["1"-"9"] (["0"-"9"])*
|
||||
|
|
||||
("x" | "X") (["0"-"9","a"-"f","A"-"F"])+
|
||||
)
|
||||
)
|
||||
)*
|
||||
"\"" >
|
||||
| < STRING : ("L")? "\"" ( ( ~["\"","\\","\r","\n"] ) | ( "\\" ( ~["\n","\r"] | "\n" | "\r\n" ) ) )* "\"" >
|
||||
}
|
||||
|
||||
void translation_unit() :
|
||||
|
@ -113,6 +113,18 @@ public class CPPTokenizerTest {
|
||||
assertEquals(15, tokens.size());
|
||||
}
|
||||
|
||||
// Regression test for a character literal whose backslash escape is followed
// by extra characters: the C++ source text under test is
//     if (*pbuf == '\0x05')
// (the doubled backslash below is Java string escaping for a single '\').
// The test passes when the snippet tokenizes and yields 8 tokens.
@Test
|
||||
public void testHexCharacter() {
|
||||
Tokens tokens = parse("if (*pbuf == '\\0x05')" + PMD.EOL);
|
||||
assertEquals(8, tokens.size());
|
||||
}
|
||||
|
||||
// Regression test for a string literal containing a backslash followed by a
// space: the C++ source text under test is
//     szPath = m_sdcacheDir + _T("\ oMedia");
// (the \" and \\ sequences below are Java string escaping).
// The test passes when the snippet tokenizes and yields 10 tokens.
@Test
|
||||
public void testWhiteSpaceEscape() {
|
||||
Tokens tokens = parse("szPath = m_sdcacheDir + _T(\"\\ oMedia\");" + PMD.EOL);
|
||||
assertEquals(10, tokens.size());
|
||||
}
|
||||
|
||||
// Convenience overload: tokenizes the given snippet by delegating to
// parse(snippet, false) and returns the resulting Tokens.
// NOTE(review): the meaning of the boolean flag is defined by the
// two-argument overload, which is not visible here; confirm before relying on it.
private Tokens parse(String snippet) {
|
||||
return parse(snippet, false);
|
||||
}
|
||||
|
Reference in New Issue
Block a user