diff --git a/pmd-cpp/etc/grammar/cpp.jj b/pmd-cpp/etc/grammar/cpp.jj index 3f65d72a8e..8b0043b686 100644 --- a/pmd-cpp/etc/grammar/cpp.jj +++ b/pmd-cpp/etc/grammar/cpp.jj @@ -149,6 +149,10 @@ SKIP : "/*" : IN_COMMENT | "#" : PREPROCESSOR_OUTPUT +| + "\\\r\n" +| + "\\\n" } SKIP: @@ -321,7 +325,7 @@ TOKEN : < CHARACTER : ("L")? "'" ( ( ~["'","\\","\r","\n"] ) | ( "\\" ( ~["\n","\r"] ) ) )* "'" > -| < STRING : ("L")? "\"" ( ( ~["\"","\\","\r","\n"] ) | ( "\\" ( ~["\n","\r"] | "\n" | "\r\n" ) ) )* "\"" > +| < STRING : ("L")? "\"" ( ( ~["\"","\\","\r","\n"] ) | ( ("\\")+ ( ~["\n","\r"] | "\n" | "\r\n" ) ) )* "\"" > | < RSTRING : "R\"(" ( ~[")"] | ( ")" ~["\""] ) )* ")\"" > } diff --git a/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/ContinuationReader.java b/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/ContinuationReader.java deleted file mode 100644 index b0143fa61a..0000000000 --- a/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/ContinuationReader.java +++ /dev/null @@ -1,86 +0,0 @@ -/** - * BSD-style license; for more info see http://pmd.sourceforge.net/license.html - */ - -package net.sourceforge.pmd.lang.cpp; - -import java.io.IOException; -import java.io.PushbackReader; -import java.io.Reader; - -/** - * A custom {@link Reader} which completely omits C/C++ continuation character - * sequences from an underlying reader. Specifically the sequences {@code \ \n} - * (backslash, carriage return), or {@code \ \r \n} (backslash, line feed, - * carriage return). - *

- * This reader exists because to modify a JavaCC lexer to understand arbitrary - * continuations inside of any token is cumbersome, and just removing them from - * the input entirely is easier to implement. See this discussion on the JavaCC - * mailing list on line - * continuation character. - */ -public class ContinuationReader extends Reader { - private static final int EOF = -1; - private static final char BACKSLASH = '\\'; - private static final char CARRIAGE_RETURN = '\n'; - private static final char LINE_FEED = '\r'; - - /** the original stream is wrapped in this pushback reader. */ - protected final PushbackReader in; - - /** - * Creates a new {@link ContinuationReader} which filters the given reader. - * - * @param in - * the given reader - */ - public ContinuationReader(Reader in) { - this.in = new PushbackReader(in, 2); - } - - @Override - public int read(char[] cbuf, int off, int len) throws IOException { - int count = 0; - while (count < len) { - int c1 = in.read(); - if (c1 == EOF) { - break; - } else if (c1 == BACKSLASH) { - int c2 = in.read(); - if (c2 == EOF) { - // No match - } else if (c2 == CARRIAGE_RETURN) { - // Match: backslash, carriage return - continue; - } else if (c2 == LINE_FEED) { - int c3 = in.read(); - if (c3 == EOF) { - // No match - in.unread(c2); - } else if (c3 == CARRIAGE_RETURN) { - // Match: backslash, line feed, carriage return - continue; - } else { - // No match - in.unread(c3); - in.unread(c2); - } - } else { - // No match - in.unread(c2); - } - } - cbuf[off + count] = (char) c1; - count++; - } - - return count > 0 ? count : -1; - } - - @Override - public void close() throws IOException { - in.close(); - } -} diff --git a/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/CppTokenManager.java b/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/CppTokenManager.java index 328c85fe1c..5f8bb96018 100644 --- a/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/CppTokenManager.java +++ b/pmd-cpp/src/main/java/net/sourceforge/pmd/lang/cpp/CppTokenManager.java @@ -23,7 +23,7 @@ public class CppTokenManager implements TokenManager { * the source code */ public CppTokenManager(Reader source) { - tokenManager = new CppParserTokenManager(new SimpleCharStream(new ContinuationReader(source))); + tokenManager = new CppParserTokenManager(new SimpleCharStream(source)); } public Object getNextToken() { diff --git a/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerContinuationTest.java b/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerContinuationTest.java new file mode 100644 index 0000000000..50772cfbe8 --- /dev/null +++ b/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerContinuationTest.java @@ -0,0 +1,96 @@ +/** + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.cpd; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.apache.commons.io.IOUtils; +import org.junit.Ignore; +import org.junit.Test; + +import net.sourceforge.pmd.PMD; +import net.sourceforge.pmd.cpd.SourceCode.StringCodeLoader; + +public class CPPTokenizerContinuationTest { + + @Test + public void parseWithContinuation() throws Exception { + String code = load("cpp_with_continuation.cpp"); + Tokens tokens = parse(code); + if (tokens.size() < 53) { + printTokens(tokens); + fail("Not enough tokens - probably parsing error"); + } + + assertEquals("static", findByLine(8, tokens).get(0).toString()); + assertEquals("int", findByLine(8, tokens).get(1).toString()); + + // Note: the token should be "ab", but since we skip "\\\n" between a and b, + // we end up with two tokens. -> the tokens are not joined. + // This could lead to false negatives in duplication detection. + // However, this is only a problem, if the continuation character is added *within* + // a token and not at token boundaries. + // see also test #testContinuationIntraToken + //assertEquals("ab", findByLine(8, tokens).get(2).toString()); + + assertEquals("int", findByLine(12, tokens).get(0).toString()); + assertEquals("main", findByLine(12, tokens).get(1).toString()); + assertEquals("(", findByLine(12, tokens).get(2).toString()); + assertEquals(")", findByLine(12, tokens).get(3).toString()); + assertEquals("{", findByLine(13, tokens).get(0).toString()); + assertEquals("}", findByLine(29, tokens).get(0).toString()); + } + + @Test + @Ignore + public void testContinuationIntraToken() throws Exception { + Tokens tokens = parse(load("cpp_continuation_intra_token.cpp")); + assertEquals(7, tokens.size()); + } + + @Test + public void testContinuationInterToken() throws Exception { + Tokens tokens = parse(load("cpp_continuation_inter_token.cpp")); + assertEquals(17, tokens.size()); + } + + private void printTokens(Tokens tokens) { + for (TokenEntry entry : tokens.getTokens()) { + System.out.printf("%02d: %s%s", entry.getBeginLine(), entry.toString(), PMD.EOL); + } + } + + private List findByLine(int line, Tokens tokens) { + List result = new ArrayList<>(); + for (TokenEntry entry : tokens.getTokens()) { + if (entry.getBeginLine() == line) { + result.add(entry); + } + } + if (result.isEmpty()) { + fail("No tokens at line " + line + " found"); + } + return result; + } + + private String load(String name) throws Exception { + return IOUtils.toString(CPPTokenizerContinuationTest.class + .getResourceAsStream("cpp/" + name), StandardCharsets.UTF_8); + } + + private Tokens parse(String code) { + CPPTokenizer tokenizer = new CPPTokenizer(); + tokenizer.setProperties(new Properties()); + Tokens tokens = new Tokens(); + tokenizer.tokenize(new SourceCode(new StringCodeLoader(code)), tokens); + return tokens; + } +} diff --git a/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerTest.java b/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerTest.java index b968e0a7a3..cc185a627f 100644 --- a/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerTest.java +++ b/pmd-cpp/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerTest.java @@ -54,18 +54,6 @@ public class CPPTokenizerTest { parse(TEST4); } - @Test - public void testContinuationIntraToken() { - Tokens tokens = parse(TEST5); - assertEquals(7, tokens.size()); - } - - @Test - public void testContinuationInterToken() { - Tokens tokens = parse(TEST6); - assertEquals(17, tokens.size()); - } - @Test public void testTokenizerWithSkipBlocks() throws Exception { String test = IOUtils.toString(CPPTokenizerTest.class.getResourceAsStream("cpp/cpp_with_asm.cpp")); @@ -162,14 +150,6 @@ public class CPPTokenizerTest { private static final String TEST4 = " void main() { char x = L'a'; }"; - private static final String TEST5 = "v\\" + PMD.EOL + "o\\" + PMD.EOL + "i\\" + PMD.EOL + "d\\" + PMD.EOL + " \\" - + PMD.EOL + "m\\" + PMD.EOL + "a\\" + PMD.EOL + "i\\" + PMD.EOL + "n\\" + PMD.EOL + "(\\" + PMD.EOL + ")\\" - + PMD.EOL + " \\" + PMD.EOL + "{\\" + PMD.EOL + " \\" + PMD.EOL + "}\\" + PMD.EOL; - - private static final String TEST6 = "#include " + PMD.EOL + PMD.EOL + "int main()" + PMD.EOL + "{" - + PMD.EOL + " std::cout << \"Hello, \" \\" + PMD.EOL + " \"world!\\n\";" + PMD.EOL - + " return 0;" + PMD.EOL + "}"; - private static final String TEST7 = "asm void eSPI_boot()" + PMD.EOL + "{" + PMD.EOL + " // setup stack pointer" + PMD.EOL + " lis r1, _stack_addr@h" + PMD.EOL + " ori r1, r1, _stack_addr@l" + PMD.EOL + "}"; } diff --git a/pmd-cpp/src/test/java/net/sourceforge/pmd/lang/cpp/ContinuationReaderTest.java b/pmd-cpp/src/test/java/net/sourceforge/pmd/lang/cpp/ContinuationReaderTest.java deleted file mode 100644 index add41d7d55..0000000000 --- a/pmd-cpp/src/test/java/net/sourceforge/pmd/lang/cpp/ContinuationReaderTest.java +++ /dev/null @@ -1,56 +0,0 @@ -/** - * BSD-style license; for more info see http://pmd.sourceforge.net/license.html - */ - -package net.sourceforge.pmd.lang.cpp; - -import static org.junit.Assert.assertEquals; - -import java.io.IOException; -import java.io.StringReader; - -import org.junit.Test; - -public class ContinuationReaderTest { - @Test - public void testHappyPath() throws IOException { - assertEquals("empty", "", filter("")); - assertEquals("anything", "anything", filter("anything")); - - assertEquals("partial: BS", "\\", filter("\\")); - assertEquals("partial: BS LF", "\\\r", filter("\\\r")); - assertEquals("full: BS CR", "", filter("\\\n")); - assertEquals("full: BS LF CR", "", filter("\\\r\n")); - - assertEquals("partial: BS: prefix", "prefix\\", filter("prefix\\")); - assertEquals("partial: BS LF: prefix", "prefix\\\r", filter("prefix\\\r")); - assertEquals("full: BS CR: prefix", "prefix", filter("prefix\\\n")); - assertEquals("full: BS LF CR: prefix", "prefix", filter("prefix\\\r\n")); - - assertEquals("partial: BS: suffix", "\\suffix", filter("\\suffix")); - assertEquals("partial: BS LF: suffix", "\\\rsuffix", filter("\\\rsuffix")); - assertEquals("full: BS CR: suffix", "suffix", filter("\\\nsuffix")); - assertEquals("full: BS LF CR: suffix", "suffix", filter("\\\r\nsuffix")); - - assertEquals("partial: BS: prefix, suffix", "prefix\\suffix", filter("prefix\\suffix")); - assertEquals("partial: BS LF: prefix, suffix", "prefix\\\rsuffix", filter("prefix\\\rsuffix")); - assertEquals("full: BS CR: prefix, suffix", "prefixsuffix", filter("prefix\\\nsuffix")); - assertEquals("full: BS LF CR: prefix, suffix", "prefixsuffix", filter("prefix\\\r\nsuffix")); - - assertEquals("complex mixed", "abc", filter("a\\\r\nb\\\n\\\n\\\r\nc")); - } - - private static String filter(String s) throws IOException { - ContinuationReader reader = new ContinuationReader(new StringReader(s)); - try { - StringBuilder buf = new StringBuilder(); - int c; - while ((c = reader.read()) >= 0) { - buf.append((char) c); - } - return buf.toString(); - } finally { - reader.close(); - } - } -} diff --git a/pmd-cpp/src/test/resources/net/sourceforge/pmd/cpd/cpp/cpp_continuation_inter_token.cpp b/pmd-cpp/src/test/resources/net/sourceforge/pmd/cpd/cpp/cpp_continuation_inter_token.cpp new file mode 100644 index 0000000000..76ea1e8990 --- /dev/null +++ b/pmd-cpp/src/test/resources/net/sourceforge/pmd/cpd/cpp/cpp_continuation_inter_token.cpp @@ -0,0 +1,8 @@ +#include + +int main() +{ + std::cout << "Hello, " \ + "world!\n"; + return 0; +} diff --git a/pmd-cpp/src/test/resources/net/sourceforge/pmd/cpd/cpp/cpp_continuation_intra_token.cpp b/pmd-cpp/src/test/resources/net/sourceforge/pmd/cpd/cpp/cpp_continuation_intra_token.cpp new file mode 100644 index 0000000000..737f0d81d6 --- /dev/null +++ b/pmd-cpp/src/test/resources/net/sourceforge/pmd/cpd/cpp/cpp_continuation_intra_token.cpp @@ -0,0 +1,15 @@ +v\ +o\ +i\ +d\ + \ +m\ +a\ +i\ +n\ +(\ +)\ + \ +{\ + \ +}\ diff --git a/pmd-cpp/src/test/resources/net/sourceforge/pmd/cpd/cpp/cpp_with_continuation.cpp b/pmd-cpp/src/test/resources/net/sourceforge/pmd/cpd/cpp/cpp_with_continuation.cpp new file mode 100644 index 0000000000..82f5f88a5f --- /dev/null +++ b/pmd-cpp/src/test/resources/net/sourceforge/pmd/cpd/cpp/cpp_with_continuation.cpp @@ -0,0 +1,29 @@ +#include + +// here is a multiline macro +#define MULTI \ + 1 + +// here is a multiline variable called ab +static int a\ +b = 5; + + +int main() +{ + // string continuation without backslash + std::cout << "1 Hello, " + "world!\n"; + // string continuation with backslash inside string + std::cout << "2 Hello, \ +world!\n"; + // string continuation with double backslash inside string + // -> compiler warning (invalid escape sequence \w), but still compiles + std::cout << "3 Hello, \\ +world!\n"; + // string continuation with backslash outside of string + std::cout << "4 Hello, " \ + "world!\n"; + std::cout << "ab=" << ab << "\n"; + return 0; +} diff --git a/src/site/markdown/overview/changelog.md b/src/site/markdown/overview/changelog.md index a9dd3f80c9..2b3e606224 100644 --- a/src/site/markdown/overview/changelog.md +++ b/src/site/markdown/overview/changelog.md @@ -44,6 +44,8 @@ Fields using generics are still Work in Progress, but we expect to fully support * General * [#407](https://github.com/pmd/pmd/issues/407): \[web] Release date is not properly formatted * [#429](https://github.com/pmd/pmd/issues/429): \[core] Error when running PMD from folder with space +* cpp + * [#431](https://github.com/pmd/pmd/issues/431): \[cpp] CPD gives wrong duplication blocks for CPP code * java * [#414](https://github.com/pmd/pmd/issues/414): \[java] Java 8 parsing problem with annotations for wildcards * [#415](https://github.com/pmd/pmd/issues/415): \[java] Parsing Error when having an Annotated Inner class