Fixed bug 3560464: c/c++ \ as a continuation character not supported

Add ContinuationReader, which strips the continuation sequences backslash-LF (\ followed by \n) and backslash-CR-LF (\ followed by \r\n) from the underlying Reader before anything reaches the parser.
2012-08-23 22:11:17 -05:00
parent 98c18554df
commit 6419c3d6f5
5 changed files with 178 additions and 6 deletions
--- a/pmd/etc/changelog.txt
+++ b/pmd/etc/changelog.txt
@@ -37,6 +37,7 @@ Fixed bug 3424397: Unable to parse
 Fixed bug 3530124: pmd: parsing of generic method call with super fails
 Fixed bug 3496028: pmd-4.2.6: MissingBreakInSwitch fails to report violation
 Fixed bug 3484404: Invalid NPath calculation in return statement. Thanks to Prabhjot Singh for the patch.
+Fixed bug 3560464: c/c++ \ as a continuation character not supported
 Improved JSP parser to be less strict with not valid XML documents (like HTML). Thanks to Victor Bucutea.
 Fixed bgastviewer not working. Thanks to Victor Bucutea.

--- a/pmd/src/main/java/net/sourceforge/pmd/lang/cpp/ContinuationReader.java
+++ b/pmd/src/main/java/net/sourceforge/pmd/lang/cpp/ContinuationReader.java
@@ -0,0 +1,79 @@
+/**
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+package net.sourceforge.pmd.lang.cpp;
+
+import java.io.IOException;
+import java.io.PushbackReader;
+import java.io.Reader;
+
+/**
+ * A custom {@link Reader} which completely omits C/C++ continuation character
+ * sequences from an underlying reader. Specifically the sequences {@code \ \n}
+ * (backslash, line feed), or {@code \ \r \n} (backslash, carriage return,
+ * line feed).
+ * <p>
+ * This reader exists because to modify a JavaCC lexer to understand arbitrary
+ * continuations inside of any token is cumbersome, and just removing them from
+ * the input entirely is easier to implement. See this discussion on the JavaCC
+ * mailing list on <a href=
+ * "http://java.net/projects/javacc/lists/users/archive/2005-06/message/16">line
+ * continuation character</a>.
+ */
+public class ContinuationReader extends Reader {
+	private static final int EOF = -1;
+	private static final char BACKSLASH = '\\';
+	private static final char LINE_FEED = '\n';
+	private static final char CARRIAGE_RETURN = '\r';
+
+	/** Source wrapped with two characters of pushback, the longest lookahead a failed match needs. */
+	protected final PushbackReader in;
+
+	public ContinuationReader(Reader in) {
+		this.in = new PushbackReader(in, 2);
+	}
+
+	/** Fills {@code cbuf} from the source, dropping each backslash-newline sequence; -1 signals EOF. */
+	@Override
+	public int read(char[] cbuf, int off, int len) throws IOException {
+		int count = 0;
+		while (count < len) {
+			int c1 = in.read();
+			if (c1 == EOF) {
+				break;
+			} else if (c1 == BACKSLASH) {
+				int c2 = in.read();
+				if (c2 == EOF) {
+					// No match: a trailing backslash is emitted as-is
+				} else if (c2 == LINE_FEED) {
+					// Match: backslash, line feed
+					continue;
+				} else if (c2 == CARRIAGE_RETURN) {
+					int c3 = in.read();
+					if (c3 == EOF) {
+						// No match: keep the lone carriage return for the next iteration
+						in.unread(c2);
+					} else if (c3 == LINE_FEED) {
+						// Match: backslash, carriage return, line feed
+						continue;
+					} else {
+						// No match: push back in reverse so c2 is re-read before c3
+						in.unread(c3);
+						in.unread(c2);
+					}
+				} else {
+					// No match: ordinary character after the backslash
+					in.unread(c2);
+				}
+			}
+			cbuf[off + count] = (char) c1;
+			count++;
+		}
+		return (count > 0 || len == 0) ? count : -1;
+	}
+
+	@Override
+	public void close() throws IOException {
+		in.close();
+	}
+}
--- a/pmd/src/main/java/net/sourceforge/pmd/lang/cpp/CppTokenManager.java
+++ b/pmd/src/main/java/net/sourceforge/pmd/lang/cpp/CppTokenManager.java
@@ -1,3 +1,6 @@
+/**
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
 package net.sourceforge.pmd.lang.cpp;

 import java.io.Reader;
@@ -13,7 +16,7 @@ public class CppTokenManager implements TokenManager {
    private final CppParserTokenManager tokenManager;

    public CppTokenManager(Reader source) {
-	tokenManager = new CppParserTokenManager(new SimpleCharStream(source));
+	tokenManager = new CppParserTokenManager(new SimpleCharStream(new ContinuationReader(source)));
    }

    public Object getNextToken() {
--- a/pmd/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerTest.java
+++ b/pmd/src/test/java/net/sourceforge/pmd/cpd/CPPTokenizerTest.java
@@ -12,10 +12,7 @@ public class CPPTokenizerTest {

    @Test
    public void testMultiLineMacros() throws Throwable {
-        CPPTokenizer tokenizer = new CPPTokenizer();
-        SourceCode code = new SourceCode(new SourceCode.StringCodeLoader(TEST1));
-        Tokens tokens = new Tokens();
-        tokenizer.tokenize(code, tokens);
+        Tokens tokens = parse(TEST1);
        assertEquals(7, tokens.size());
    }

@@ -34,11 +31,24 @@ public class CPPTokenizerTest {
        parse(TEST4);
    }

-    private void parse(String snippet) {
+    @Test
+    public void testContinuation_IntraToken() {
+    	Tokens tokens = parse(TEST5);
+        assertEquals(7, tokens.size());
+    }
+    
+    @Test
+    public void testContinuation_InterToken() {
+    	Tokens tokens = parse(TEST6);
+    	assertEquals(17, tokens.size());
+    }
+
+    private Tokens parse(String snippet) {
        CPPTokenizer tokenizer = new CPPTokenizer();
        SourceCode code = new SourceCode(new SourceCode.StringCodeLoader(snippet));
        Tokens tokens = new Tokens();
        tokenizer.tokenize(code, tokens);
+        return tokens;
    }

    private static final String TEST1 =
@@ -59,6 +69,33 @@ public class CPPTokenizerTest {

    private static final String TEST4 =
            " void main() { char x = L'a'; }";
+    
+    private static final String TEST5 =
+            "v\\" + PMD.EOL +
+            "o\\" + PMD.EOL +
+            "i\\" + PMD.EOL +
+            "d\\" + PMD.EOL +
+            " \\" + PMD.EOL +
+            "m\\" + PMD.EOL +
+            "a\\" + PMD.EOL +
+            "i\\" + PMD.EOL +
+            "n\\" + PMD.EOL +
+            "(\\" + PMD.EOL +
+            ")\\" + PMD.EOL +
+            " \\" + PMD.EOL +
+            "{\\" + PMD.EOL +
+            " \\" + PMD.EOL +
+            "}\\" + PMD.EOL;
+    
+    private static final String TEST6 =
+            "#include <iostream>" + PMD.EOL +
+            PMD.EOL +
+            "int main()" + PMD.EOL +
+            "{" + PMD.EOL +
+            "   std::cout << \"Hello, \" \\" + PMD.EOL +
+            "                \"world!\\n\";" + PMD.EOL +
+            "   return 0;" + PMD.EOL +
+            "}";

    public static junit.framework.Test suite() {
        return new junit.framework.JUnit4TestAdapter(CPPTokenizerTest.class);
--- a/pmd/src/test/java/net/sourceforge/pmd/lang/cpp/ContinuationReaderTest.java
+++ b/pmd/src/test/java/net/sourceforge/pmd/lang/cpp/ContinuationReaderTest.java
@@ -0,0 +1,52 @@
+package net.sourceforge.pmd.lang.cpp;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import org.junit.Test;
+
+public class ContinuationReaderTest {
+	@Test
+	public void testHappyPath() throws IOException {
+		assertEquals("empty", "", filter(""));
+		assertEquals("anything", "anything", filter("anything"));
+		// Bare sequences: BS = backslash, CR = '\r', LF = '\n'
+		assertEquals("partial: BS", "\\", filter("\\"));
+		assertEquals("partial: BS CR", "\\\r", filter("\\\r"));
+		assertEquals("full: BS LF", "", filter("\\\n"));
+		assertEquals("full: BS CR LF", "", filter("\\\r\n"));
+		// Sequences preceded by ordinary text
+		assertEquals("partial: BS: prefix", "prefix\\", filter("prefix\\"));
+		assertEquals("partial: BS CR: prefix", "prefix\\\r", filter("prefix\\\r"));
+		assertEquals("full: BS LF: prefix", "prefix", filter("prefix\\\n"));
+		assertEquals("full: BS CR LF: prefix", "prefix", filter("prefix\\\r\n"));
+		// Sequences followed by ordinary text
+		assertEquals("partial: BS: suffix", "\\suffix", filter("\\suffix"));
+		assertEquals("partial: BS CR: suffix", "\\\rsuffix", filter("\\\rsuffix"));
+		assertEquals("full: BS LF: suffix", "suffix", filter("\\\nsuffix"));
+		assertEquals("full: BS CR LF: suffix", "suffix", filter("\\\r\nsuffix"));
+		// Sequences embedded between ordinary text
+		assertEquals("partial: BS: prefix, suffix", "prefix\\suffix", filter("prefix\\suffix"));
+		assertEquals("partial: BS CR: prefix, suffix", "prefix\\\rsuffix", filter("prefix\\\rsuffix"));
+		assertEquals("full: BS LF: prefix, suffix", "prefixsuffix", filter("prefix\\\nsuffix"));
+		assertEquals("full: BS CR LF: prefix, suffix", "prefixsuffix", filter("prefix\\\r\nsuffix"));
+		// Several continuations of both kinds, back to back
+		assertEquals("complex mixed", "abc", filter("a\\\r\nb\\\n\\\n\\\r\nc"));
+	}
+
+	private static String filter(String s) throws IOException {
+		ContinuationReader reader = new ContinuationReader(new StringReader(s));
+		try {
+			StringBuilder buf = new StringBuilder();
+			int c;
+			while ((c = reader.read()) >= 0) {
+				buf.append((char) c);
+			}
+			return buf.toString();
+		} finally {
+			reader.close();
+		}
+	}
+}