diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/AbstractNode.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/AbstractNode.java index 64762d30ef..6f6e0e4140 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/AbstractNode.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/ast/AbstractNode.java @@ -115,7 +115,7 @@ public abstract class AbstractNode implements Node { } public boolean hasImageEqualTo(String image) { - return this.image != null && this.image.equals(image); + return this.getImage() != null && this.getImage().equals(image); } public int getBeginLine() { diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/renderers/XMLRenderer.java b/pmd-core/src/main/java/net/sourceforge/pmd/renderers/XMLRenderer.java index b76cd536f1..0465b1066d 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/renderers/XMLRenderer.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/renderers/XMLRenderer.java @@ -24,6 +24,8 @@ public class XMLRenderer extends AbstractIncrementingRenderer { public static final StringProperty ENCODING = new StringProperty("encoding", "XML encoding format, defaults to UTF-8.", "UTF-8", 0); + private boolean useUTF8 = false; + public XMLRenderer() { super(NAME, "XML format."); @@ -45,6 +47,9 @@ public class XMLRenderer extends AbstractIncrementingRenderer { @Override public void start() throws IOException { String encoding = getProperty(ENCODING); + if (encoding.equalsIgnoreCase("utf-8")) { + useUTF8 = true; + } Writer writer = getWriter(); StringBuilder buf = new StringBuilder(500); @@ -76,7 +81,7 @@ public class XMLRenderer extends AbstractIncrementingRenderer { } filename = rv.getFilename(); buf.append("").append(PMD.EOL); } @@ -85,9 +90,9 @@ public class XMLRenderer extends AbstractIncrementingRenderer { buf.append("\" begincolumn=\"").append(rv.getBeginColumn()); buf.append("\" endcolumn=\"").append(rv.getEndColumn()); buf.append("\" rule=\""); - StringUtil.appendXmlEscaped(buf, rv.getRule().getName()); + StringUtil.appendXmlEscaped(buf, rv.getRule().getName(), useUTF8); buf.append("\" ruleset=\""); - StringUtil.appendXmlEscaped(buf, rv.getRule().getRuleSetName()); + StringUtil.appendXmlEscaped(buf, rv.getRule().getRuleSetName(), useUTF8); buf.append('"'); maybeAdd("package", rv.getPackageName(), buf); maybeAdd("class", rv.getClassName(), buf); @@ -97,7 +102,7 @@ public class XMLRenderer extends AbstractIncrementingRenderer { buf.append(" priority=\""); buf.append(rv.getRule().getPriority().getPriority()); buf.append("\">").append(PMD.EOL); - StringUtil.appendXmlEscaped(buf, rv.getDescription()); + StringUtil.appendXmlEscaped(buf, rv.getDescription(), useUTF8); buf.append(PMD.EOL); buf.append(""); @@ -121,9 +126,9 @@ public class XMLRenderer extends AbstractIncrementingRenderer { for (Report.ProcessingError pe : errors) { buf.setLength(0); buf.append("").append(PMD.EOL); writer.write(buf.toString()); } @@ -133,13 +138,13 @@ public class XMLRenderer extends AbstractIncrementingRenderer { for (Report.SuppressedViolation s : suppressed) { buf.setLength(0); buf.append("").append(PMD.EOL); writer.write(buf.toString()); } @@ -151,7 +156,7 @@ public class XMLRenderer extends AbstractIncrementingRenderer { private void maybeAdd(String attr, String value, StringBuilder buf) { if (value != null && value.length() > 0) { buf.append(' ').append(attr).append("=\""); - StringUtil.appendXmlEscaped(buf, value); + StringUtil.appendXmlEscaped(buf, value, useUTF8); buf.append('"'); } } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/util/StringUtil.java b/pmd-core/src/main/java/net/sourceforge/pmd/util/StringUtil.java index d87ed15e4d..78184c2f59 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/util/StringUtil.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/util/StringUtil.java @@ -189,7 +189,10 @@ public final class StringUtil { * * @param buf The destination XML stream * @param src The String to append to the stream + * + * @deprecated use {@link #appendXmlEscaped(StringBuilder, String, boolean)} instead */ + @Deprecated public static void appendXmlEscaped(StringBuilder buf, String src) { appendXmlEscaped(buf, src, SUPPORTS_UTF8); } @@ -244,7 +247,14 @@ public final class StringUtil { c = src.charAt(i); if (c > '~') {// 126 if (!supportUTF8) { - buf.append("&#x").append(Integer.toHexString(c)).append(';'); + int codepoint = c; + // surrogate characters are not allowed in XML + if (Character.isHighSurrogate(c)) { + char low = src.charAt(i + 1); + codepoint = Character.toCodePoint(c, low); + i += 1; + } + buf.append("&#x").append(Integer.toHexString(codepoint)).append(';'); } else { buf.append(c); } diff --git a/pmd-core/src/test/java/net/sourceforge/pmd/renderers/XMLRendererTest.java b/pmd-core/src/test/java/net/sourceforge/pmd/renderers/XMLRendererTest.java index b9bada2450..060d39d120 100644 --- a/pmd-core/src/test/java/net/sourceforge/pmd/renderers/XMLRendererTest.java +++ b/pmd-core/src/test/java/net/sourceforge/pmd/renderers/XMLRendererTest.java @@ -3,8 +3,26 @@ */ package net.sourceforge.pmd.renderers; +import java.io.StringReader; + +import javax.xml.parsers.DocumentBuilderFactory; + +import net.sourceforge.pmd.FooRule; import net.sourceforge.pmd.PMD; +import net.sourceforge.pmd.Report; import net.sourceforge.pmd.Report.ProcessingError; +import net.sourceforge.pmd.ReportTest; +import net.sourceforge.pmd.RuleContext; +import net.sourceforge.pmd.RuleViolation; +import net.sourceforge.pmd.lang.ast.DummyNode; +import net.sourceforge.pmd.lang.ast.Node; +import net.sourceforge.pmd.lang.rule.ParametricRuleViolation; + +import org.junit.Assert; +import org.junit.Test; +import org.w3c.dom.Document; +import org.w3c.dom.NodeList; +import org.xml.sax.InputSource; public class XMLRendererTest extends AbstractRendererTst { @@ -65,4 +83,43 @@ public class XMLRendererTest extends AbstractRendererTst { String result = expected.replaceAll(" timestamp=\"[^\"]+\">", " timestamp=\"\">"); return result; } + + private static RuleViolation createRuleViolation(String description) { + DummyNode node = new DummyNode(1); + node.testingOnly__setBeginLine(1); + node.testingOnly__setBeginColumn(1); + node.testingOnly__setEndLine(1); + node.testingOnly__setEndColumn(1); + RuleContext ctx = new RuleContext(); + ctx.setSourceCodeFilename("n/a"); + return new ParametricRuleViolation(new FooRule(), ctx, node, description); + } + + private void verifyXmlEscaping(Renderer renderer, String shouldContain) throws Exception { + Report report = new Report(); + String surrogatePair = "\ud801\udc1c"; + String msg = "The String literal \"Tokenizer " + surrogatePair + "\" appears..."; + report.addRuleViolation(createRuleViolation(msg)); + String actual = ReportTest.render(renderer, report); + Assert.assertTrue(actual.contains(shouldContain)); + Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(new StringReader(actual))); + NodeList violations = doc.getElementsByTagName("violation"); + Assert.assertEquals(1, violations.getLength()); + Assert.assertEquals(msg, + violations.item(0).getTextContent().trim()); + } + + @Test + public void testXMLEscapingWithUTF8() throws Exception { + Renderer renderer = getRenderer(); + renderer.setProperty(XMLRenderer.ENCODING, "UTF-8"); + verifyXmlEscaping(renderer, "\ud801\udc1c"); + } + + @Test + public void testXMLEscapingWithoutUTF8() throws Exception { + Renderer renderer = getRenderer(); + renderer.setProperty(XMLRenderer.ENCODING, "ISO-8859-1"); + verifyXmlEscaping(renderer, "𐐜"); + } } diff --git a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/ASTLiteral.java b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/ASTLiteral.java index 4a89bb0f30..3e4fe87986 100644 --- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/ASTLiteral.java +++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/ASTLiteral.java @@ -104,6 +104,35 @@ public class ASTLiteral extends AbstractJavaTypeNode { return isString; } + /** + * Tries to reconstruct the original string literal. + * If the original length is greater than the parsed String literal, then + * probably some unicode escape sequences have been used. + * + * @return + */ + public String getEscapedStringLiteral() { + String image = getImage(); + if (!isStringLiteral() && !isCharLiteral()) { + return image; + } + int fullLength = getEndColumn() - getBeginColumn(); + if (fullLength > image.length()) { + StringBuilder result = new StringBuilder(fullLength); + for (int i = 0; i < image.length(); i++) { + char c = image.charAt(i); + if (c < 0x20 || c > 0xff || image.length() == 1) { + String hex = "0000" + Integer.toHexString(c); + result.append("\\u").append(hex.substring(hex.length() - 4)); + } else { + result.append(c); + } + } + return result.toString(); + } + return image; + } + /** * Returns true if this is a String literal with only one character. * Handles octal and escape characters. diff --git a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/rule/strings/AvoidDuplicateLiteralsRule.java b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/rule/strings/AvoidDuplicateLiteralsRule.java index b7c1afc5d0..5bc4084d0c 100644 --- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/rule/strings/AvoidDuplicateLiteralsRule.java +++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/rule/strings/AvoidDuplicateLiteralsRule.java @@ -143,9 +143,11 @@ public class AvoidDuplicateLiteralsRule extends AbstractJavaRule { for (Map.Entry> entry : literals.entrySet()) { List occurrences = entry.getValue(); if (occurrences.size() >= threshold) { - Object[] args = new Object[] { entry.getKey(), Integer.valueOf(occurrences.size()), - Integer.valueOf(occurrences.get(0).getBeginLine()) }; - addViolation(data, occurrences.get(0), args); + ASTLiteral first = occurrences.get(0); + String rawImage = first.getEscapedStringLiteral(); + Object[] args = new Object[] { rawImage, Integer.valueOf(occurrences.size()), + Integer.valueOf(first.getBeginLine()) }; + addViolation(data, first, args); } } } diff --git a/pmd-java/src/test/java/net/sourceforge/pmd/lang/java/ast/ASTLiteralTest.java b/pmd-java/src/test/java/net/sourceforge/pmd/lang/java/ast/ASTLiteralTest.java index 56795c1fb0..5f43dc1dd3 100644 --- a/pmd-java/src/test/java/net/sourceforge/pmd/lang/java/ast/ASTLiteralTest.java +++ b/pmd-java/src/test/java/net/sourceforge/pmd/lang/java/ast/ASTLiteralTest.java @@ -1,5 +1,6 @@ package net.sourceforge.pmd.lang.java.ast; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -54,6 +55,50 @@ public class ASTLiteralTest extends ParserTst { assertTrue(((ASTLiteral)(literals.iterator().next())).isCharLiteral()); } + @Test + public void testStringUnicodeEscapesNotEscaped() { + ASTLiteral literal = new ASTLiteral(1); + literal.setStringLiteral(); + literal.setImage("abcüabc"); + literal.testingOnly__setBeginColumn(1); + literal.testingOnly__setEndColumn(7); + assertEquals("abcüabc", literal.getEscapedStringLiteral()); + assertEquals("abcüabc", literal.getImage()); + } + + @Test + public void testStringUnicodeEscapesInvalid() { + ASTLiteral literal = new ASTLiteral(1); + literal.setStringLiteral(); + literal.setImage("abc\\uXYZAabc"); + literal.testingOnly__setBeginColumn(1); + literal.testingOnly__setEndColumn(12); + assertEquals("abc\\uXYZAabc", literal.getEscapedStringLiteral()); + assertEquals("abc\\uXYZAabc", literal.getImage()); + } + + @Test + public void testStringUnicodeEscapesValid() { + ASTLiteral literal = new ASTLiteral(1); + literal.setStringLiteral(); + literal.setImage("abc\u1234abc"); + literal.testingOnly__setBeginColumn(1); + literal.testingOnly__setEndColumn(12); + assertEquals("abc\\u1234abc", literal.getEscapedStringLiteral()); + assertEquals("abcሴabc", literal.getImage()); + } + + @Test + public void testCharacterUnicodeEscapesValid() { + ASTLiteral literal = new ASTLiteral(1); + literal.setCharLiteral(); + literal.setImage("\u0030"); + literal.testingOnly__setBeginColumn(1); + literal.testingOnly__setEndColumn(6); + assertEquals("\\u0030", literal.getEscapedStringLiteral()); + assertEquals("0", literal.getImage()); + } + private static final String TEST1 = "public class Foo {" + PMD.EOL + " String x = \"foo\";" + PMD.EOL + @@ -88,8 +133,4 @@ public class ASTLiteralTest extends ParserTst { "public class Foo {" + PMD.EOL + " char x = 'x';" + PMD.EOL + "}"; - - public static junit.framework.Test suite() { - return new junit.framework.JUnit4TestAdapter(ASTLiteralTest.class); - } } diff --git a/pmd-java/src/test/java/net/sourceforge/pmd/lang/java/rule/strings/AvoidDuplicateLiteralsRuleTest.java b/pmd-java/src/test/java/net/sourceforge/pmd/lang/java/rule/strings/AvoidDuplicateLiteralsRuleTest.java index cd109d4cc3..d923ee4917 100644 --- a/pmd-java/src/test/java/net/sourceforge/pmd/lang/java/rule/strings/AvoidDuplicateLiteralsRuleTest.java +++ b/pmd-java/src/test/java/net/sourceforge/pmd/lang/java/rule/strings/AvoidDuplicateLiteralsRuleTest.java @@ -8,19 +8,9 @@ import static org.junit.Assert.assertTrue; import java.util.Set; -import net.sourceforge.pmd.Rule; -import net.sourceforge.pmd.testframework.SimpleAggregatorTst; - import org.junit.Test; -public class AvoidDuplicateLiteralsRuleTest extends SimpleAggregatorTst { - @Test - public void testAll() { - Rule rule = findRule("java-strings", "AvoidDuplicateLiterals"); - rule.setProperty(AvoidDuplicateLiteralsRule.THRESHOLD_DESCRIPTOR, 2); - runTests(rule); - } - +public class AvoidDuplicateLiteralsRuleTest { @Test public void testStringParserEmptyString() { AvoidDuplicateLiteralsRule.ExceptionParser p = new AvoidDuplicateLiteralsRule.ExceptionParser(','); diff --git a/pmd-java/src/test/java/net/sourceforge/pmd/lang/java/rule/strings/StringsRulesTest.java b/pmd-java/src/test/java/net/sourceforge/pmd/lang/java/rule/strings/StringsRulesTest.java index 18a42f1cb6..60bc5684ac 100644 --- a/pmd-java/src/test/java/net/sourceforge/pmd/lang/java/rule/strings/StringsRulesTest.java +++ b/pmd-java/src/test/java/net/sourceforge/pmd/lang/java/rule/strings/StringsRulesTest.java @@ -12,6 +12,7 @@ public class StringsRulesTest extends SimpleAggregatorTst { @Override public void setUp() { addRule(RULESET, "AppendCharacterWithChar"); + addRule(RULESET, "AvoidDuplicateLiterals"); addRule(RULESET, "AvoidStringBufferField"); addRule(RULESET, "ConsecutiveAppendsShouldReuse"); addRule(RULESET, "ConsecutiveLiteralAppends"); diff --git a/pmd-java/src/test/resources/net/sourceforge/pmd/lang/java/rule/strings/xml/AvoidDuplicateLiterals.xml b/pmd-java/src/test/resources/net/sourceforge/pmd/lang/java/rule/strings/xml/AvoidDuplicateLiterals.xml index d7e15bf31b..0bd2a1a193 100644 --- a/pmd-java/src/test/resources/net/sourceforge/pmd/lang/java/rule/strings/xml/AvoidDuplicateLiterals.xml +++ b/pmd-java/src/test/resources/net/sourceforge/pmd/lang/java/rule/strings/xml/AvoidDuplicateLiterals.xml @@ -165,4 +165,22 @@ public class Foo { } ]]> + + + #1425 Invalid XML Characters in Output + 1 + + The String literal "Tokenizer \ud801\udc1ctest" appears 4 times in this file; the first occurrence is on line 2 + + + diff --git a/src/site/markdown/overview/changelog.md b/src/site/markdown/overview/changelog.md index 9041404538..6316e23ee1 100644 --- a/src/site/markdown/overview/changelog.md +++ b/src/site/markdown/overview/changelog.md @@ -18,5 +18,6 @@ * [#1428](https://sourceforge.net/p/pmd/bugs/1428/): False positive in UnusedPrivateField when local variable hides member variable * General * [#1429](https://sourceforge.net/p/pmd/bugs/1429/): Java - Parse Error: Cast in return expression + * [#1425](https://sourceforge.net/p/pmd/bugs/1425/): Invalid XML Characters in Output **API Changes:**