[core] Provide a backwards compatible XMLOldRenderer as "xmlold"

2024-06-23 20:39:41 +02:00
parent 0340bf0568
commit 33f9268cf7
10 changed files with 214 additions and 34 deletions
--- a/docs/pages/pmd/userdocs/cpd/cpd_report_formats.md
+++ b/docs/pages/pmd/userdocs/cpd/cpd_report_formats.md
@ -105,7 +105,12 @@ Example:

 ```xml
 <?xml version="1.0" encoding="UTF-8"?>
-<pmd-cpd>
+<pmd-cpd xmlns="https://pmd-code.org/schema/cpd-report"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         pmdVersion="7.3.0"
+         timestamp="2024-06-23T09:00:00+02:00"
+         version="1.0.0"
+         xsi:schemaLocation="https://pmd-code.org/schema/cpd-report https://pmd.github.io/schema/cpd-report_1_0_0.xsd">
   <file path="/home/pmd/source/pmd-core/src/test/java/net/sourceforge/pmd/RuleReferenceTest.java" totalNumberOfTokens="523"/>
   <file path="/home/pmd/source/pmd-core/src/test/java/net/sourceforge/pmd/lang/rule/xpath/JaxenXPathRuleQueryTest.java" totalNumberOfTokens="120"/>
   <duplication lines="33" tokens="239">
--- a/docs/pages/release_notes.md
+++ b/docs/pages/release_notes.md
@ -29,10 +29,27 @@ This is a {{ site.pmd.release_type }} release.

 #### CPD Report Format XML

-The CPD XML report will now also contain processing errors (if CPD is called with `--skip-lexical-errors`).
+There are some important changes:
+
+1. The XML format now uses an XSD schema, which is available at <https://pmd.github.io/schema/cpd-report_1_0_0.xsd>.
+   This schema defines the valid elements and attributes that one can expect from a CPD report.
+2. The root element `pmd-cpd` contains the new attributes `pmdVersion`, `timestamp` and `version`. The latter is
+   the schema version and is currently "1.0.0".
+3. The CPD XML report will now also contain recoverable errors as additional `<error>` elements.

 See [Report formats for CPD](pmd_userdocs_cpd_report_formats.html#xml) for an example.

+The XML format should be compatible, as only attributes and elements have been added. However, if you parse
+the document with a namespace-aware parser, you might encounter issues such as no elements being found.
+In case the new format doesn't work for you (e.g. namespaces, unexpected error elements), you can
+switch back to the old format with the renderer "xmlold" ({%jdoc core::cpd.XMLOldRenderer %}). Note that
+this old renderer is deprecated and only exists for compatibility reasons. Whatever tooling is used to
+read the XML format should be updated.
+
+#### Deprecated for removal
+
+* {%jdoc !!core::cpd.XMLOldRenderer %} (the CPD format "xmlold").
+
 ### ✨ External Contributions

 {% endtocmaker %}
--- a/pmd-ant/src/main/java/net/sourceforge/pmd/ant/CPDTask.java
+++ b/pmd-ant/src/main/java/net/sourceforge/pmd/ant/CPDTask.java
@ -29,6 +29,7 @@ import net.sourceforge.pmd.cpd.CPDReportRenderer;
 import net.sourceforge.pmd.cpd.CSVRenderer;
 import net.sourceforge.pmd.cpd.CpdAnalysis;
 import net.sourceforge.pmd.cpd.SimpleRenderer;
+import net.sourceforge.pmd.cpd.XMLOldRenderer;
 import net.sourceforge.pmd.cpd.XMLRenderer;
 import net.sourceforge.pmd.lang.Language;
 import net.sourceforge.pmd.lang.LanguageRegistry;
@ -66,6 +67,8 @@ public class CPDTask extends Task {

    private static final String TEXT_FORMAT = "text";
    private static final String XML_FORMAT = "xml";
+    @Deprecated
+    private static final String XMLOLD_FORMAT = "xmlold";
    private static final String CSV_FORMAT = "csv";

    private String format = TEXT_FORMAT;
@ -177,6 +180,8 @@ public class CPDTask extends Task {
            return new SimpleRenderer();
        } else if (CSV_FORMAT.equals(format)) {
            return new CSVRenderer();
+        } else if (XMLOLD_FORMAT.equals(format)) {
+            return new XMLOldRenderer();
        }
        return new XMLRenderer();
    }
@ -253,7 +258,7 @@ public class CPDTask extends Task {
    }

    public static class FormatAttribute extends EnumeratedAttribute {
-        private static final String[] FORMATS = new String[] { XML_FORMAT, TEXT_FORMAT, CSV_FORMAT };
+        private static final String[] FORMATS = new String[] { XML_FORMAT, TEXT_FORMAT, CSV_FORMAT, XMLOLD_FORMAT };

        @Override
        public String[] getValues() {
--- a/pmd-cli/src/test/java/net/sourceforge/pmd/cli/CpdCliTest.java
+++ b/pmd-cli/src/test/java/net/sourceforge/pmd/cli/CpdCliTest.java
@ -10,7 +10,6 @@ import static net.sourceforge.pmd.util.CollectionUtil.listOf;
 import static org.hamcrest.CoreMatchers.startsWith;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.emptyString;
-import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.not;

 import java.nio.charset.StandardCharsets;
--- a/pmd-core/etc/xslt/cpdhtml-v2.xslt
+++ b/pmd-core/etc/xslt/cpdhtml-v2.xslt
@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" >
+<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xpath-default-namespace="https://pmd-code.org/schema/cpd-report">
 <!--
   PMD CPD (Copy and Paste Detector) XML to HTML transformer 
 -->
--- a/pmd-core/etc/xslt/cpdhtml.xslt
+++ b/pmd-core/etc/xslt/cpdhtml.xslt
@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="utf-8"?>
 <!-- Stylesheet to turn the XML output of CPD into a nice-looking HTML page -->
 <!-- $Id$ -->
-<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0">
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xpath-default-namespace="https://pmd-code.org/schema/cpd-report" version="2.0">
 <xsl:output method="html" encoding="utf-8" doctype-system="about:legacy-compat"/>
 <xsl:param name="lines" required="yes">30</xsl:param>

--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CPDConfiguration.java
@ -39,6 +39,7 @@ public class CPDConfiguration extends AbstractConfiguration {
    static {
        RENDERERS.put(DEFAULT_RENDERER, SimpleRenderer.class);
        RENDERERS.put("xml", XMLRenderer.class);
+        RENDERERS.put("xmlold", XMLOldRenderer.class);
        RENDERERS.put("csv", CSVRenderer.class);
        RENDERERS.put("csv_with_linecount_per_file", CSVWithLinecountPerFileRenderer.class);
        RENDERERS.put("vs", VSRenderer.class);
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/XMLOldRenderer.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/XMLOldRenderer.java
@ -0,0 +1,34 @@
+/**
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.cpd;
+
+import java.io.IOException;
+import java.io.Writer;
+
+/**
+ * Backwards compatible CPD XML renderer that delegates to {@link XMLRenderer} in its old format,
+ * i.e. without a namespace, without the schema attributes and without error elements.
+ *
+ * <p>This renderer is available as "xmlold".
+ *
+ * @deprecated Update your tools to read the output of the standard XML renderer "xml" instead.
+ */
+@Deprecated
+public class XMLOldRenderer implements CPDReportRenderer {
+    private final XMLRenderer xmlRenderer;
+
+    public XMLOldRenderer() {
+        this(null);
+    }
+
+    public XMLOldRenderer(String encoding) {
+        this.xmlRenderer = new XMLRenderer(encoding, false);
+    }
+
+    @Override
+    public void render(CPDReport report, Writer writer) throws IOException {
+        xmlRenderer.render(report, writer);
+    }
+}
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/XMLRenderer.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/XMLRenderer.java
@ -42,11 +42,13 @@ public final class XMLRenderer implements CPDReportRenderer {

    private String encoding;

+    private final boolean newFormat;
+
    /**
     * Creates a XML Renderer with the default (platform dependent) encoding.
     */
    public XMLRenderer() {
-        this(null);
+        this(null, true);
    }

    /**
@ -57,7 +59,12 @@ public final class XMLRenderer implements CPDReportRenderer {
     *            dependent) encoding is used.
     */
    public XMLRenderer(String encoding) {
+        this(encoding, true);
+    }
+
+    XMLRenderer(String encoding, boolean newFormat) {
        setEncoding(encoding);
+        this.newFormat = newFormat;
    }

    public void setEncoding(String encoding) {
@ -90,7 +97,11 @@ public final class XMLRenderer implements CPDReportRenderer {
            transformer.setOutputProperty(OutputKeys.METHOD, "xml");
            transformer.setOutputProperty(OutputKeys.ENCODING, encoding);
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
-            transformer.setOutputProperty(OutputKeys.CDATA_SECTION_ELEMENTS, "{" + NAMESPACE_URI + "}codefragment");
+            if (newFormat) {
+                transformer.setOutputProperty(OutputKeys.CDATA_SECTION_ELEMENTS, "{" + NAMESPACE_URI + "}codefragment");
+            } else {
+                transformer.setOutputProperty(OutputKeys.CDATA_SECTION_ELEMENTS, "codefragment");
+            }
            transformer.transform(new DOMSource(doc), new StreamResult(writer));
        } catch (TransformerException e) {
            throw new IllegalStateException(e);
@ -101,19 +112,22 @@ public final class XMLRenderer implements CPDReportRenderer {
    @Override
    public void render(final CPDReport report, final Writer writer) throws IOException {
        final Document doc = createDocument();
-        final Element root = doc.createElementNS(NAMESPACE_URI, "pmd-cpd");
-        root.setAttributeNS(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI, "xsi:schemaLocation", NAMESPACE_URI + " " + NAMESPACE_LOCATION);
+        final Element root = createElement(doc, "pmd-cpd");
+
+        if (newFormat) {
+            root.setAttributeNS(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI, "xsi:schemaLocation", NAMESPACE_URI + " " + NAMESPACE_LOCATION);
+            root.setAttributeNS(NAMESPACE_URI, "version", SCHEMA_VERSION);
+            root.setAttributeNS(NAMESPACE_URI, "pmdVersion", PMDVersion.VERSION);
+            root.setAttributeNS(NAMESPACE_URI, "timestamp", OffsetDateTime.now().format(DateTimeFormatter.ISO_OFFSET_DATE_TIME));
+        }

-        root.setAttributeNS(NAMESPACE_URI, "version", SCHEMA_VERSION);
-        root.setAttributeNS(NAMESPACE_URI, "pmdVersion", PMDVersion.VERSION);
-        root.setAttributeNS(NAMESPACE_URI, "timestamp", OffsetDateTime.now().format(DateTimeFormatter.ISO_OFFSET_DATE_TIME));
        final Map<FileId, Integer> numberOfTokensPerFile = report.getNumberOfTokensPerFile();
        doc.appendChild(root);

        for (final Map.Entry<FileId, Integer> pair : numberOfTokensPerFile.entrySet()) {
-            final Element fileElement = doc.createElementNS(NAMESPACE_URI, "file");
-            fileElement.setAttributeNS(NAMESPACE_URI, "path", report.getDisplayName(pair.getKey()));
-            fileElement.setAttributeNS(NAMESPACE_URI, "totalNumberOfTokens", String.valueOf(pair.getValue()));
+            final Element fileElement = createElement(doc, "file");
+            setAttribute(fileElement, "path", report.getDisplayName(pair.getKey()));
+            setAttribute(fileElement, "totalNumberOfTokens", String.valueOf(pair.getValue()));
            root.appendChild(fileElement);
        }

@ -124,12 +138,14 @@ public final class XMLRenderer implements CPDReportRenderer {
            root.appendChild(dupElt);
        }

-        for (Report.ProcessingError error : report.getProcessingErrors()) {
-            Element errorElt = doc.createElementNS(NAMESPACE_URI, "error");
-            errorElt.setAttributeNS(NAMESPACE_URI, "filename", report.getDisplayName(error.getFileId()));
-            errorElt.setAttributeNS(NAMESPACE_URI, "msg", error.getMsg());
-            errorElt.setTextContent(error.getDetail());
-            root.appendChild(errorElt);
+        if (newFormat) {
+            for (Report.ProcessingError error : report.getProcessingErrors()) {
+                Element errorElt = doc.createElementNS(NAMESPACE_URI, "error");
+                errorElt.setAttributeNS(NAMESPACE_URI, "filename", report.getDisplayName(error.getFileId()));
+                errorElt.setAttributeNS(NAMESPACE_URI, "msg", error.getMsg());
+                errorElt.setTextContent(error.getDetail());
+                root.appendChild(errorElt);
+            }
        }

        dumpDocToWriter(doc, writer);
@ -138,17 +154,17 @@ public final class XMLRenderer implements CPDReportRenderer {

    private void addFilesToDuplicationElement(Document doc, Element duplication, Match match, CPDReport report) {
        for (Mark mark : match) {
-            final Element file = doc.createElementNS(NAMESPACE_URI, "file");
+            final Element file = createElement(doc, "file");
            FileLocation loc = mark.getLocation();
-            file.setAttributeNS(NAMESPACE_URI, "line", String.valueOf(loc.getStartLine()));
+            setAttribute(file, "line", String.valueOf(loc.getStartLine()));
            // only remove invalid characters, escaping is done by the DOM impl.
            String filenameXml10 = StringUtil.removedInvalidXml10Characters(report.getDisplayName(loc.getFileId()));
-            file.setAttributeNS(NAMESPACE_URI, "path", filenameXml10);
-            file.setAttributeNS(NAMESPACE_URI, "endline", String.valueOf(loc.getEndLine()));
-            file.setAttributeNS(NAMESPACE_URI, "column", String.valueOf(loc.getStartColumn()));
-            file.setAttributeNS(NAMESPACE_URI, "endcolumn", String.valueOf(loc.getEndColumn()));
-            file.setAttributeNS(NAMESPACE_URI, "begintoken", String.valueOf(mark.getBeginTokenIndex()));
-            file.setAttributeNS(NAMESPACE_URI, "endtoken", String.valueOf(mark.getEndTokenIndex()));
+            setAttribute(file, "path", filenameXml10);
+            setAttribute(file, "endline", String.valueOf(loc.getEndLine()));
+            setAttribute(file, "column", String.valueOf(loc.getStartColumn()));
+            setAttribute(file, "endcolumn", String.valueOf(loc.getEndColumn()));
+            setAttribute(file, "begintoken", String.valueOf(mark.getBeginTokenIndex()));
+            setAttribute(file, "endtoken", String.valueOf(mark.getEndTokenIndex()));
            duplication.appendChild(file);
        }
    }
@ -158,7 +174,7 @@ public final class XMLRenderer implements CPDReportRenderer {
        if (codeSnippet != null) {
            // the code snippet has normalized line endings
            String platformSpecific = codeSnippet.toString().replace("\n", System.lineSeparator());
-            Element codefragment = doc.createElementNS(NAMESPACE_URI, "codefragment");
+            Element codefragment = createElement(doc, "codefragment");
            // only remove invalid characters, escaping is not necessary in CDATA.
            // if the string contains the end marker of a CDATA section, then the DOM impl will
            // create two cdata sections automatically.
@ -168,9 +184,24 @@ public final class XMLRenderer implements CPDReportRenderer {
    }

    private Element createDuplicationElement(Document doc, Match match) {
-        Element duplication = doc.createElementNS(NAMESPACE_URI, "duplication");
-        duplication.setAttributeNS(NAMESPACE_URI, "lines", String.valueOf(match.getLineCount()));
-        duplication.setAttributeNS(NAMESPACE_URI, "tokens", String.valueOf(match.getTokenCount()));
+        Element duplication = createElement(doc, "duplication");
+        setAttribute(duplication, "lines", String.valueOf(match.getLineCount()));
+        setAttribute(duplication, "tokens", String.valueOf(match.getTokenCount()));
        return duplication;
    }
+
+    private Element createElement(Document doc, String name) {
+        if (newFormat) {
+            return doc.createElementNS(NAMESPACE_URI, name);
+        }
+        return doc.createElement(name);
+    }
+
+    private void setAttribute(Element element, String name, String value) {
+        if (newFormat) {
+            element.setAttributeNS(NAMESPACE_URI, name, value);
+        } else {
+            element.setAttribute(name, value);
+        }
+    }
 }
--- a/pmd-core/src/test/java/net/sourceforge/pmd/cpd/XMLOldRendererTest.java
+++ b/pmd-core/src/test/java/net/sourceforge/pmd/cpd/XMLOldRendererTest.java
@ -0,0 +1,88 @@
+/*
+ * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
+ */
+
+package net.sourceforge.pmd.cpd;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.StringWriter;
+import java.util.Collections;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.junit.jupiter.api.Test;
+import org.w3c.dom.Document;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+
+import net.sourceforge.pmd.lang.document.FileId;
+
+class XMLOldRendererTest {
+    private static final String ENCODING = (String) System.getProperties().get("file.encoding");
+
+    @Test
+    void testWithNoDuplication() throws IOException, ParserConfigurationException, SAXException {
+        CPDReportRenderer renderer = new XMLOldRenderer();
+        StringWriter sw = new StringWriter();
+        renderer.render(CpdTestUtils.makeReport(Collections.emptyList()), sw);
+        String report = sw.toString();
+
+        assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<pmd-cpd/>\n",
+                report,
+                "no namespace expected");
+
+        Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder()
+                .parse(new ByteArrayInputStream(report.getBytes(ENCODING)));
+        NodeList nodes = doc.getChildNodes();
+        Node n = nodes.item(0);
+        assertEquals("pmd-cpd", n.getNodeName());
+        assertEquals(0, doc.getElementsByTagName("duplication").getLength());
+    }
+
+    @Test
+    void testWithOneDuplication() throws Exception {
+        CPDReportRenderer renderer = new XMLOldRenderer();
+        CpdTestUtils.CpdReportBuilder builder = new CpdTestUtils.CpdReportBuilder();
+        int lineCount = 6;
+        FileId foo1 = CpdTestUtils.FOO_FILE_ID;
+        Mark mark1 = builder.createMark("public", foo1, 1, lineCount);
+        Mark mark2 = builder.createMark("stuff", foo1, 73, lineCount);
+        builder.addMatch(new Match(75, mark1, mark2));
+
+        StringWriter sw = new StringWriter();
+        renderer.render(builder.build(), sw);
+        String report = sw.toString();
+
+        Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder()
+                .parse(new ByteArrayInputStream(report.getBytes(ENCODING)));
+        NodeList dupes = doc.getElementsByTagName("duplication");
+        assertEquals(1, dupes.getLength());
+        Node file = dupes.item(0).getFirstChild();
+        while (file != null && file.getNodeType() != Node.ELEMENT_NODE) {
+            file = file.getNextSibling();
+        }
+        if (file != null) {
+            assertEquals("1", file.getAttributes().getNamedItem("line").getNodeValue());
+            assertEquals(foo1.getAbsolutePath(), file.getAttributes().getNamedItem("path").getNodeValue());
+            assertEquals("6", file.getAttributes().getNamedItem("endline").getNodeValue());
+            assertEquals("1", file.getAttributes().getNamedItem("column").getNodeValue());
+            assertEquals("1", file.getAttributes().getNamedItem("endcolumn").getNodeValue());
+            file = file.getNextSibling();
+            while (file != null && file.getNodeType() != Node.ELEMENT_NODE) {
+                file = file.getNextSibling();
+            }
+        }
+        if (file != null) {
+            assertEquals("73", file.getAttributes().getNamedItem("line").getNodeValue());
+            assertEquals("78", file.getAttributes().getNamedItem("endline").getNodeValue());
+            assertEquals("1", file.getAttributes().getNamedItem("column").getNodeValue());
+            assertEquals("1", file.getAttributes().getNamedItem("endcolumn").getNodeValue());
+        }
+        assertEquals(1, doc.getElementsByTagName("codefragment").getLength());
+        assertEquals(CpdTestUtils.generateDummyContent(lineCount), doc.getElementsByTagName("codefragment").item(0).getTextContent());
+    }
+}