Implement DOMLineNumbers to determine the line numbers of DOM nodes after parsing
This commit is contained in:
@ -0,0 +1,155 @@
|
||||
/**
|
||||
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
|
||||
*/
|
||||
package net.sourceforge.pmd.lang.xml.ast;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import java.util.regex.Matcher;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.DocumentType;
|
||||
import org.w3c.dom.EntityReference;
|
||||
import org.w3c.dom.NamedNodeMap;
|
||||
import org.w3c.dom.Node;
|
||||
import org.w3c.dom.NodeList;
|
||||
import org.w3c.dom.ProcessingInstruction;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
class DOMLineNumbers {
|
||||
private final Document document;
|
||||
private final String xmlString;
|
||||
private Map<Integer, Integer> lines;
|
||||
|
||||
public DOMLineNumbers(Document document, String xmlString) {
|
||||
this.document = document;
|
||||
this.xmlString = xmlString;
|
||||
}
|
||||
|
||||
public void determine() {
|
||||
calculateLinesMap();
|
||||
determineLocation(document, 0);
|
||||
}
|
||||
private int determineLocation(Node n, int index) {
|
||||
int nextIndex = index;
|
||||
if (n.getNodeType() == Node.DOCUMENT_TYPE_NODE) {
|
||||
nextIndex = xmlString.indexOf("<!DOCTYPE", nextIndex);
|
||||
} else if (n.getNodeType() == Node.COMMENT_NODE) {
|
||||
nextIndex = xmlString.indexOf("<!--", nextIndex);
|
||||
} else if (n.getNodeType() == Node.ELEMENT_NODE) {
|
||||
nextIndex = xmlString.indexOf("<" + n.getNodeName(), nextIndex);
|
||||
} else if (n.getNodeType() == Node.CDATA_SECTION_NODE) {
|
||||
nextIndex = xmlString.indexOf("<![CDATA[", nextIndex);
|
||||
} else if (n.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
|
||||
ProcessingInstruction pi = (ProcessingInstruction)n;
|
||||
nextIndex = xmlString.indexOf("<?" + pi.getTarget(), nextIndex);
|
||||
} else if (n.getNodeType() == Node.TEXT_NODE) {
|
||||
String te = unexpandEntities(n, n.getNodeValue());
|
||||
int newIndex = xmlString.indexOf(te, nextIndex);
|
||||
if (newIndex > 0) {
|
||||
nextIndex = newIndex;
|
||||
} else {
|
||||
System.out.println("Still not found: " + n.getNodeValue());
|
||||
}
|
||||
} else if (n.getNodeType() == Node.ENTITY_REFERENCE_NODE) {
|
||||
nextIndex = xmlString.indexOf("&" + n.getNodeName() + ";", nextIndex);
|
||||
}
|
||||
setBeginLocation(n, nextIndex);
|
||||
if (n.hasChildNodes()) {
|
||||
NodeList childs = n.getChildNodes();
|
||||
for (int i = 0; i < childs.getLength(); i++) {
|
||||
nextIndex = determineLocation(childs.item(i), nextIndex);
|
||||
}
|
||||
}
|
||||
if (n.getNodeType() == Node.ELEMENT_NODE) {
|
||||
nextIndex += 2 + n.getNodeName().length() + 1; // </nodename>
|
||||
} else if (n.getNodeType() == Node.DOCUMENT_TYPE_NODE) {
|
||||
Node nextSibling = n.getNextSibling();
|
||||
if (nextSibling.getNodeType() == Node.ELEMENT_NODE) {
|
||||
nextIndex = xmlString.indexOf("<" + nextSibling.getNodeName(), nextIndex) - 1;
|
||||
} else if (nextSibling.getNodeType() == Node.COMMENT_NODE) {
|
||||
nextIndex = xmlString.indexOf("<!--", nextIndex);
|
||||
} else {
|
||||
nextIndex = xmlString.indexOf(">", nextIndex);
|
||||
}
|
||||
} else if (n.getNodeType() == Node.COMMENT_NODE) {
|
||||
nextIndex += 4 + 3; // <!-- and -->
|
||||
nextIndex += n.getNodeValue().length();
|
||||
} else if (n.getNodeType() == Node.TEXT_NODE) {
|
||||
String te = unexpandEntities(n, n.getNodeValue());
|
||||
nextIndex += te.length();
|
||||
} else if (n.getNodeType() == Node.CDATA_SECTION_NODE) {
|
||||
nextIndex += "<![CDATA[".length() + n.getNodeValue().length() + "]]>".length();
|
||||
} else if (n.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
|
||||
ProcessingInstruction pi = (ProcessingInstruction)n;
|
||||
nextIndex += "<?".length() + pi.getTarget().length() + "?>".length() + pi.getData().length();
|
||||
}
|
||||
setEndLocation(n, nextIndex - 1);
|
||||
return nextIndex;
|
||||
}
|
||||
|
||||
private String unexpandEntities(Node n, String te) {
|
||||
String result = te;
|
||||
DocumentType doctype = n.getOwnerDocument().getDoctype();
|
||||
// implicit entities
|
||||
result = result.replaceAll(Matcher.quoteReplacement("&"), "&");
|
||||
result = result.replaceAll(Matcher.quoteReplacement("<"), "<");
|
||||
result = result.replaceAll(Matcher.quoteReplacement(">"), ">");
|
||||
result = result.replaceAll(Matcher.quoteReplacement("\""), """);
|
||||
result = result.replaceAll(Matcher.quoteReplacement("'"), "'");
|
||||
|
||||
if (doctype != null) {
|
||||
NamedNodeMap entities = doctype.getEntities();
|
||||
for (int i = 0; i < entities.getLength(); i++) {
|
||||
Node item = entities.item(i);
|
||||
result = result.replaceAll(Matcher.quoteReplacement(item.getFirstChild().getNodeValue()), "&" + item.getNodeName() + ";");
|
||||
}
|
||||
|
||||
}
|
||||
return result;
|
||||
}
|
||||
private void setBeginLocation(Node n, int index) {
|
||||
if (n != null) {
|
||||
n.setUserData(XmlNode.BEGIN_LINE, toLine(index), null);
|
||||
n.setUserData(XmlNode.BEGIN_COLUMN, toColumn(index), null);
|
||||
}
|
||||
}
|
||||
private void setEndLocation(Node n, int index) {
|
||||
if (n != null) {
|
||||
n.setUserData(XmlNode.END_LINE, toLine(index), null);
|
||||
n.setUserData(XmlNode.END_COLUMN, toColumn(index), null);
|
||||
}
|
||||
}
|
||||
|
||||
private void calculateLinesMap() {
|
||||
lines = new TreeMap<Integer, Integer>();
|
||||
int index = -1;
|
||||
int count = StringUtils.countMatches(xmlString, "\n");
|
||||
for (int line = 1; line <= count; line++) {
|
||||
lines.put(line, index + 1);
|
||||
index = xmlString.indexOf("\n", index + 1);
|
||||
}
|
||||
lines.put(count + 1, index + 1);
|
||||
}
|
||||
|
||||
private int toLine(int index) {
|
||||
int line = 1;
|
||||
for (Map.Entry<Integer, Integer> e : lines.entrySet()) {
|
||||
line = e.getKey();
|
||||
if (e.getValue() > index) {
|
||||
line--;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return line;
|
||||
}
|
||||
private int toColumn(int index) {
|
||||
int line = toLine(index);
|
||||
int column = index - lines.get(line);
|
||||
return column + 1;
|
||||
}
|
||||
|
||||
}
|
@ -5,6 +5,7 @@ package net.sourceforge.pmd.lang.xml.ast;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
import java.lang.reflect.Proxy;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
@ -12,19 +13,19 @@ import java.util.LinkedHashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import javax.xml.parsers.SAXParser;
|
||||
import javax.xml.parsers.SAXParserFactory;
|
||||
|
||||
import net.sourceforge.pmd.lang.ast.ParseException;
|
||||
import net.sourceforge.pmd.lang.ast.RootNode;
|
||||
import net.sourceforge.pmd.lang.xml.XmlParserOptions;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Node;
|
||||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.XMLReader;
|
||||
|
||||
public class XmlParser {
|
||||
protected final XmlParserOptions parserOptions;
|
||||
@ -37,23 +38,24 @@ public class XmlParser {
|
||||
protected Document parseDocument(Reader reader) throws ParseException {
|
||||
nodeCache.clear();
|
||||
try {
|
||||
SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
|
||||
saxParserFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
|
||||
saxParserFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
|
||||
saxParserFactory.setNamespaceAware(parserOptions.isNamespaceAware());
|
||||
saxParserFactory.setValidating(parserOptions.isValidating());
|
||||
saxParserFactory.setXIncludeAware(parserOptions.isXincludeAware());
|
||||
SAXParser saxParser = saxParserFactory.newSAXParser();
|
||||
String xmlData = IOUtils.toString(reader);
|
||||
|
||||
LineNumberAwareSaxHandler handler = new LineNumberAwareSaxHandler(parserOptions);
|
||||
XMLReader xmlReader = saxParser.getXMLReader();
|
||||
xmlReader.setContentHandler(handler);
|
||||
xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", handler);
|
||||
xmlReader.setProperty("http://xml.org/sax/properties/declaration-handler", handler);
|
||||
xmlReader.setEntityResolver(parserOptions.getEntityResolver());
|
||||
|
||||
xmlReader.parse(new InputSource(reader));
|
||||
return handler.getDocument();
|
||||
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
|
||||
dbf.setNamespaceAware(parserOptions.isNamespaceAware());
|
||||
dbf.setValidating(parserOptions.isValidating());
|
||||
dbf.setIgnoringComments(parserOptions.isIgnoringComments());
|
||||
dbf.setIgnoringElementContentWhitespace(parserOptions.isIgnoringElementContentWhitespace());
|
||||
dbf.setExpandEntityReferences(parserOptions.isExpandEntityReferences());
|
||||
dbf.setCoalescing(parserOptions.isCoalescing());
|
||||
dbf.setXIncludeAware(parserOptions.isXincludeAware());
|
||||
dbf.setFeature("http://xml.org/sax/features/external-general-entities", false);
|
||||
dbf.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
|
||||
DocumentBuilder documentBuilder = dbf.newDocumentBuilder();
|
||||
documentBuilder.setEntityResolver(parserOptions.getEntityResolver());
|
||||
Document document = documentBuilder.parse(new InputSource(new StringReader(xmlData)));
|
||||
DOMLineNumbers lineNumbers = new DOMLineNumbers(document, xmlData);
|
||||
lineNumbers.determine();
|
||||
return document;
|
||||
} catch (ParserConfigurationException e) {
|
||||
throw new ParseException(e);
|
||||
} catch (SAXException e) {
|
||||
|
@ -12,6 +12,7 @@ import java.util.Iterator;
|
||||
import net.sourceforge.pmd.lang.LanguageRegistry;
|
||||
import net.sourceforge.pmd.lang.LanguageVersionHandler;
|
||||
import net.sourceforge.pmd.lang.Parser;
|
||||
import net.sourceforge.pmd.lang.ParserOptions;
|
||||
import net.sourceforge.pmd.lang.ast.Node;
|
||||
import net.sourceforge.pmd.lang.ast.xpath.Attribute;
|
||||
import net.sourceforge.pmd.lang.xml.ast.XmlNode;
|
||||
@ -274,7 +275,7 @@ public class XmlParserTest {
|
||||
|
||||
assertNode(document, "document", 1);
|
||||
Node rootElement = document.jjtGetChild(0);
|
||||
assertNode(rootElement, "pmd:rootElement", 7);
|
||||
assertNode(rootElement, "pmd:rootElement", 7, "xmlns:pmd", "http://pmd.sf.net");
|
||||
Assert.assertEquals("http://pmd.sf.net", ((XmlNode)rootElement).getNode().getNamespaceURI());
|
||||
Assert.assertEquals("pmd", ((XmlNode)rootElement).getNode().getPrefix());
|
||||
Assert.assertEquals("rootElement", ((XmlNode)rootElement).getNode().getLocalName());
|
||||
@ -353,6 +354,19 @@ public class XmlParserTest {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithProcessingInstructions() {
|
||||
String xml = "<?xml version=\"1.0\"?><?mypi?><!DOCTYPE testDoc [<!ENTITY myentity \"e\">]><!--Comment--><foo abc=\"abc\"><bar>TEXT</bar><![CDATA[cdata!]]>>&myentity;<</foo>";
|
||||
LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler();
|
||||
XmlParserOptions options = (XmlParserOptions)xmlVersionHandler.getDefaultParserOptions();
|
||||
options.setExpandEntityReferences(false);
|
||||
Parser parser = xmlVersionHandler.getParser(options);
|
||||
Node document = parser.parse(null, new StringReader(xml));
|
||||
Assert.assertNotNull(document);
|
||||
assertNode(document.jjtGetChild(0), "mypi", 0);
|
||||
assertLineNumbers(document.jjtGetChild(0), 1, 22, 1, 29);
|
||||
}
|
||||
|
||||
/**
|
||||
* Asserts a single node inclusive attributes.
|
||||
* @param node the node
|
||||
|
@ -77,10 +77,8 @@ public class AbstractDomXmlRuleTest {
|
||||
// assertEquals(0, visited.size());
|
||||
|
||||
visited = rule.visitedNodes.get("EntityReference");
|
||||
assertEquals(3, visited.size());
|
||||
assertEquals("gt", ((EntityReference) visited.get(0)).getNodeName());
|
||||
assertEquals("entity", ((EntityReference) visited.get(1)).getNodeName());
|
||||
assertEquals("lt", ((EntityReference) visited.get(2)).getNodeName());
|
||||
assertEquals(1, visited.size());
|
||||
assertEquals("entity", ((EntityReference) visited.get(0)).getNodeName());
|
||||
|
||||
// TODO Figure out how to trigger this.
|
||||
// visited = rule.visitedNodes.get("Notation");
|
||||
@ -92,9 +90,11 @@ public class AbstractDomXmlRuleTest {
|
||||
((ProcessingInstruction) visited.get(0)).getTarget());
|
||||
|
||||
visited = rule.visitedNodes.get("Text");
|
||||
assertEquals(2, visited.size());
|
||||
assertEquals(4, visited.size());
|
||||
assertEquals("TEXT", ((Text) visited.get(0)).getData());
|
||||
assertEquals("e", ((Text) visited.get(1)).getData());
|
||||
assertEquals(">", ((Text) visited.get(1)).getData());
|
||||
assertEquals("e", ((Text) visited.get(2)).getData());
|
||||
assertEquals("<", ((Text) visited.get(3)).getData());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
Reference in New Issue
Block a user