[core] Introduce TextNode and CommentNode for XPath/HTML

This commit is contained in:
Andreas Dangel 2023-12-19 20:04:18 +01:00
parent bf66f816f0
commit 411d32607b
No known key found for this signature in database
GPG Key ID: 93450DF2DF9A3FA3
13 changed files with 166 additions and 79 deletions

View File

@ -0,0 +1,17 @@
/*
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.lang.rule.xpath;
import net.sourceforge.pmd.lang.ast.Node;
/**
* Interface for AST nodes that represent a comment in the XPath data model.
*
* <p>In this change, {@code AstElementNode} checks for this interface to
* classify a node as {@code Type.COMMENT} and uses {@link #getData()} as
* the string value of such a node.
*/
public interface CommentNode extends Node {
/**
* Returns the content of this comment node.
* NOTE(review): whether comment delimiters are part of the returned
* string is decided by the implementing class - confirm per language.
*
* @return the comment data
*/
String getData();
/**
* Returns the fixed XPath node name used for comment nodes.
*
* @return always {@code "#comment"}
*/
@Override
default String getXPathNodeName() {
return "#comment";
}
}

View File

@ -0,0 +1,16 @@
/*
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.lang.rule.xpath;
import net.sourceforge.pmd.lang.ast.Node;
/**
* Interface for AST nodes that represent a text node in the XPath data model.
*
* <p>In this change, {@code AstElementNode} checks for this interface to
* classify a node as {@code Type.TEXT}, uses {@link #getText()} as the
* string value of such a node, and builds the string value of other nodes
* by joining the {@link #getText()} of all descendant text nodes.
*/
public interface TextNode extends Node {
/**
* Returns the text content of this node.
*
* @return the text of this node
*/
String getText();
/**
* Returns the fixed XPath node name used for text nodes.
*
* @return always {@code "#text"}
*/
@Override
default String getXPathNodeName() {
return "#text";
}
}

View File

@ -11,6 +11,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import org.apache.commons.lang3.mutable.MutableInt;
import org.checkerframework.checker.nullness.qual.Nullable;
@ -18,6 +19,8 @@ import org.checkerframework.checker.nullness.qual.Nullable;
import net.sourceforge.pmd.lang.ast.Node;
import net.sourceforge.pmd.lang.ast.RootNode;
import net.sourceforge.pmd.lang.rule.xpath.Attribute;
import net.sourceforge.pmd.lang.rule.xpath.CommentNode;
import net.sourceforge.pmd.lang.rule.xpath.TextNode;
import net.sourceforge.pmd.util.CollectionUtil;
import net.sf.saxon.Configuration;
@ -67,13 +70,10 @@ public final class AstElementNode extends BaseNodeInfo implements SiblingCountin
}
private static int determineType(Node node) {
// As of PMD 6.48.0, only the experimental HTML module uses this naming
// convention to identify non-element nodes.
// TODO PMD 7: maybe generalize this to other languages
String name = node.getXPathNodeName();
if ("#text".equals(name)) {
// As of PMD 7, only the HTML module uses these interfaces
if (node instanceof TextNode) {
return Type.TEXT;
} else if ("#comment".equals(name)) {
} else if (node instanceof CommentNode) {
return Type.COMMENT;
}
return Type.ELEMENT;
@ -209,8 +209,12 @@ public final class AstElementNode extends BaseNodeInfo implements SiblingCountin
@Override
public CharSequence getStringValueCS() {
if (getNodeKind() == Type.TEXT || getNodeKind() == Type.COMMENT) {
return getUnderlyingNode().getImage();
Node node = getUnderlyingNode();
if (node instanceof TextNode) {
return ((TextNode) node).getText();
}
if (node instanceof CommentNode) {
return ((CommentNode) node).getData();
}
// https://www.w3.org/TR/xpath-datamodel-31/#ElementNode
@ -220,9 +224,11 @@ public final class AstElementNode extends BaseNodeInfo implements SiblingCountin
// descendants, the zero-length string.
// Since we represent all our Nodes as elements, there are no
// text nodes
// TODO: for some languages like html we have text nodes
return "";
// text nodes usually, except for HTML module - there we have
// potentially text nodes
return node.descendants(TextNode.class).toStream()
.map(TextNode::getText)
.collect(Collectors.joining(""));
}
@Override

View File

@ -98,7 +98,14 @@ public class DummyLanguageModule extends SimpleLanguageModuleBase implements Cpd
for (int i = 0; i < text.length(); i++) {
char c = text.charAt(i);
if (c == '(') {
DummyNode node = new DummyNode();
DummyNode node;
if (text.startsWith("#text", i + 1)) {
node = new DummyNode.DummyTextNode();
} else if (text.startsWith("#comment", i + 1)) {
node = new DummyNode.DummyCommentNode();
} else {
node = new DummyNode();
}
node.setParent(top);
top.addChild(node, top.getNumChildren());
// setup coordinates, temporary (will be completed when node closes)

View File

@ -20,6 +20,8 @@ import net.sourceforge.pmd.lang.document.FileId;
import net.sourceforge.pmd.lang.document.TextDocument;
import net.sourceforge.pmd.lang.document.TextRegion;
import net.sourceforge.pmd.lang.rule.xpath.Attribute;
import net.sourceforge.pmd.lang.rule.xpath.CommentNode;
import net.sourceforge.pmd.lang.rule.xpath.TextNode;
public class DummyNode extends AbstractNode<DummyNode, DummyNode> {
@ -171,4 +173,28 @@ public class DummyNode extends AbstractNode<DummyNode, DummyNode> {
super("dummyNodeB");
}
}
/**
* Dummy node that implements {@link TextNode}. The dummy language module
* creates it when the text after an opening parenthesis starts with
* {@code #text}.
*/
public static class DummyTextNode extends DummyNode implements TextNode {
/**
* Returns the image of this node as its text.
*/
@Override
public String getText() {
return getImage();
}
/**
* Delegates explicitly to the default method of {@link TextNode},
* which returns {@code "#text"}.
* NOTE(review): presumably required because a concrete implementation
* inherited from the superclass would otherwise take precedence over
* the interface default - confirm against DummyNode.
*/
@Override
public String getXPathNodeName() {
return TextNode.super.getXPathNodeName();
}
}
/**
* Dummy node that implements {@link CommentNode}. The dummy language module
* creates it when the text after an opening parenthesis starts with
* {@code #comment}.
*/
public static class DummyCommentNode extends DummyNode implements CommentNode {
/**
* Returns the image of this node as its comment data.
*/
@Override
public String getData() {
return getImage();
}
/**
* Delegates explicitly to the default method of {@link CommentNode},
* which returns {@code "#comment"}.
* NOTE(review): presumably required because a concrete implementation
* inherited from the superclass would otherwise take precedence over
* the interface default - confirm against DummyNode.
*/
@Override
public String getXPathNodeName() {
return CommentNode.super.getXPathNodeName();
}
}
}

View File

@ -7,12 +7,15 @@ package net.sourceforge.pmd.lang.html.ast;
import org.jsoup.nodes.Comment;
public final class ASTHtmlComment extends AbstractHtmlNode<Comment> {
import net.sourceforge.pmd.lang.rule.xpath.CommentNode;
public final class ASTHtmlComment extends AbstractHtmlNode<Comment> implements CommentNode {
ASTHtmlComment(Comment node) {
super(node);
}
@Override
public String getData() {
return node.getData();
}
@ -21,4 +24,9 @@ public final class ASTHtmlComment extends AbstractHtmlNode<Comment> {
protected <P, R> R acceptHtmlVisitor(HtmlVisitor<? super P, ? extends R> visitor, P data) {
return visitor.visit(this, data);
}
/**
* Selects the default implementation from {@code CommentNode}, which
* returns {@code "#comment"}. Without this override, the concrete
* implementation inherited from {@code AbstractHtmlNode} (which returns
* {@code node.nodeName()}) would be used, because a superclass method
* takes precedence over an interface default.
*/
@Override
public String getXPathNodeName() {
return CommentNode.super.getXPathNodeName();
}
}

View File

@ -7,7 +7,7 @@ package net.sourceforge.pmd.lang.html.ast;
import org.jsoup.nodes.TextNode;
public class ASTHtmlTextNode extends AbstractHtmlNode<TextNode> {
public class ASTHtmlTextNode extends AbstractHtmlNode<TextNode> implements net.sourceforge.pmd.lang.rule.xpath.TextNode {
ASTHtmlTextNode(TextNode node) {
super(node);
@ -18,11 +18,17 @@ public class ASTHtmlTextNode extends AbstractHtmlNode<TextNode> {
return visitor.visit(this, data);
}
public String getNormalizedText() {
public String getWholeText() {
return node.getWholeText();
}
@Override
public String getText() {
return node.text();
}
public String getText() {
return node.getWholeText();
/**
* Selects the default implementation from PMD's XPath {@code TextNode}
* interface, which returns {@code "#text"}. The interface is referenced
* by its fully-qualified name because the simple name {@code TextNode}
* is already taken in this file by the imported jsoup class.
*/
@Override
public String getXPathNodeName() {
return net.sourceforge.pmd.lang.rule.xpath.TextNode.super.getXPathNodeName();
}
}

View File

@ -27,6 +27,7 @@ abstract class AbstractHtmlNode<T extends Node> extends AbstractNode<AbstractHtm
/**
* Returns the name reported by the wrapped node's {@code nodeName()} as
* the XPath node name.
*/
@Override
public String getXPathNodeName() {
// note: this might return "#text" or "#comment" as well
return node.nodeName();
}

View File

@ -45,7 +45,7 @@ public class HtmlTokenizer implements Tokenizer {
String image = node.getXPathNodeName();
if (node instanceof ASTHtmlTextNode) {
image = ((ASTHtmlTextNode) node).getText();
image = ((ASTHtmlTextNode) node).getWholeText();
}
tokenEntries.recordToken(image, node.getReportLocation());

View File

@ -37,7 +37,7 @@ class LineNumbers {
} else if (n instanceof ASTHtmlXmlDeclaration) {
nextIndex = htmlString.indexOf("<?", nextIndex);
} else if (n instanceof ASTHtmlTextNode) {
textLength = ((ASTHtmlTextNode) n).getText().length();
textLength = ((ASTHtmlTextNode) n).getWholeText().length();
} else if (n instanceof ASTHtmlDocumentType) {
nextIndex = index;
}

View File

@ -1,15 +1,15 @@
+- #document[@NodeName = "#document"]
+- #doctype[@Name = "html", @NodeName = "#doctype", @PublicId = "", @SystemId = ""]
+- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n"]
+- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n"]
+- html[@NodeName = "html"]
| +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| +- head[@NodeName = "head"]
| | +- #text[@NodeName = "#text", @NormalizedText = "hello", @Text = "hello"]
| +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | +- #text[@NodeName = "#text", @Text = "hello", @WholeText = "hello"]
| +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| +- body[@NodeName = "body"]
| | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | +- h1[@NodeName = "h1"]
| | | +- #text[@NodeName = "#text", @NormalizedText = "world", @Text = "world"]
| | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n"]
+- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n"]
| | | +- #text[@NodeName = "#text", @Text = "world", @WholeText = "world"]
| | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n"]
+- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n"]

View File

@ -1,9 +1,9 @@
+- #document[@NodeName = "#document"]
+- #declaration[@Name = "xml", @NodeName = "#declaration"]
+- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n"]
+- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n"]
+- root[@NodeName = "root"]
| +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| +- child[@NodeName = "child", @attr1 = "value1"]
| | +- #text[@NodeName = "#text", @NormalizedText = "text & entities", @Text = "text & entities"]
| +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n"]
+- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n"]
| | +- #text[@NodeName = "#text", @Text = "text & entities", @WholeText = "text & entities"]
| +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n"]
+- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n"]

View File

@ -1,83 +1,83 @@
+- #document[@NodeName = "#document"]
+- #comment[@Data = " from https://raw.githubusercontent.com/trailheadapps/lwc-recipes-oss/main/src/modules/ui/app/app.html ", @NodeName = "#comment"]
+- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n"]
+- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n"]
+- template[@NodeName = "template"]
| +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| +- section[@NodeName = "section", @class = "content container page-background"]
| | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | +- ui-navbar[@NodeName = "ui-navbar", @nav-items = "{navigationItems}", @oncategorychange = "{handleCategoryChange}", @selected-item = "{currentNavigationItem}"]
| | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n\n "]
| | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n\n "]
| | +- article[@NodeName = "article", @class = "container"]
| | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | +- div[@NodeName = "div"]
| | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | +- template[@NodeName = "template", @if:true = "{navigationItems.hello.visible}"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | | +- recipe-hello[@NodeName = "recipe-hello"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | | +- recipe-hello-binding[@NodeName = "recipe-hello-binding"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | | +- recipe-hello-expressions[@NodeName = "recipe-hello-expressions"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | | +- recipe-hello-expressions-track[@NodeName = "recipe-hello-expressions-track"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | | +- recipe-hello-conditional-rendering[@NodeName = "recipe-hello-conditional-rendering"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | | +- recipe-hello-for-each[@NodeName = "recipe-hello-for-each"]
| | | | | +- recipe-hello-iterator[@NodeName = "recipe-hello-iterator"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | +- template[@NodeName = "template", @if:true = "{navigationItems.composition.visible}"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | | +- recipe-composition-basics[@NodeName = "recipe-composition-basics"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | | +- recipe-composition-iteration[@NodeName = "recipe-composition-iteration"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | | +- recipe-composition-contact-search[@NodeName = "recipe-composition-contact-search"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | | +- recipe-composition-dynamic[@NodeName = "recipe-composition-dynamic"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | +- template[@NodeName = "template", @if:true = "{navigationItems.child.visible}"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | | +- recipe-event-simple[@NodeName = "recipe-event-simple"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | | +- recipe-event-with-data[@NodeName = "recipe-event-with-data"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | | +- recipe-event-bubbling[@NodeName = "recipe-event-bubbling"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | +- template[@NodeName = "template", @if:true = "{navigationItems.parent.visible}"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | | +- recipe-api-property[@NodeName = "recipe-api-property"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | | +- recipe-api-function[@NodeName = "recipe-api-function"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | | +- recipe-api-setter-getter[@NodeName = "recipe-api-setter-getter"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | +- template[@NodeName = "template", @if:true = "{navigationItems.misc.visible}"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | | +- recipe-misc-shared-java-script[@NodeName = "recipe-misc-shared-java-script"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | | +- recipe-misc-rest-api-call[@NodeName = "recipe-misc-rest-api-call"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | | +- recipe-misc-dom-query[@NodeName = "recipe-misc-dom-query"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | | +- recipe-misc-multiple-templates[@NodeName = "recipe-misc-multiple-templates"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | +- template[@NodeName = "template", @if:true = "{navigationItems.party.visible}"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | | +- recipe-libs-d3[@NodeName = "recipe-libs-d3"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | | +- recipe-libs-chartjs[@NodeName = "recipe-libs-chartjs"]
| | | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | | +- ui-navfooter[@NodeName = "ui-navfooter", @label-next = "{nextNavigationItem}", @label-previous = "{previousNavigationItem}", @onnextclicked = "{handleNavigateNext}", @onpreviousclicked = "{handleNavigatePrevious}"]
| | | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| | +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n "]
| +- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n"]
+- #text[@NodeName = "#text", @NormalizedText = " ", @Text = "\n"]
| | | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| | +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n "]
| +- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n"]
+- #text[@NodeName = "#text", @Text = " ", @WholeText = "\n"]