Refactor EscapeUtils to use StringEscapeUtils for escaping HTML

This commit is contained in:
Andreas Dangel
2018-11-18 12:47:19 +01:00
parent 9cdfe6ff96
commit 75cbffaab4
3 changed files with 52 additions and 104 deletions

View File

@ -6,7 +6,13 @@ package net.sourceforge.pmd.docs;
import java.util.List;
import org.apache.commons.text.StringEscapeUtils;
public final class EscapeUtils {
// Delimiter toggling "verbatim" mode: text enclosed between a pair of backticks is copied without HTML escaping.
private static final String BACKTICK = "`";
// Prefix that marks the start of an inline URL such as <http://...> or <https://...>; such spans are emitted unescaped.
private static final String URL_START = "<http";
// Leading marker of a quote line; it is copied as-is so the '>' is not turned into an HTML entity.
private static final String QUOTE_START = "> ";
/**
 * Private constructor: this class is a utility class and is not meant to be instantiated.
 */
private EscapeUtils() {
// This is a utility class
}
@ -21,115 +27,53 @@ public final class EscapeUtils {
.replace("|", "\\|");
}
/**
 * States of a character-by-character scanner that recognises the
 * {@code <http:} and {@code <https:} prefixes (case-insensitively for the letters).
 * Each constant name spells out the prefix characters consumed so far.
 */
private enum State {
    /** Start state: no partial URL prefix is pending. */
    S,
    /** A {@code <} has been read. */
    LT,
    /** {@code <h} has been read. */
    LT_H,
    /** {@code <ht} has been read. */
    LT_H_T,
    /** {@code <htt} has been read. */
    LT_H_T_T,
    /** {@code <http} has been read; next is either {@code s} or {@code :}. */
    LT_H_T_T_P,
    /** {@code <http:} has been read; characters are copied verbatim until {@code >}. */
    LT_H_T_T_P1,
    /** {@code <https} has been read; a {@code :} is expected next. */
    LT_H_T_T_P_S,
    /** {@code <https:} has been read; characters are copied verbatim until {@code >}. */
    LT_H_T_T_P_S1
}
public static String escapeSingleLine(String line) {
StringBuilder escaped = new StringBuilder(line.length() + 16);
State s = State.S;
boolean needsEscape = true;
for (int i = 0; i < line.length(); i++) {
char c = line.charAt(i);
if (c == '`') {
needsEscape = !needsEscape;
}
switch (s) {
case S:
if (c == '<') {
s = State.LT;
} else if (c == '>') {
if (needsEscape && i > 0) {
escaped.append("&gt;");
} else {
escaped.append(c);
}
} else if (c == '"') {
if (needsEscape) {
escaped.append("&quot;");
} else {
escaped.append(c);
}
} else {
escaped.append(c);
}
break;
case LT:
if (c == 'h' || c == 'H') {
s = State.LT_H;
} else {
if (needsEscape) {
escaped.append("&lt;").append(c);
} else {
escaped.append("<").append(c);
}
s = State.S;
}
break;
case LT_H:
if (c == 't' || c == 'T') {
s = State.LT_H_T;
} else {
escaped.append("&lt;h").append(c);
s = State.S;
}
break;
case LT_H_T:
if (c == 't' || c == 'T') {
s = State.LT_H_T_T;
} else {
escaped.append("&lt;ht").append(c);
s = State.S;
}
break;
case LT_H_T_T:
if (c == 'p' || c == 'P') {
s = State.LT_H_T_T_P;
} else {
escaped.append("&lt;htt").append(c);
s = State.S;
}
break;
case LT_H_T_T_P:
if (c == 's' || c == 'S') {
s = State.LT_H_T_T_P_S;
} else if (c == ':') {
escaped.append("<http:");
s = State.LT_H_T_T_P1;
} else {
escaped.append("&lt;htt").append(c);
s = State.S;
}
break;
case LT_H_T_T_P1:
escaped.append(c);
if (c == '>') {
s = State.S;
}
break;
case LT_H_T_T_P_S:
if (c == ':') {
escaped.append("<https:");
s = State.LT_H_T_T_P_S1;
} else {
escaped.append("&lt;https").append(c);
s = State.S;
}
break;
case LT_H_T_T_P_S1:
escaped.append(c);
if (c == '>') {
s = State.S;
}
break;
default:
escaped.append(c);
break;
}
String currentLine = line;
if (currentLine.startsWith(QUOTE_START)) {
escaped.append(currentLine.substring(0, 2));
currentLine = currentLine.substring(2);
}
int url = currentLine.indexOf(URL_START);
while (url > -1) {
String before = currentLine.substring(0, url);
before = escapeBackticks(escaped, before);
escaped.append(StringEscapeUtils.escapeHtml4(before));
int urlEnd = currentLine.indexOf(">", url) + 1;
// add the url unescaped
escaped.append(currentLine.substring(url, urlEnd));
currentLine = currentLine.substring(urlEnd);
url = currentLine.indexOf(URL_START);
}
currentLine = escapeBackticks(escaped, currentLine);
escaped.append(StringEscapeUtils.escapeHtml4(currentLine));
return escaped.toString();
}
/**
 * Consumes every backtick-delimited segment of {@code linePart} and appends it,
 * together with the backticks themselves, to {@code escaped}.
 *
 * <p>Segments alternate between two modes, starting outside of backticks:
 * text outside of backticks is HTML-escaped before being appended, text between
 * an opening and a closing backtick is appended unchanged.
 *
 * @param escaped  the buffer receiving the processed text
 * @param linePart the text to scan for backticks
 * @return the text following the last backtick (or all of {@code linePart} if it
 *         contains none); it is returned unprocessed and has not been appended
 */
private static String escapeBackticks(StringBuilder escaped, String linePart) {
    String remainder = linePart;
    boolean outsideBackticks = true;

    for (int tick = remainder.indexOf(BACKTICK); tick > -1; tick = remainder.indexOf(BACKTICK)) {
        String segment = remainder.substring(0, tick);
        escaped.append(outsideBackticks ? StringEscapeUtils.escapeHtml4(segment) : segment);
        escaped.append(BACKTICK);

        // each backtick flips between escaped and verbatim mode
        outsideBackticks = !outsideBackticks;
        remainder = remainder.substring(tick + 1);
    }

    return remainder;
}
public static List<String> escapeLines(List<String> lines) {
boolean needsEscape = true;
for (int i = 0; i < lines.size(); i++) {

View File

@ -38,6 +38,10 @@ public class EscapeUtilsTest {
EscapeUtils.escapeSingleLine("URL: <http://www.google.com> is a url without ssl"));
assertEquals("> this is a quote line",
EscapeUtils.escapeSingleLine("> this is a quote line"));
assertEquals("combination of URLs and backticks: <https://pmd.github.io> but `<script>` &lt;strong&gt;escaped&lt;/strong&gt;",
EscapeUtils.escapeSingleLine("combination of URLs and backticks: <https://pmd.github.io> but `<script>` <strong>escaped</strong>"));
assertEquals("combination of URLs and backticks: `<script>` &lt;strong&gt;escaped&lt;/strong&gt; but <https://pmd.github.io>",
EscapeUtils.escapeSingleLine("combination of URLs and backticks: `<script>` <strong>escaped</strong> but <https://pmd.github.io>"));
}
@Test

View File

@ -261,7 +261,7 @@ The paragraph after the quotation.
> in cdata
If the description contains a code example, then e.g. "quotes" should not be escaped:
If the description contains a code example, then e.g. &quot;quotes&quot; should not be escaped:
```
if (0 > 1 && 0 < 1) {