Pull some trimming logic into pmd-core

This commit is contained in:
Clément Fournier
2022-04-24 14:56:58 +02:00
parent b3818954ac
commit 0d10425aac
5 changed files with 188 additions and 122 deletions

View File

@ -49,12 +49,7 @@ public class SimpleRenderer implements Renderer, CPDRenderer {
String source = match.getSourceCodeSlice();
if (trimLeadingWhitespace) {
String[] lines = source.split("\n");
int trimDepth = StringUtil.maxCommonLeadingWhitespaceForAll(lines);
if (trimDepth > 0) {
lines = StringUtil.trimStartOn(lines, trimDepth);
}
for (String line : lines) {
for (String line : StringUtil.linesWithTrimIndent(source)) {
writer.append(line).append(PMD.EOL);
}
return;

View File

@ -13,6 +13,8 @@ import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import org.checkerframework.checker.nullness.qual.NonNull;
@ -196,6 +198,27 @@ public final class Chars implements CharSequence {
return -1;
}
/**
 * Returns the index (relative to this sequence) of the last occurrence
 * of the given character at or before {@code fromIndex}, or -1 if there
 * is none. Modelled on {@link String#lastIndexOf(int, int)}, with one
 * difference: a {@code fromIndex} that is negative or not smaller than
 * the length of this sequence yields -1 without searching.
 *
 * @param ch        Character to look for
 * @param fromIndex Index at which the backwards search starts (inclusive)
 *
 * @return The index of the character, or -1 if it was not found
 */
public int lastIndexOf(int ch, int fromIndex) {
    boolean inRange = fromIndex >= 0 && fromIndex < len;
    if (!inRange) {
        return -1;
    }
    // The scan is done by hand and stops at the start of this region.
    // Delegating to the String search methods is avoided on purpose, as
    // they would keep looking through the rest of the file, which in
    // the worst case is horrible.
    int pos = start + fromIndex;
    while (pos >= start) {
        if (str.charAt(pos) == ch) {
            // translate back to an index relative to this sequence
            return pos - start;
        }
        pos--;
    }
    return -1;
}
/**
* See {@link String#startsWith(String, int)}.
*/
@ -221,6 +244,13 @@ public final class Chars implements CharSequence {
return str.charAt(start + fromIndex) == prefix;
}
/**
 * Returns true if this sequence ends with the given suffix.
 * See {@link String#endsWith(String)}.
 *
 * @param suffix Suffix to test for
 */
public boolean endsWith(String suffix) {
    // a suffix is a prefix of the tail that has the same length
    int suffixOffset = length() - suffix.length();
    return startsWith(suffix, suffixOffset);
}
/**
* Returns a subsequence which does not start with control characters ({@code <= 32}).
* This is consistent with {@link String#trim()}.
@ -254,6 +284,17 @@ public final class Chars implements CharSequence {
return trimStart().trimEnd();
}
/**
 * Returns this sequence without the given suffix. If this sequence
 * does not end with the suffix, returns this.
 *
 * @param charSeq Suffix to remove
 */
public Chars removeSuffix(String charSeq) {
    int keptLength = length() - charSeq.length();
    return startsWith(charSeq, keptLength) ? slice(0, keptLength) : this;
}
/**
* Returns true if this char sequence is logically equal to the
@ -443,6 +484,13 @@ public final class Chars implements CharSequence {
};
}
/**
 * Returns a sequential stream over the lines yielded by {@link #lines()}.
 */
public Stream<Chars> lineStream() {
    Iterable<Chars> lineIterable = lines();
    return StreamSupport.stream(lineIterable.spliterator(), false);
}
/**
* Returns a new reader for the whole contents of this char sequence.

View File

@ -5,6 +5,7 @@
package net.sourceforge.pmd.util;
import java.text.MessageFormat;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
@ -15,6 +16,7 @@ import org.apache.commons.lang3.StringUtils;
import net.sourceforge.pmd.annotation.InternalApi;
import net.sourceforge.pmd.internal.util.AssertionUtil;
import net.sourceforge.pmd.lang.document.Chars;
/**
* A number of String-specific utility methods for use by PMD or its IDE
@ -278,53 +280,21 @@ public final class StringUtil {
*
* @throws NullPointerException If the parameter is null
*/
public static int maxCommonLeadingWhitespaceForAll(String[] strings) {
int shortest = lengthOfShortestIn(strings);
if (shortest == 0) {
return 0;
}
char[] matches = new char[shortest];
for (int m = 0; m < matches.length; m++) {
matches[m] = strings[0].charAt(m);
if (!Character.isWhitespace(matches[m])) {
return m;
}
for (String str : strings) {
if (str.charAt(m) != matches[m]) {
return m;
}
private static int maxCommonLeadingWhitespaceForAll(List<? extends CharSequence> lines) {
// the max *common* leading WS length is the min length of all leading WS
int maxCommonWs = Integer.MAX_VALUE;
for (int i = 0; i < lines.size(); i++) {
CharSequence line = lines.get(i);
// compute common prefix
if (!StringUtils.isAllBlank(line) || i == lines.size() - 1) {
maxCommonWs = Math.min(maxCommonWs, StringUtil.countLeadingWhitespace(line));
}
}
return shortest;
}
/**
* Return the length of the shortest string in the array. If the collection
* is empty or any one of them is null then it returns 0.
*
* @throws NullPointerException If the parameter is null
*/
public static int lengthOfShortestIn(String[] strings) {
if (strings.length == 0) {
return 0;
if (maxCommonWs == Integer.MAX_VALUE) {
// common prefix not found
maxCommonWs = 0;
}
int minLength = Integer.MAX_VALUE;
for (String string : strings) {
if (string == null) {
return 0;
}
minLength = Math.min(minLength, string.length());
}
return minLength;
return maxCommonWs;
}
@ -334,7 +304,7 @@ public final class StringUtil {
*
* @return String[]
*/
public static String[] trimStartOn(String[] strings, int trimDepth) {
private static String[] trimStartOn(String[] strings, int trimDepth) {
if (trimDepth == 0) {
return strings;
@ -347,6 +317,92 @@ public final class StringUtil {
return results;
}
/**
 * Trims the indentation common to the lines of the given text.
 * The text is split into lines with {@link Chars#lineStream()} and the
 * result is produced by {@link #trimIndentIntoStringBuilder(List, StringBuilder)}.
 * No line is discarded: every line of the input is written to the result.
 *
 * @param string Text to process
 *
 * @return A new string builder holding the text without common indentation
 */
public static StringBuilder trimIndent(Chars string) {
    // the result cannot be longer than the input, so presize with its length
    StringBuilder result = new StringBuilder(string.length());
    trimIndentIntoStringBuilder(string.lineStream().collect(Collectors.toList()), result);
    return result;
}
/**
 * Appends the given lines to the string builder, after removing the
 * leading whitespace that they have in common.
 *
 * @param lines Lines to process
 * @param sb    Builder that receives the output
 */
public static void trimIndentIntoStringBuilder(List<Chars> lines, StringBuilder sb) {
    appendWithoutCommonPrefix(lines, maxCommonLeadingWhitespaceForAll(lines), sb);
}
/**
 * Appends each line to the output after dropping its first
 * {@code prefixLength} characters and its trailing whitespace.
 * Lines that are blank or shorter than the prefix only have their
 * trailing whitespace trimmed. Lines are separated with {@code \n}.
 */
private static void appendWithoutCommonPrefix(List<Chars> lines, int prefixLength, StringBuilder output) {
    final int lastIndex = lines.size() - 1;
    int index = 0;
    for (Chars rawLine : lines) {
        Chars stripped = rawLine;
        // remove common whitespace prefix
        boolean hasContent = !StringUtils.isAllBlank(rawLine);
        if (hasContent && rawLine.length() >= prefixLength) {
            stripped = rawLine.subSequence(prefixLength, rawLine.length());
        }
        stripped = stripped.trimEnd();
        stripped.appendChars(output);

        boolean isLastLine = index == lastIndex;
        boolean isFirstLine = index == 0;
        // Every line but the last gets a terminator. The last line gets one
        // only if it is not also the first line and is not blank.
        // TODO (from the original author): should the "||" below be "&&"?
        boolean needsTerminator = !isLastLine || (!isFirstLine && !StringUtils.isAllBlank(stripped));
        if (needsTerminator) {
            output.append('\n'); // normalize line endings to LF
        }
        index++;
    }
}
/**
 * Remove trailing and leading blank lines. The result starts at the
 * beginning of the first line that has a non-whitespace character
 * (its indentation is preserved), and ends at the end of the last line
 * that has a non-whitespace character (its line terminator is excluded).
 * If the string contains only whitespace, the result is empty.
 *
 * @param string Text to trim
 *
 * @return A subsequence of the parameter without leading and trailing blank lines
 */
public static Chars trimBlankLines(Chars string) {
    final int length = string.length();

    int offsetOfFirstNonBlankChar = length;
    for (int i = 0; i < length; i++) {
        if (!Character.isWhitespace(string.charAt(i))) {
            offsetOfFirstNonBlankChar = i;
            break;
        }
    }
    if (offsetOfFirstNonBlankChar == length) {
        // only whitespace: every line is blank
        return string.subSequence(0, 0);
    }

    // Defaults to the first non-blank char: the loop below does not
    // revisit that offset, and it is the right answer when the string
    // has a single non-blank character.
    int offsetOfLastNonBlankChar = offsetOfFirstNonBlankChar;
    for (int i = length - 1; i > offsetOfFirstNonBlankChar; i--) {
        if (!Character.isWhitespace(string.charAt(i))) {
            offsetOfLastNonBlankChar = i;
            break;
        }
    }

    // Look backwards for the line terminator that precedes the first
    // non-blank char, and cut right after it. If there is none we get
    // -1, and adding 1 makes the cut start at offset zero.
    int cutFromInclusive = string.lastIndexOf('\n', offsetOfFirstNonBlankChar) + 1;
    // Look forwards for the line terminator that follows the last
    // non-blank char; if there is none, keep everything until the end.
    int cutUntilExclusive = minus1Default(string.indexOf('\n', offsetOfLastNonBlankChar), length);

    return string.subSequence(cutFromInclusive, cutUntilExclusive);
}
/**
 * Returns the default value if the first parameter is -1 (the
 * "not found" result of index searches), otherwise returns the
 * first parameter.
 */
private static int minus1Default(int i, int defaultValue) {
    if (i == -1) {
        return defaultValue;
    }
    return i;
}
/**
 * Returns the number of whitespace characters (as per
 * {@link Character#isWhitespace(char)}) at the start of the sequence.
 */
private static int countLeadingWhitespace(CharSequence s) {
    final int length = s.length();
    for (int i = 0; i < length; i++) {
        if (!Character.isWhitespace(s.charAt(i))) {
            return i;
        }
    }
    // the whole sequence is whitespace (or it is empty)
    return length;
}
/**
 * Splits the source on {@code \n} and trims the leading whitespace
 * that the resulting lines have in common.
 *
 * @param source Text to split
 *
 * @return The lines of the text; if they have no common leading
 *         whitespace they are returned as split, otherwise they
 *         are passed through {@code trimStartOn}
 */
public static String[] linesWithTrimIndent(String source) {
    String[] rawLines = source.split("\n");
    int commonIndent = maxCommonLeadingWhitespaceForAll(Arrays.asList(rawLines));
    return commonIndent > 0 ? trimStartOn(rawLines, commonIndent) : rawLines;
}
/**
* Are the two String values the same. The Strings can be optionally trimmed

View File

@ -4,14 +4,16 @@
package net.sourceforge.pmd.lang.java.ast;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.checkerframework.checker.nullness.qual.NonNull;
import org.checkerframework.checker.nullness.qual.Nullable;
import net.sourceforge.pmd.lang.document.Chars;
import net.sourceforge.pmd.util.StringUtil;
/**
* Represents a string literal. The image of this node is the literal as it appeared
* in the source ({@link #getText()}). {@link #getConstValue()} allows to recover
@ -71,7 +73,7 @@ public final class ASTStringLiteral extends AbstractLiteral implements ASTLitera
@Override
protected @Nullable Object buildConstValue() {
if (isTextBlock()) {
return determineTextBlockContent(getImage());
return determineTextBlockContent(getText());
} else {
CharSequence image = getText();
CharSequence woDelims = image.subSequence(1, image.length() - 1);
@ -79,46 +81,36 @@ public final class ASTStringLiteral extends AbstractLiteral implements ASTLitera
}
}
static String determineTextBlockContent(String image) {
// normalize line endings to LF
String content = image.replaceAll("\r\n|\r", "\n");
int start = determineContentStart(content);
content = content.substring(start, content.length() - TEXTBLOCK_DELIMITER.length());
int prefixLength = Integer.MAX_VALUE;
List<String> lines = Arrays.asList(content.split("\\n"));
for (int i = 0; i < lines.size(); i++) {
String line = lines.get(i);
// compute common prefix
if (!StringUtils.isAllBlank(line) || i == lines.size() - 1) {
prefixLength = Math.min(prefixLength, countLeadingWhitespace(line));
}
}
if (prefixLength == Integer.MAX_VALUE) {
// common prefix not found
prefixLength = 0;
}
StringBuilder sb = new StringBuilder(content.length());
for (int i = 0; i < lines.size(); i++) {
String line = lines.get(i);
// remove common whitespace prefix
if (!StringUtils.isAllBlank(line) && line.length() >= prefixLength) {
line = line.substring(prefixLength);
}
line = removeTrailingWhitespace(line);
sb.append(line);
boolean isLastLine = i == lines.size() - 1;
boolean isFirstLine = i == 0;
if (!isLastLine || !isFirstLine && !StringUtils.isAllBlank(line)) {
sb.append('\n');
}
}
/**
 * Computes the string value of a text block from its source image:
 * takes the content lines (without delimiters), trims their common
 * indentation, then interprets escape sequences.
 */
static String determineTextBlockContent(Chars image) {
    StringBuilder content = new StringBuilder(image.length());
    StringUtil.trimIndentIntoStringBuilder(getContentLines(image), content);
    interpretEscapeSequences(content);
    return content.toString();
}
/**
 * Same as {@link #determineTextBlockContent(Chars)}, for an image
 * given as a string.
 */
static String determineTextBlockContent(String image) {
    Chars wrapped = Chars.wrap(image);
    return determineTextBlockContent(wrapped);
}
/**
 * Returns the lines of the text block minus its delimiters: the first
 * line (the opening delimiter line) is dropped, and the closing
 * delimiter is removed from the end of the last line.
 */
private static @NonNull List<Chars> getContentLines(Chars chars) {
    List<Chars> allLines = chars.lineStream().collect(Collectors.toList());
    assert allLines.size() >= 2 : "invalid text block syntax " + chars;

    // skip the first line, it's just """ and some whitespace
    List<Chars> contentLines = allLines.subList(1, allLines.size());

    // the last line still carries the closing """, cut it off
    int closingIndex = contentLines.size() - 1;
    Chars closingLine = contentLines.get(closingIndex);
    assert closingLine.endsWith(TEXTBLOCK_DELIMITER);
    contentLines.set(closingIndex, closingLine.removeSuffix(TEXTBLOCK_DELIMITER));
    return contentLines;
}
private static void interpretEscapeSequences(StringBuilder sb) {
// interpret escape sequences "\<LF>" (line continuation), "n","t","b","r","f", "s", "\"", "\'", "\\"
// we need to interpret everything in one pass, so regex replacement is inappropriate
@ -172,33 +164,4 @@ public final class ASTStringLiteral extends AbstractLiteral implements ASTLitera
}
}
}
private static int determineContentStart(String s) {
int start = TEXTBLOCK_DELIMITER.length(); // this is the opening delimiter
// the content begins after at the first character after the line terminator
// of the opening delimiter
while (start < s.length() && Character.isWhitespace(s.charAt(start))) {
if (s.charAt(start) == '\n') {
return start + 1;
}
start++;
}
return start;
}
private static int countLeadingWhitespace(String s) {
int count = 0;
while (count < s.length() && Character.isWhitespace(s.charAt(count))) {
count++;
}
return count;
}
private static String removeTrailingWhitespace(String s) {
int endIndexIncluding = s.length() - 1;
while (endIndexIncluding >= 0 && Character.isWhitespace(s.charAt(endIndexIncluding))) {
endIndexIncluding--;
}
return s.substring(0, endIndexIncluding + 1);
}
}

View File

@ -44,16 +44,19 @@ import net.sourceforge.pmd.RuleViolation;
import net.sourceforge.pmd.lang.Language;
import net.sourceforge.pmd.lang.LanguageRegistry;
import net.sourceforge.pmd.lang.LanguageVersion;
import net.sourceforge.pmd.lang.document.Chars;
import net.sourceforge.pmd.lang.document.TextFile;
import net.sourceforge.pmd.processor.AbstractPMDProcessor;
import net.sourceforge.pmd.properties.PropertyDescriptor;
import net.sourceforge.pmd.renderers.TextRenderer;
import net.sourceforge.pmd.reporting.GlobalAnalysisListener;
import net.sourceforge.pmd.util.StringUtil;
/**
* Advanced methods for test cases
*/
public abstract class RuleTst {
private final DocumentBuilder documentBuilder;
/** Use a single classloader for all tests. */
@ -483,15 +486,16 @@ public abstract class RuleTst {
throw new RuntimeException("No matching code fragment found for coderef");
}
}
code = StringUtil.trimBlankLines(Chars.wrap(code)).toString();
String description = getNodeValue(testCode, "description", true);
int expectedProblems = Integer.parseInt(getNodeValue(testCode, "expected-problems", true));
int expectedProblems = Integer.parseInt(getNodeValue(testCode, "expected-problems", true).trim());
String languageVersionString = getNodeValue(testCode, "source-type", false);
if (languageVersionString == null) {
tests[i] = new TestDescriptor(code, description, expectedProblems, rule);
} else {
languageVersionString = languageVersionString.trim();
LanguageVersion languageVersion = parseSourceType(languageVersionString);
if (languageVersion != null) {
tests[i] = new TestDescriptor(code, description, expectedProblems, rule, languageVersion);
@ -553,6 +557,6 @@ public abstract class RuleTst {
buffer.append(node.getNodeValue());
}
}
return buffer.toString().trim();
return buffer.toString();
}
}