Pull some trimming logic into pmd-core

2022-04-24 14:56:58 +02:00
parent b3818954ac
commit 0d10425aac
5 changed files with 188 additions and 122 deletions
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SimpleRenderer.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SimpleRenderer.java
@ -49,12 +49,7 @@ public class SimpleRenderer implements Renderer, CPDRenderer {
        String source = match.getSourceCodeSlice();

        if (trimLeadingWhitespace) {
-            String[] lines = source.split("\n");
-            int trimDepth = StringUtil.maxCommonLeadingWhitespaceForAll(lines);
-            if (trimDepth > 0) {
-                lines = StringUtil.trimStartOn(lines, trimDepth);
-            }
-            for (String line : lines) {
+            for (String line : StringUtil.linesWithTrimIndent(source)) {
                writer.append(line).append(PMD.EOL);
            }
            return;
--- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/Chars.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/Chars.java
@ -13,6 +13,8 @@ import java.nio.CharBuffer;
 import java.nio.charset.Charset;
 import java.util.Iterator;
 import java.util.regex.Pattern;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;

 import org.checkerframework.checker.nullness.qual.NonNull;

@ -196,6 +198,27 @@ public final class Chars implements CharSequence {
        return -1;
    }

+    /**
+     * Returns the index of the last occurrence of the given character,
+     * searching backwards from {@code fromIndex} (inclusive), or -1 if
+     * the character is not found. This is modelled on
+     * {@link String#lastIndexOf(int, int)}, except that a {@code fromIndex}
+     * greater than or equal to {@link #length()} yields -1 instead of
+     * searching the whole sequence.
+     *
+     * @param ch        Character to look for
+     * @param fromIndex Index, relative to this sequence, at which the backwards search starts
+     *
+     * @return An index relative to this sequence, or -1
+     */
+    public int lastIndexOf(int ch, int fromIndex) {
+        boolean inBounds = fromIndex >= 0 && fromIndex < len;
+        if (!inBounds) {
+            return -1;
+        }
+        // Scan the backing string by hand instead of delegating to it:
+        // this way the search never goes past the first character of
+        // this slice, whatever the size of the backing string.
+        int absolute = start + fromIndex;
+        while (absolute >= start) {
+            if (str.charAt(absolute) == ch) {
+                return absolute - start;
+            }
+            absolute--;
+        }
+        return -1;
+    }
+
    /**
     * See {@link String#startsWith(String, int)}.
     */
@ -221,6 +244,13 @@ public final class Chars implements CharSequence {
        return str.charAt(start + fromIndex) == prefix;
    }

+    /**
+     * Tests whether the given suffix occurs at the very end of this
+     * sequence, that is at offset {@code length() - suffix.length()}.
+     * See {@link String#endsWith(String)}.
+     */
+    public boolean endsWith(String suffix) {
+        int suffixStart = length() - suffix.length();
+        return startsWith(suffix, suffixStart);
+    }
+
    /**
     * Returns a subsequence which does not start with control characters ({@code <= 32}).
     * This is consistent with {@link String#trim()}.
@ -254,6 +284,17 @@ public final class Chars implements CharSequence {
        return trimStart().trimEnd();
    }

+    /**
+     * Returns a slice of this sequence that stops right before the
+     * given suffix. If this sequence does not end with the suffix,
+     * returns this.
+     */
+    public Chars removeSuffix(String charSeq) {
+        if (!endsWith(charSeq)) {
+            return this;
+        }
+        return slice(0, length() - charSeq.length());
+    }
+

    /**
     * Returns true if this char sequence is logically equal to the
@ -443,6 +484,13 @@ public final class Chars implements CharSequence {
        };
    }

+    /**
+     * Returns a sequential stream over the lines yielded by {@link #lines()}.
+     */
+    public Stream<Chars> lineStream() {
+        final boolean parallel = false;
+        return StreamSupport.stream(lines().spliterator(), parallel);
+    }
+

    /**
     * Returns a new reader for the whole contents of this char sequence.
--- a/pmd-core/src/main/java/net/sourceforge/pmd/util/StringUtil.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/util/StringUtil.java
@ -5,6 +5,7 @@
 package net.sourceforge.pmd.util;

 import java.text.MessageFormat;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Locale;
 import java.util.regex.Matcher;
@ -15,6 +16,7 @@ import org.apache.commons.lang3.StringUtils;

 import net.sourceforge.pmd.annotation.InternalApi;
 import net.sourceforge.pmd.internal.util.AssertionUtil;
+import net.sourceforge.pmd.lang.document.Chars;

 /**
 * A number of String-specific utility methods for use by PMD or its IDE
@ -278,53 +280,21 @@ public final class StringUtil {
     *
     * @throws NullPointerException If the parameter is null
     */
-    public static int maxCommonLeadingWhitespaceForAll(String[] strings) {
-
-        int shortest = lengthOfShortestIn(strings);
-        if (shortest == 0) {
-            return 0;
-        }
-
-        char[] matches = new char[shortest];
-
-        for (int m = 0; m < matches.length; m++) {
-            matches[m] = strings[0].charAt(m);
-            if (!Character.isWhitespace(matches[m])) {
-                return m;
-            }
-            for (String str : strings) {
-                if (str.charAt(m) != matches[m]) {
-                    return m;
-                }
+    private static int maxCommonLeadingWhitespaceForAll(List<? extends CharSequence> lines) {
+        // the max *common* leading WS length is the min length of all leading WS
+        int maxCommonWs = Integer.MAX_VALUE;
+        for (int i = 0; i < lines.size(); i++) {
+            CharSequence line = lines.get(i);
+            // compute common prefix
+            if (!StringUtils.isAllBlank(line) || i == lines.size() - 1) {
+                maxCommonWs = Math.min(maxCommonWs, StringUtil.countLeadingWhitespace(line));
            }
        }
-
-        return shortest;
-    }
-
-
-    /**
-     * Return the length of the shortest string in the array. If the collection
-     * is empty or any one of them is null then it returns 0.
-     *
-     * @throws NullPointerException If the parameter is null
-     */
-    public static int lengthOfShortestIn(String[] strings) {
-
-        if (strings.length == 0) {
-            return 0;
+        if (maxCommonWs == Integer.MAX_VALUE) {
+            // common prefix not found
+            maxCommonWs = 0;
        }
-
-        int minLength = Integer.MAX_VALUE;
-
-        for (String string : strings) {
-            if (string == null) {
-                return 0;
-            }
-            minLength = Math.min(minLength, string.length());
-        }
-
-        return minLength;
+        return maxCommonWs;
    }


@ -334,7 +304,7 @@ public final class StringUtil {
     *
     * @return String[]
     */
-    public static String[] trimStartOn(String[] strings, int trimDepth) {
+    private static String[] trimStartOn(String[] strings, int trimDepth) {

        if (trimDepth == 0) {
            return strings;
@ -347,6 +317,92 @@ public final class StringUtil {
        return results;
    }

+    /**
+     * Trims the indentation common to the lines of the string.
+     * No line is discarded: blank lines are kept. Trailing whitespace
+     * is removed from every line, and lines are separated with
+     * {@code '\n'} in the result.
+     *
+     * @param string Text to process
+     *
+     * @return A new string builder containing the processed text
+     */
+    public static StringBuilder trimIndent(Chars string) {
+        StringBuilder result = new StringBuilder(string.length());
+        List<Chars> allLines = string.lineStream().collect(Collectors.toList());
+        trimIndentIntoStringBuilder(allLines, result);
+        return result;
+    }
+
+    /**
+     * Appends the given lines to the string builder, after removing
+     * their common leading whitespace and their trailing whitespace.
+     * Lines are separated with {@code '\n'}.
+     *
+     * @param lines Lines to process
+     * @param sb    Builder to which the result is appended
+     */
+    public static void trimIndentIntoStringBuilder(List<Chars> lines, StringBuilder sb) {
+        appendWithoutCommonPrefix(lines, maxCommonLeadingWhitespaceForAll(lines), sb);
+    }
+
+    /**
+     * Appends every line to the output, minus its first {@code prefixLength}
+     * characters and its trailing whitespace. Blank lines, and lines shorter
+     * than the prefix, only lose their trailing whitespace. A line feed is
+     * written after every line except the last one; the last line only gets
+     * one if it is not also the first line and is not blank once trimmed.
+     */
+    private static void appendWithoutCommonPrefix(List<Chars> lines, int prefixLength, StringBuilder output) {
+        final int lastIndex = lines.size() - 1;
+        int index = 0;
+        for (Chars current : lines) {
+            // remove common whitespace prefix
+            boolean hasPrefix = !StringUtils.isAllBlank(current) && current.length() >= prefixLength;
+            Chars stripped = hasPrefix ? current.subSequence(prefixLength, current.length()) : current;
+            stripped = stripped.trimEnd();
+            stripped.appendChars(output);
+
+            // "&&" binds tighter than "||", the parentheses only make
+            // the grouping of the original condition explicit
+            boolean needsLineFeed = index < lastIndex
+                || (index > 0 && !StringUtils.isAllBlank(stripped));
+            if (needsLineFeed) {
+                output.append('\n'); // normalize line endings to LF
+            }
+            index++;
+        }
+    }
+
+    /**
+     * Remove trailing and leading blank lines. The result starts at the
+     * beginning of the first line that contains a non-whitespace character,
+     * and stops before the line feed that ends the last such line.
+     * Indentation and trailing whitespace of the remaining lines are kept.
+     * If the string only contains whitespace, the result is empty.
+     *
+     * @param string Text to trim
+     *
+     * @return A subsequence of the parameter
+     */
+    public static Chars trimBlankLines(Chars string) {
+        int offsetOfFirstNonBlankChar = string.length();
+        for (int i = 0; i < string.length(); i++) {
+            if (!Character.isWhitespace(string.charAt(i))) {
+                offsetOfFirstNonBlankChar = i;
+                break;
+            }
+        }
+        if (offsetOfFirstNonBlankChar == string.length()) {
+            // every line is blank, so nothing remains
+            return string.subSequence(0, 0);
+        }
+
+        // If the backwards scan finds nothing, then the first non-blank
+        // char is also the last one. Defaulting to zero here would make
+        // the forward search below start before the content.
+        int offsetOfLastNonBlankChar = offsetOfFirstNonBlankChar;
+        for (int i = string.length() - 1; i > offsetOfFirstNonBlankChar; i--) {
+            if (!Character.isWhitespace(string.charAt(i))) {
+                offsetOfLastNonBlankChar = i;
+                break;
+            }
+        }
+
+        // Look backwards for the line feed that precedes the first
+        // non-blank line. We cut right after it, hence the + 1. If there
+        // is none (-1), the content is on the first line and we cut at 0.
+        int cutFromInclusive = string.lastIndexOf('\n', offsetOfFirstNonBlankChar) + 1;
+        // Look forwards for the line feed that ends the last non-blank
+        // line; if there is none, keep everything up to the end.
+        int cutUntilExclusive = minus1Default(string.indexOf('\n', offsetOfLastNonBlankChar), string.length());
+
+        return string.subSequence(cutFromInclusive, cutUntilExclusive);
+    }
+
+    /**
+     * Returns the default value if the first parameter is -1 (the
+     * "not found" result of index searches), otherwise returns the
+     * first parameter.
+     */
+    private static int minus1Default(int i, int defaultValue) {
+        if (i == -1) {
+            return defaultValue;
+        }
+        return i;
+    }
+
+
+    /**
+     * Returns the number of characters at the start of the sequence
+     * for which {@link Character#isWhitespace(char)} is true. This is
+     * the length of the sequence if it only contains whitespace.
+     */
+    private static int countLeadingWhitespace(CharSequence s) {
+        final int length = s.length();
+        for (int i = 0; i < length; i++) {
+            if (!Character.isWhitespace(s.charAt(i))) {
+                return i;
+            }
+        }
+        return length;
+    }
+
+    /**
+     * Splits the source on {@code '\n'} and removes the leading
+     * whitespace that is common to the lines.
+     *
+     * <p>Blank lines (other than the last one) are ignored when the
+     * common indentation is computed, so they may be shorter than it.
+     * Such lines are returned unchanged instead of being cut, since
+     * cutting them would index past their end.
+     *
+     * @param source Text to split
+     *
+     * @return The lines of the source, without their common indentation
+     *
+     * @throws NullPointerException If the parameter is null
+     */
+    public static String[] linesWithTrimIndent(String source) {
+        String[] lines = source.split("\n");
+        int trimDepth = maxCommonLeadingWhitespaceForAll(Arrays.asList(lines));
+        if (trimDepth == 0) {
+            return lines;
+        }
+        String[] trimmed = new String[lines.length];
+        for (int i = 0; i < lines.length; i++) {
+            String line = lines[i];
+            // same length guard as appendWithoutCommonPrefix
+            trimmed[i] = line.length() >= trimDepth ? line.substring(trimDepth) : line;
+        }
+        return trimmed;
+    }
+
+

    /**
     * Are the two String values the same. The Strings can be optionally trimmed
--- a/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/ASTStringLiteral.java
+++ b/pmd-java/src/main/java/net/sourceforge/pmd/lang/java/ast/ASTStringLiteral.java
@ -4,14 +4,16 @@

 package net.sourceforge.pmd.lang.java.ast;

-import java.util.Arrays;
 import java.util.List;
+import java.util.stream.Collectors;

 import org.apache.commons.lang3.StringEscapeUtils;
-import org.apache.commons.lang3.StringUtils;
 import org.checkerframework.checker.nullness.qual.NonNull;
 import org.checkerframework.checker.nullness.qual.Nullable;

+import net.sourceforge.pmd.lang.document.Chars;
+import net.sourceforge.pmd.util.StringUtil;
+
 /**
 * Represents a string literal. The image of this node is the literal as it appeared
 * in the source ({@link #getText()}). {@link #getConstValue()} allows to recover
@ -71,7 +73,7 @@ public final class ASTStringLiteral extends AbstractLiteral implements ASTLitera
    @Override
    protected @Nullable Object buildConstValue() {
        if (isTextBlock()) {
-            return determineTextBlockContent(getImage());
+            return determineTextBlockContent(getText());
        } else {
            CharSequence image = getText();
            CharSequence woDelims = image.subSequence(1, image.length() - 1);
@ -79,46 +81,36 @@ public final class ASTStringLiteral extends AbstractLiteral implements ASTLitera
        }
    }

-    static String determineTextBlockContent(String image) {
-        // normalize line endings to LF
-        String content = image.replaceAll("\r\n|\r", "\n");
-        int start = determineContentStart(content);
-        content = content.substring(start, content.length() - TEXTBLOCK_DELIMITER.length());
-
-        int prefixLength = Integer.MAX_VALUE;
-        List<String> lines = Arrays.asList(content.split("\\n"));
-        for (int i = 0; i < lines.size(); i++) {
-            String line = lines.get(i);
-            // compute common prefix
-            if (!StringUtils.isAllBlank(line) || i == lines.size() - 1) {
-                prefixLength = Math.min(prefixLength, countLeadingWhitespace(line));
-            }
-        }
-        if (prefixLength == Integer.MAX_VALUE) {
-            // common prefix not found
-            prefixLength = 0;
-        }
-        StringBuilder sb = new StringBuilder(content.length());
-        for (int i = 0; i < lines.size(); i++) {
-            String line = lines.get(i);
-            // remove common whitespace prefix
-            if (!StringUtils.isAllBlank(line) && line.length() >= prefixLength) {
-                line = line.substring(prefixLength);
-            }
-            line = removeTrailingWhitespace(line);
-            sb.append(line);
-
-            boolean isLastLine = i == lines.size() - 1;
-            boolean isFirstLine = i == 0;
-            if (!isLastLine || !isFirstLine && !StringUtils.isAllBlank(line)) {
-                sb.append('\n');
-            }
-        }
-
+    /**
+     * Computes the string value of a text block from its source text.
+     * The opening line and the closing delimiter are dropped, the
+     * indentation common to the content lines is removed, and escape
+     * sequences are then interpreted.
+     *
+     * @param image Text of the literal, including its delimiters
+     */
+    static String determineTextBlockContent(Chars image) {
+        List<Chars> lines = getContentLines(image);
+        StringBuilder sb = new StringBuilder(image.length());
+        StringUtil.trimIndentIntoStringBuilder(lines, sb);
        interpretEscapeSequences(sb);
        return sb.toString();
    }

+    /**
+     * Same as {@link #determineTextBlockContent(Chars)}, for a plain
+     * string. The string is wrapped into a {@link Chars} first.
+     */
+    static String determineTextBlockContent(String image) {
+        Chars wrapped = Chars.wrap(image);
+        return determineTextBlockContent(wrapped);
+    }
+
+    /**
+     * Returns the content lines of a text block: the lines of the
+     * parameter without the first one, and with the closing delimiter
+     * cut off the end of the last one.
+     */
+    private static @NonNull List<Chars> getContentLines(Chars chars) {
+        List<Chars> allLines = chars.lineStream().collect(Collectors.toList());
+        assert allLines.size() >= 2 : "invalid text block syntax " + chars;
+
+        // the first line only holds the opening """ and some whitespace
+        List<Chars> content = allLines.subList(1, allLines.size());
+
+        // the closing """ sits at the end of the last line, cut it off
+        int tail = content.size() - 1;
+        Chars closingLine = content.get(tail);
+        assert closingLine.endsWith(TEXTBLOCK_DELIMITER);
+        content.set(tail, closingLine.removeSuffix(TEXTBLOCK_DELIMITER));
+
+        return content;
+    }
+
    private static void interpretEscapeSequences(StringBuilder sb) {
        // interpret escape sequences "\<LF>" (line continuation), "n","t","b","r","f", "s", "\"", "\'", "\\"
        // we need to interpret everything in one pass, so regex replacement is inappropriate
@ -172,33 +164,4 @@ public final class ASTStringLiteral extends AbstractLiteral implements ASTLitera
            }
        }
    }
-
-    private static int determineContentStart(String s) {
-        int start = TEXTBLOCK_DELIMITER.length(); // this is the opening delimiter
-        // the content begins after at the first character after the line terminator
-        // of the opening delimiter
-        while (start < s.length() && Character.isWhitespace(s.charAt(start))) {
-            if (s.charAt(start) == '\n') {
-                return start + 1;
-            }
-            start++;
-        }
-        return start;
-    }
-
-    private static int countLeadingWhitespace(String s) {
-        int count = 0;
-        while (count < s.length() && Character.isWhitespace(s.charAt(count))) {
-            count++;
-        }
-        return count;
-    }
-
-    private static String removeTrailingWhitespace(String s) {
-        int endIndexIncluding = s.length() - 1;
-        while (endIndexIncluding >= 0 && Character.isWhitespace(s.charAt(endIndexIncluding))) {
-            endIndexIncluding--;
-        }
-        return s.substring(0, endIndexIncluding + 1);
-    }
 }
--- a/pmd-test/src/main/java/net/sourceforge/pmd/testframework/RuleTst.java
+++ b/pmd-test/src/main/java/net/sourceforge/pmd/testframework/RuleTst.java
@ -44,16 +44,19 @@ import net.sourceforge.pmd.RuleViolation;
 import net.sourceforge.pmd.lang.Language;
 import net.sourceforge.pmd.lang.LanguageRegistry;
 import net.sourceforge.pmd.lang.LanguageVersion;
+import net.sourceforge.pmd.lang.document.Chars;
 import net.sourceforge.pmd.lang.document.TextFile;
 import net.sourceforge.pmd.processor.AbstractPMDProcessor;
 import net.sourceforge.pmd.properties.PropertyDescriptor;
 import net.sourceforge.pmd.renderers.TextRenderer;
 import net.sourceforge.pmd.reporting.GlobalAnalysisListener;
+import net.sourceforge.pmd.util.StringUtil;

 /**
 * Advanced methods for test cases
 */
 public abstract class RuleTst {
+
    private final DocumentBuilder documentBuilder;

    /** Use a single classloader for all tests. */
@ -483,15 +486,16 @@ public abstract class RuleTst {
                    throw new RuntimeException("No matching code fragment found for coderef");
                }
            }
+            code = StringUtil.trimBlankLines(Chars.wrap(code)).toString();

            String description = getNodeValue(testCode, "description", true);
-            int expectedProblems = Integer.parseInt(getNodeValue(testCode, "expected-problems", true));
+            int expectedProblems = Integer.parseInt(getNodeValue(testCode, "expected-problems", true).trim());

            String languageVersionString = getNodeValue(testCode, "source-type", false);
            if (languageVersionString == null) {
                tests[i] = new TestDescriptor(code, description, expectedProblems, rule);
            } else {
-
+                languageVersionString = languageVersionString.trim();
                LanguageVersion languageVersion = parseSourceType(languageVersionString);
                if (languageVersion != null) {
                    tests[i] = new TestDescriptor(code, description, expectedProblems, rule, languageVersion);
@ -553,6 +557,6 @@ public abstract class RuleTst {
                buffer.append(node.getNodeValue());
            }
        }
-        return buffer.toString().trim();
+        return buffer.toString();
    }
 }