diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/util/StringUtil.java b/pmd-core/src/main/java/net/sourceforge/pmd/util/StringUtil.java index 2cb2ddab43..6d463c1702 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/util/StringUtil.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/util/StringUtil.java @@ -38,6 +38,114 @@ public final class StringUtil { private StringUtil() { } + + /** + * Returns the (1-based) line number of the character at the given index. + * Line terminators (\r, \n) are assumed to be on the line they *end* + * and not on the following line. The method also accepts that the given + * offset be the length of the string (in which case there's no targeted character), + * to get the line number of a character that would be inserted at + * the end of the string. + * + *
+     *
+     *     lineNumberAt("a\nb", 0)  = 1
+     *     lineNumberAt("a\nb", 1)  = 1
+     *     lineNumberAt("a\nb", 2)  = 2
+     *     lineNumberAt("a\nb", 3)  = 2  // charAt(3) doesn't exist though
+     *     lineNumberAt("a\nb", 4)  = -1
+     *
+     *     lineNumberAt("", 0) = 1
+     *     lineNumberAt("", _) = -1
+     *
+     * 
+ * + * @param charSeq Char sequence + * @param offsetInclusive Offset in the sequence of the targeted character. + * May be the length of the sequence. + * @return -1 if the offset is not in {@code [0, length]}, otherwise + * the line number + */ + public static int lineNumberAt(CharSequence charSeq, int offsetInclusive) { + int len = charSeq.length(); + + if (offsetInclusive > len || offsetInclusive < 0) { + return -1; + } + + int l = 1; + for (int curOffset = 0; curOffset < offsetInclusive; curOffset++) { + // if we end up outside the string, then the line is undefined + if (curOffset >= len) { + return -1; + } + + char c = charSeq.charAt(curOffset); + if (c == '\n') { + l++; + } else if (c == '\r') { + if (curOffset + 1 < len && charSeq.charAt(curOffset + 1) == '\n') { + if (curOffset == offsetInclusive - 1) { + // the CR is assumed to be on the same line as the LF + return l; + } + curOffset++; // SUPPRESS CHECKSTYLE jump to after the \n + } + l++; + } + } + return l; + } + + /** + * Returns the (1-based) column number of the character at the given index. + * Line terminators are by convention taken to be part of the line they end, + * and not the new line they start. Each character has width 1 (including {@code \t}). + * The method also accepts that the given offset be the length of the + * string (in which case there's no targeted character), to get the column + * number of a character that would be inserted at the end of the string. + * + *
+     *
+     *     columnNumberAt("a\nb", 0)  = 1
+     *     columnNumberAt("a\nb", 1)  = 2
+     *     columnNumberAt("a\nb", 2)  = 1
+     *     columnNumberAt("a\nb", 3)  = 2   // charAt(3) doesn't exist though
+     *     columnNumberAt("a\nb", 4)  = -1
+     *
+     *     columnNumberAt("a\r\n", 2)  = 3
+     *
+     * 
+ * + * @param charSeq Char sequence + * @param offsetInclusive Offset in the sequence + * @return -1 if the offset is not in {@code [0, length]}, otherwise + * the column number + */ + public static int columnNumberAt(CharSequence charSeq, final int offsetInclusive) { + if (offsetInclusive == charSeq.length()) { + return charSeq.length() == 0 ? 1 : 1 + columnNumberAt(charSeq, offsetInclusive - 1); + } else if (offsetInclusive > charSeq.length() || offsetInclusive < 0) { + return -1; + } + + int col = 0; + char next = 0; + for (int i = offsetInclusive; i >= 0; i--) { + char c = charSeq.charAt(i); + + if (offsetInclusive != i) { + if (c == '\n' || c == '\r' && next != '\n') { + return col; + } + } + + col++; + next = c; + } + return col; + } + /** * Formats a double to a percentage, keeping {@code numDecimal} decimal places. * diff --git a/pmd-core/src/test/java/net/sourceforge/pmd/util/StringUtilTest.java b/pmd-core/src/test/java/net/sourceforge/pmd/util/StringUtilTest.java index 387928c89e..8ca15c97dc 100644 --- a/pmd-core/src/test/java/net/sourceforge/pmd/util/StringUtilTest.java +++ b/pmd-core/src/test/java/net/sourceforge/pmd/util/StringUtilTest.java @@ -10,6 +10,43 @@ import org.junit.Test; public class StringUtilTest { + @Test + public void testColumnNumber() { + assertEquals(-1, StringUtil.columnNumberAt("f\rah\nb", -1)); + assertEquals(1, StringUtil.columnNumberAt("f\rah\nb", 0)); + assertEquals(2, StringUtil.columnNumberAt("f\rah\nb", 1)); + assertEquals(1, StringUtil.columnNumberAt("f\rah\nb", 2)); + assertEquals(2, StringUtil.columnNumberAt("f\rah\nb", 3)); + assertEquals(3, StringUtil.columnNumberAt("f\rah\nb", 4)); + assertEquals(1, StringUtil.columnNumberAt("f\rah\nb", 5)); + assertEquals(2, StringUtil.columnNumberAt("f\rah\nb", 6)); + assertEquals(-1, StringUtil.columnNumberAt("f\rah\nb", 7)); + } + + @Test + public void testColumnNumberCrLf() { + assertEquals(-1, StringUtil.columnNumberAt("f\r\nb", -1)); + assertEquals(1, StringUtil.columnNumberAt("f\r\nb", 0)); + assertEquals(2, StringUtil.columnNumberAt("f\r\nb", 1)); + assertEquals(3, StringUtil.columnNumberAt("f\r\nb", 2)); + assertEquals(1, StringUtil.columnNumberAt("f\r\nb", 3)); + assertEquals(2, StringUtil.columnNumberAt("f\r\nb", 4)); + assertEquals(-1, StringUtil.columnNumberAt("f\r\nb", 5)); + } + + @Test + public void testColumnNumberTrailing() { + assertEquals(1, StringUtil.columnNumberAt("\n", 0)); + assertEquals(2, StringUtil.columnNumberAt("\n", 1)); + assertEquals(-1, StringUtil.columnNumberAt("\n", 2)); + } + + @Test + public void testColumnNumberEmpty() { + assertEquals(1, StringUtil.columnNumberAt("", 0)); + assertEquals(-1, StringUtil.columnNumberAt("", 1)); + } + @Test public void testReplaceWithOneChar() { assertEquals("faa", StringUtil.replaceString("foo", 'o', "a"));