diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AbstractTokenizer.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AbstractTokenizer.java index a79aee80d4..e9b7f02594 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AbstractTokenizer.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/AbstractTokenizer.java @@ -34,6 +34,10 @@ public abstract class AbstractTokenizer implements Tokenizer { private int lineNumber = 0; private String currentLine; + // both zero-based + private int tokBeginLine; + private int tokBeginCol; + protected boolean spanMultipleLinesString = true; // Most languages do, so // default is true protected Character spanMultipleLinesLineContinuationCharacter = null; @@ -49,23 +53,35 @@ public abstract class AbstractTokenizer implements Tokenizer { int loc = 0; while (loc < currentLine.length()) { StringBuilder token = new StringBuilder(); - loc = getTokenFromLine(token, loc); + loc = getTokenFromLine(token, loc); // may jump several lines + if (token.length() > 0 && !isIgnorableString(token.toString())) { + final String image; if (downcaseString) { - token = new StringBuilder(token.toString().toLowerCase(Locale.ROOT)); + image = token.toString().toLowerCase(Locale.ROOT); + } else { + image = token.toString(); } - // need to re-think how to link this - // if ( CPD.debugEnable ) { - // System.out.println("Token added:" + token.toString()); - // } - tokenEntries.add(new TokenEntry(token.toString(), tokens.getFileName(), lineNumber + 1, loc - token.length(), loc - 1)); + + tokenEntries.add(new TokenEntry(image, + tokens.getFileName(), + tokBeginLine + 1, + tokBeginCol + 1, + loc)); } } } tokenEntries.add(TokenEntry.getEOF()); } + /** + * Returns (0-based) EXclusive offset of the end of the token, + * may jump several lines (sets {@link #lineNumber} in this case). + */ private int getTokenFromLine(StringBuilder token, int loc) { + tokBeginLine = lineNumber; + tokBeginCol = loc; + for (int j = loc; j < currentLine.length(); j++) { char tok = currentLine.charAt(j); if (!Character.isWhitespace(tok) && !ignoreCharacter(tok)) { @@ -89,6 +105,9 @@ public abstract class AbstractTokenizer implements Tokenizer { } else { if (token.length() > 0) { return j; + } else { + // ignored char + tokBeginCol++; } } loc = j; @@ -125,14 +144,14 @@ public abstract class AbstractTokenizer implements Tokenizer { if (spanMultipleLinesLineContinuationCharacter != null && token.length() > 0 && token.charAt(token.length() - 1) == spanMultipleLinesLineContinuationCharacter) { - token.deleteCharAt(token.length() - 1); + token.setLength(token.length() - 1); } // parsing new line currentLine = code.get(++lineNumber); // Warning : recursive call ! loc = parseString(token, 0, stringDelimiter); } - return loc + 1; + return loc; } private boolean ignoreCharacter(char tok) { diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java index 7246dc6acf..d39e5e93ca 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/TokenEntry.java @@ -62,6 +62,7 @@ public class TokenEntry implements Comparable { * @param endColumn the column number, 1-based */ public TokenEntry(String image, String tokenSrcID, int beginLine, int beginColumn, int endColumn) { + assert beginLine >= 1 && beginColumn >= 1 && endColumn >= 1 : "Coordinates are 1-based"; setImage(image); this.tokenSrcID = tokenSrcID; this.beginLine = beginLine; diff --git a/pmd-ruby/src/test/resources/net/sourceforge/pmd/lang/ruby/cpd/testdata/server.rb b/pmd-ruby/src/test/resources/net/sourceforge/pmd/lang/ruby/cpd/testdata/server.rb index fcff68239c..7e71208c69 100644 --- a/pmd-ruby/src/test/resources/net/sourceforge/pmd/lang/ruby/cpd/testdata/server.rb +++ b/pmd-ruby/src/test/resources/net/sourceforge/pmd/lang/ruby/cpd/testdata/server.rb @@ -1,7 +1,7 @@ require "socket" -gs = TCPServer.open(0) -addr = gs.addr +gs = TCPServer.open(0) +addr = gs.addr addr.shift while true diff --git a/pmd-ruby/src/test/resources/net/sourceforge/pmd/lang/ruby/cpd/testdata/server.txt b/pmd-ruby/src/test/resources/net/sourceforge/pmd/lang/ruby/cpd/testdata/server.txt index b54c81a53f..03466e6ece 100644 --- a/pmd-ruby/src/test/resources/net/sourceforge/pmd/lang/ruby/cpd/testdata/server.txt +++ b/pmd-ruby/src/test/resources/net/sourceforge/pmd/lang/ruby/cpd/testdata/server.txt @@ -1,45 +1,44 @@ [Image] or [Truncated image[ Bcol Ecol L1 - [require] 0 6 + [require] 1 7 ["socket"] 9 16 L3 - [gs] 0 1 - [=] 3 3 - [tcpserver.open] 5 18 - [0] 20 20 + [gs] 1 2 + [=] 4 4 + [tcpserver.open] 7 20 + [0] 22 22 L4 - [addr] 0 3 - [=] 5 5 - [gs.addr] 7 13 + [addr] 1 4 + [=] 7 7 + [gs.addr] 9 15 L5 - [addr.shift] 0 9 + [addr.shift] 1 10 L7 - [true] 6 9 + [true] 7 10 L8 - [ns] 2 3 - [=] 5 5 - [gs.accept] 7 15 + [ns] 3 4 + [=] 6 6 + [gs.accept] 8 16 L9 - [print] 2 6 - [ns] 8 9 + [print] 3 7 + [ns] 9 10 [" is accepted"] 13 26 L10 - [thread.start] 2 13 + [thread.start] 3 14 L11 - [s] 4 4 - [=] 6 6 - [ns] 8 9 - [ # save to dynamic variable] 31 57 + [s] 5 5 + [=] 7 7 + [ns] 9 10 + [ # save to dynamic variable] 33 58 L12 - [s.gets] 10 15 + [s.gets] 11 16 L13 - [s.write] 6 12 - [$_] 14 15 + [s.write] 7 13 + [$_] 15 16 L15 - [print] 4 8 - [s] 10 10 -L18 - [" is gone [ -46 39 + [print] 5 9 + [s] 11 11 + [" is gone [ 14 36 L19 - [s.close] 4 10 + [s.close] 5 11 EOF