Applied patch from Juan Jesús García de Soria on CPD

git-svn-id: https://pmd.svn.sourceforge.net/svnroot/pmd/branches/pmd/4.2.x@7106 51baf565-9d33-0410-a72c-fc3788e3496d
This commit is contained in:
Romain Pelisse
2010-07-24 14:12:52 +00:00
parent ffc48e9835
commit 19da0dfc1a
7 changed files with 72 additions and 159 deletions

View File

@ -11,6 +11,7 @@ Fixed bug 2835074 - False -: DoubleCheckedLocking with reversed null check
Fixed bug 2826119 - False +: DoubleCheckedLocking warning with volatile field
Fixed bug 2904832 - Type resolution not working for ASTType when using an inner class
Modify (and hopefully fixed) CPD algorithm thanks to a patch from Juan Jesús García de Soria.
Correct -benchmark reporting of Rule visits via the RuleChain
Fix issue with Type Resolution incorrectly handling of Classes with same name as a java.lang Class.
The JSP/JSF parser can now parse Unicode input.

View File

@ -6,6 +6,7 @@ package test.net.sourceforge.pmd.cpd;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import net.sourceforge.pmd.cpd.Match;
import net.sourceforge.pmd.cpd.Renderer;
import net.sourceforge.pmd.cpd.TokenEntry;

View File

@ -6,18 +6,18 @@ package net.sourceforge.pmd.cpd;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import net.sourceforge.pmd.util.FileFinder;
public class CPD {
private Map<String, SourceCode> source = new HashMap<String, SourceCode>();
private Map<String, SourceCode> source = new TreeMap<String, SourceCode>();
private CPDListener listener = new CPDNullListener();
private Tokens tokens = new Tokens();
private int minimumTileSize;

View File

@ -14,10 +14,8 @@ public class Match implements Comparable<Match> {
private int tokenCount;
private int lineCount;
private Set<TokenEntry> markSet = new TreeSet<TokenEntry>();
private TokenEntry[] marks = new TokenEntry[2];
private String code;
private MatchCode mc;
private Set<TokenEntry> markSet = new TreeSet<TokenEntry>();
private String code;
private String label;
public static final Comparator<Match> MatchesComparator = new Comparator<Match>() {
@ -34,55 +32,25 @@ public class Match implements Comparable<Match> {
public static final Comparator<Match> LabelComparator = new Comparator<Match>() {
public int compare(Match ma, Match mb) {
if (ma.getLabel() == null) return 1;
if (mb.getLabel() == null) return -1;
if (ma.getLabel() == null) {
return 1;
}
if (mb.getLabel() == null) {
return -1;
}
return mb.getLabel().compareTo(ma.getLabel());
}
};
public static final Comparator<Match> LengthComparator = new Comparator<Match>() {
public static final Comparator<Match> LENGTH_COMPARATOR = new Comparator<Match>() {
public int compare(Match ma, Match mb) {
return mb.getLineCount() - ma.getLineCount();
}
};
public static class MatchCode {
private int first;
private int second;
public MatchCode() {
}
public MatchCode(TokenEntry m1, TokenEntry m2) {
first = m1.getIndex();
second = m2.getIndex();
}
public int hashCode() {
return first + 37 * second;
}
public boolean equals(Object other) {
MatchCode mc = (MatchCode) other;
return mc.first == first && mc.second == second;
}
public void setFirst(int first) {
this.first = first;
}
public void setSecond(int second) {
this.second = second;
}
}
public Match(int tokenCount, TokenEntry first, TokenEntry second) {
markSet.add(first);
markSet.add(second);
marks[0] = first;
marks[1] = second;
this.tokenCount = tokenCount;
}
@ -119,15 +87,15 @@ public class Match implements Comparable<Match> {
if (diff != 0) {
return diff;
}
return other.getFirstMark().getIndex() - getFirstMark().getIndex();
return getFirstMark().getIndex() - other.getFirstMark().getIndex();
}
public TokenEntry getFirstMark() {
return marks[0];
return getMark(0);
}
public TokenEntry getSecondMark() {
return marks[1];
return getMark(1);
}
public String toString() {
@ -138,15 +106,8 @@ public class Match implements Comparable<Match> {
return markSet;
}
public MatchCode getMatchCode() {
if (mc == null) {
mc = new MatchCode(marks[0], marks[1]);
}
return mc;
}
public int getEndIndex() {
return marks[1].getIndex() + getTokenCount() - 1;
return getMark(0).getIndex() + getTokenCount() - 1;
}
public void setMarkSet(Set<TokenEntry> markSet) {
@ -160,4 +121,17 @@ public class Match implements Comparable<Match> {
public String getLabel() {
return label;
}
public void addTokenEntry(TokenEntry entry){
markSet.add(entry);
}
private TokenEntry getMark(int index) {
TokenEntry result = null;
int i = 0;
for (Iterator<TokenEntry> it = markSet.iterator(); it.hasNext() && i < index + 1; ){
result = it.next();
}
return result;
}
}

View File

@ -73,16 +73,15 @@ public class MatchAlgorithm {
cpdListener.phaseUpdate(CPDListener.GROUPING);
matches = matchCollector.getMatches();
matchCollector = null;
for (Match match: matches) {
for (Iterator<TokenEntry> occurrences = match.iterator(); occurrences.hasNext();) {
for (Match match : matches) {
Iterator<TokenEntry> occurrences = match.iterator();
if (occurrences.hasNext()) {
TokenEntry mark = occurrences.next();
match.setLineCount(tokens.getLineCount(mark, match));
if (!occurrences.hasNext()) {
int start = mark.getBeginLine();
int end = start + match.getLineCount() - 1;
SourceCode sourceCode = source.get(mark.getTokenSrcID());
match.setSourceCodeSlice(sourceCode.getSlice(start, end));
}
int start = mark.getBeginLine();
int end = start + match.getLineCount() - 1;
SourceCode sourceCode = source.get(mark.getTokenSrcID());
match.setSourceCodeSlice(sourceCode.getSlice(start, end));
}
}
cpdListener.phaseUpdate(CPDListener.DONE);

View File

@ -5,18 +5,14 @@ package net.sourceforge.pmd.cpd;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
public class MatchCollector {
private List<Match> matchList = new ArrayList<Match>();
private Map<Integer, Map<Integer, Match>> matchTree = new TreeMap<Integer, Map<Integer, Match>>();
private MatchAlgorithm ma;
private Map<Match.MatchCode, Match> startMap = new HashMap<Match.MatchCode, Match>();
private Map<String, List<Match>> fileMap = new HashMap<String, List<Match>>();
public MatchCollector(MatchAlgorithm ma) {
this.ma = ma;
@ -45,105 +41,46 @@ public class MatchCollector {
if (diff + dupes >= 1) {
continue;
}
determineMatch(mark1, mark2, dupes);
reportMatch(mark1, mark2, dupes);
}
}
}
public List<Match> getMatches() {
List<Match> matchList = new ArrayList<Match>(startMap.values());
Collections.sort(matchList);
Set<Match.MatchCode> matchSet = new HashSet<Match.MatchCode>();
Match.MatchCode matchCode = new Match.MatchCode();
for (int i = matchList.size(); i > 1; i--) {
Match match1 = matchList.get(i - 1);
TokenEntry mark1 = match1.getMarkSet().iterator().next();
matchSet.clear();
matchSet.add(match1.getMatchCode());
for (int j = i - 1; j > 0; j--) {
Match match2 = matchList.get(j - 1);
if (match1.getTokenCount() != match2.getTokenCount()) {
break;
}
TokenEntry mark2 = null;
for (Iterator<TokenEntry> iter = match2.getMarkSet().iterator(); iter.hasNext();) {
mark2 = iter.next();
if (mark2 != mark1) {
break;
}
}
int dupes = countDuplicateTokens(mark1, mark2);
if (dupes < match1.getTokenCount()) {
break;
}
matchSet.add(match2.getMatchCode());
match1.getMarkSet().addAll(match2.getMarkSet());
matchList.remove(i - 2);
i--;
}
if (matchSet.size() == 1) {
continue;
}
//prune the mark set
Set pruned = match1.getMarkSet();
boolean done = false;
ArrayList<TokenEntry> a1 = new ArrayList<TokenEntry>(match1.getMarkSet());
Collections.sort(a1);
for (int outer = 0; outer < a1.size() - 1 && !done; outer++) {
TokenEntry cmark1 = a1.get(outer);
for (int inner = outer + 1; inner < a1.size() && !done; inner++) {
TokenEntry cmark2 = a1.get(inner);
matchCode.setFirst(cmark1.getIndex());
matchCode.setSecond(cmark2.getIndex());
if (!matchSet.contains(matchCode)) {
if (pruned.size() > 2) {
pruned.remove(cmark2);
}
if (pruned.size() == 2) {
done = true;
}
}
}
private void reportMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
Map<Integer, Match> matches = matchTree.get(dupes);
if (matches == null) {
matches = new TreeMap<Integer, Match>();
matchTree.put(dupes, matches);
addNewMatch(mark1, mark2, dupes, matches);
} else {
Match matchA = matchTree.get(dupes).get(mark1.getIndex());
Match matchB = matchTree.get(dupes).get(mark2.getIndex());
if (matchA == null && matchB == null) {
addNewMatch(mark1, mark2, dupes, matches);
} else if(matchA == null) {
matchB.addTokenEntry(mark1);
matches.put(mark1.getIndex(), matchB);
} else if(matchB == null) {
matchA.addTokenEntry(mark2);
matches.put(mark2.getIndex(), matchA);
}
}
return matchList;
}
/**
* A greedy algorithm for determining non-overlapping matches
*/
private void determineMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
private void addNewMatch(TokenEntry mark1, TokenEntry mark2, int dupes, Map<Integer, Match> matches){
Match match = new Match(dupes, mark1, mark2);
String fileKey = mark1.getTokenSrcID() + mark2.getTokenSrcID();
List<Match> pairMatches = fileMap.get(fileKey);
if (pairMatches == null) {
pairMatches = new ArrayList<Match>();
fileMap.put(fileKey, pairMatches);
}
boolean add = true;
for (int i = 0; i < pairMatches.size(); i++) {
Match other = pairMatches.get(i);
if (other.getFirstMark().getIndex() + other.getTokenCount() - mark1.getIndex()
> 0) {
boolean ordered = other.getSecondMark().getIndex() - mark2.getIndex() < 0;
if ((ordered && (other.getEndIndex() - mark2.getIndex() > 0))
|| (!ordered && (match.getEndIndex() - other.getSecondMark().getIndex()) > 0)) {
if (other.getTokenCount() >= match.getTokenCount()) {
add = false;
break;
} else {
pairMatches.remove(i);
startMap.remove(other.getMatchCode());
}
}
}
}
if (add) {
pairMatches.add(match);
startMap.put(match.getMatchCode(), match);
}
matches.put(mark1.getIndex(), match);
matches.put(mark2.getIndex(), match);
matchList.add(match);
}
@SuppressWarnings("PMD.CompareObjectsWithEquals")
public List<Match> getMatches() {
Collections.sort(matchList);
return matchList;
}
private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) {
if (mark1.getIndex() == 0) {
return false;

View File

@ -58,7 +58,8 @@
<subsection name="Contributors">
<ul>
<li>Nicolas Dordet - Fixed an issue on CloseResource</li>
<li>Sergey Pariev - Fixed an ugly ArrayIndexOutOfBoundsException in CPD for Ruby</li>
<li>Juan Jesús García de Soria - rework CPD algorithm</li>
<li>Sergey Pariev - Fixed an ugly ArrayIndexOutOfBoundsException in CPD for Ruby</li>
<li>Chris Heister - Reported and noted proper fix for bug in IDEAJ renderer operations</li>
<li>Ralf Wagner - Reported bug in UselessOperationOnImmutable, reported and noted proper fix for broken XSLT</li>
<li>Caroline Rioux - Reported bug in ImmutableField</li>