Applied patch from Juan JesÃºs GarcÃa de Soria on CPD

git-svn-id: https://pmd.svn.sourceforge.net/svnroot/pmd/branches/pmd/4.2.x@7106 51baf565-9d33-0410-a72c-fc3788e3496d
2010-07-24 14:12:52 +00:00 · 2010-07-24 14:12:52 +00:00 · 19da0dfc1a
commit 19da0dfc1a
parent ffc48e9835
7 changed files with 72 additions and 159 deletions
--- a/pmd/etc/changelog.txt
+++ b/pmd/etc/changelog.txt
@ -11,6 +11,7 @@ Fixed bug 2835074 - False -: DoubleCheckedLocking with reversed null check
 Fixed bug 2826119 - False +: DoubleCheckedLocking warning with volatile field
 Fixed bug 2904832 - Type resolution not working for ASTType when using an inner class

+Modify (and hopefully fixed) CPD algorithm thanks to a patch from Juan JesÃºs GarcÃa de Soria.
 Correct -benchmark reporting of Rule visits via the RuleChain
 Fix issue with Type Resolution incorrectly handling of Classes with same name as a java.lang Class.
 The JSP/JSF parser can now parse Unicode input.
--- a/pmd/regress/test/net/sourceforge/pmd/cpd/XMLRendererTest.java
+++ b/pmd/regress/test/net/sourceforge/pmd/cpd/XMLRendererTest.java
@ -6,6 +6,7 @@ package test.net.sourceforge.pmd.cpd;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
+
 import net.sourceforge.pmd.cpd.Match;
 import net.sourceforge.pmd.cpd.Renderer;
 import net.sourceforge.pmd.cpd.TokenEntry;
--- a/pmd/src/net/sourceforge/pmd/cpd/CPD.java
+++ b/pmd/src/net/sourceforge/pmd/cpd/CPD.java
@ -6,18 +6,18 @@ package net.sourceforge.pmd.cpd;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
-import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.TreeMap;

 import net.sourceforge.pmd.util.FileFinder;

 public class CPD {

-    private Map<String, SourceCode> source = new HashMap<String, SourceCode>();
+    private Map<String, SourceCode> source = new TreeMap<String, SourceCode>();
    private CPDListener listener = new CPDNullListener();
    private Tokens tokens = new Tokens();
    private int minimumTileSize;
--- a/pmd/src/net/sourceforge/pmd/cpd/Match.java
+++ b/pmd/src/net/sourceforge/pmd/cpd/Match.java
@ -14,10 +14,8 @@ public class Match implements Comparable<Match> {

    private int tokenCount;
    private int lineCount;
-    private Set<TokenEntry> markSet = new TreeSet<TokenEntry>();
-    private TokenEntry[] marks = new TokenEntry[2];
-    private String code;
-    private MatchCode mc;
+    private Set<TokenEntry> markSet = new TreeSet<TokenEntry>();    
+    private String code;    
    private String label;
    
    public static final Comparator<Match> MatchesComparator = new Comparator<Match>() {
@ -34,55 +32,25 @@ public class Match implements Comparable<Match> {
    
    public static final Comparator<Match> LabelComparator = new Comparator<Match>() {
    	public int compare(Match ma, Match mb) {
-    		if (ma.getLabel() == null) return 1;
-    		if (mb.getLabel() == null) return -1;
+    		if (ma.getLabel() == null) {
+    		    return 1;
+    		}
+    		if (mb.getLabel() == null) {
+    		    return -1;
+    		}
    		return mb.getLabel().compareTo(ma.getLabel());
    	}
    };
    
-    public static final Comparator<Match> LengthComparator = new Comparator<Match>() {
+    public static final Comparator<Match> LENGTH_COMPARATOR = new Comparator<Match>() {
        public int compare(Match ma, Match mb) {
            return mb.getLineCount() - ma.getLineCount();
        }
    };
-    
-    public static class MatchCode {
-
-        private int first;
-        private int second;
-
-        public MatchCode() {
-        }
-
-        public MatchCode(TokenEntry m1, TokenEntry m2) {
-            first = m1.getIndex();
-            second = m2.getIndex();
-        }
-
-        public int hashCode() {
-            return first + 37 * second;
-        }
-
-        public boolean equals(Object other) {
-            MatchCode mc = (MatchCode) other;
-            return mc.first == first && mc.second == second;
-        }
-
-        public void setFirst(int first) {
-            this.first = first;
-        }
-
-        public void setSecond(int second) {
-            this.second = second;
-        }
-
-    }

    public Match(int tokenCount, TokenEntry first, TokenEntry second) {
        markSet.add(first);
        markSet.add(second);
-        marks[0] = first;
-        marks[1] = second;
        this.tokenCount = tokenCount;
    }

@ -119,15 +87,15 @@ public class Match implements Comparable<Match> {
        if (diff != 0) {
            return diff;
        }
-        return other.getFirstMark().getIndex() - getFirstMark().getIndex();
+        return getFirstMark().getIndex() - other.getFirstMark().getIndex();
    }

    public TokenEntry getFirstMark() {
-        return marks[0];
+        return getMark(0);
    }

    public TokenEntry getSecondMark() {
-        return marks[1];
+        return getMark(1);
    }

    public String toString() {
@ -138,15 +106,8 @@ public class Match implements Comparable<Match> {
        return markSet;
    }

-    public MatchCode getMatchCode() {
-        if (mc == null) {
-            mc = new MatchCode(marks[0], marks[1]);
-        }
-        return mc;
-    }
-
    public int getEndIndex() {
-        return marks[1].getIndex() + getTokenCount() - 1;
+        return getMark(0).getIndex() + getTokenCount() - 1;
    }

    public void setMarkSet(Set<TokenEntry> markSet) {
@ -160,4 +121,17 @@ public class Match implements Comparable<Match> {
    public String getLabel() {
    	return label;
    }
+    
+    public void addTokenEntry(TokenEntry entry){
+        markSet.add(entry);                
+    }
+    
+    private TokenEntry getMark(int index) {
+        TokenEntry result = null;
+        int i = 0;
+        for (Iterator<TokenEntry> it = markSet.iterator(); it.hasNext() && i < index + 1; ){            
+            result = it.next();
+        }
+        return result;
+    }
 }
--- a/pmd/src/net/sourceforge/pmd/cpd/MatchAlgorithm.java
+++ b/pmd/src/net/sourceforge/pmd/cpd/MatchAlgorithm.java
@ -73,16 +73,15 @@ public class MatchAlgorithm {
        cpdListener.phaseUpdate(CPDListener.GROUPING);
        matches = matchCollector.getMatches();
        matchCollector = null;
-        for (Match match: matches) {
-            for (Iterator<TokenEntry> occurrences = match.iterator(); occurrences.hasNext();) {
+        for (Match match : matches) {
+            Iterator<TokenEntry> occurrences = match.iterator();
+            if (occurrences.hasNext()) {
                TokenEntry mark = occurrences.next();
                match.setLineCount(tokens.getLineCount(mark, match));
-                if (!occurrences.hasNext()) {
-                    int start = mark.getBeginLine();
-                    int end = start + match.getLineCount() - 1;
-                    SourceCode sourceCode = source.get(mark.getTokenSrcID());
-                    match.setSourceCodeSlice(sourceCode.getSlice(start, end));
-                }
+                int start = mark.getBeginLine();
+                int end = start + match.getLineCount() - 1;
+                SourceCode sourceCode = source.get(mark.getTokenSrcID());
+                match.setSourceCodeSlice(sourceCode.getSlice(start, end));
            }
        }
        cpdListener.phaseUpdate(CPDListener.DONE);
--- a/pmd/src/net/sourceforge/pmd/cpd/MatchCollector.java
+++ b/pmd/src/net/sourceforge/pmd/cpd/MatchCollector.java
@ -5,18 +5,14 @@ package net.sourceforge.pmd.cpd;

 import java.util.ArrayList;
 import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
+import java.util.TreeMap;

 public class MatchCollector {
-
+    private List<Match> matchList = new ArrayList<Match>();
+    private Map<Integer, Map<Integer, Match>> matchTree = new TreeMap<Integer, Map<Integer, Match>>();
    private MatchAlgorithm ma;
-    private Map<Match.MatchCode, Match> startMap = new HashMap<Match.MatchCode, Match>();
-    private Map<String, List<Match>> fileMap = new HashMap<String, List<Match>>();

    public MatchCollector(MatchAlgorithm ma) {
        this.ma = ma;
@ -45,105 +41,46 @@ public class MatchCollector {
                if (diff + dupes >= 1) {
                    continue;
                }
-                determineMatch(mark1, mark2, dupes);
+                reportMatch(mark1, mark2, dupes);
            }
        }
    }

-    public List<Match> getMatches() {
-        List<Match> matchList = new ArrayList<Match>(startMap.values());
-        Collections.sort(matchList);
-        Set<Match.MatchCode> matchSet = new HashSet<Match.MatchCode>();
-        Match.MatchCode matchCode = new Match.MatchCode();
-        for (int i = matchList.size(); i > 1; i--) {
-            Match match1 = matchList.get(i - 1);
-            TokenEntry mark1 = match1.getMarkSet().iterator().next();
-            matchSet.clear();
-            matchSet.add(match1.getMatchCode());
-            for (int j = i - 1; j > 0; j--) {
-                Match match2 = matchList.get(j - 1);
-                if (match1.getTokenCount() != match2.getTokenCount()) {
-                    break;
-                }
-                TokenEntry mark2 = null;
-                for (Iterator<TokenEntry> iter = match2.getMarkSet().iterator(); iter.hasNext();) {
-                    mark2 = iter.next();
-                    if (mark2 != mark1) {
-                        break;
-                    }
-                }
-                int dupes = countDuplicateTokens(mark1, mark2);
-                if (dupes < match1.getTokenCount()) {
-                    break;
-                }
-                matchSet.add(match2.getMatchCode());
-                match1.getMarkSet().addAll(match2.getMarkSet());
-                matchList.remove(i - 2);
-                i--;
-            }
-            if (matchSet.size() == 1) {
-                continue;
-            }
-            //prune the mark set
-            Set pruned = match1.getMarkSet();
-            boolean done = false;
-            ArrayList<TokenEntry> a1 = new ArrayList<TokenEntry>(match1.getMarkSet());
-            Collections.sort(a1);
-            for (int outer = 0; outer < a1.size() - 1 && !done; outer++) {
-                TokenEntry cmark1 = a1.get(outer);
-                for (int inner = outer + 1; inner < a1.size() && !done; inner++) {
-                    TokenEntry cmark2 = a1.get(inner);
-                    matchCode.setFirst(cmark1.getIndex());
-                    matchCode.setSecond(cmark2.getIndex());
-                    if (!matchSet.contains(matchCode)) {
-                        if (pruned.size() > 2) {
-                            pruned.remove(cmark2);
-                        }
-                        if (pruned.size() == 2) {
-                            done = true;
-                        }
-                    }
-                }
+    private void reportMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
+        Map<Integer, Match> matches = matchTree.get(dupes);
+        if (matches == null) {            
+            matches = new TreeMap<Integer, Match>();
+            matchTree.put(dupes, matches);
+            addNewMatch(mark1, mark2, dupes, matches);
+        } else {
+            Match matchA = matchTree.get(dupes).get(mark1.getIndex());
+            Match matchB = matchTree.get(dupes).get(mark2.getIndex());
+
+            if (matchA == null && matchB == null) {
+                addNewMatch(mark1, mark2, dupes, matches);
+            } else if(matchA == null) {
+                matchB.addTokenEntry(mark1);
+                matches.put(mark1.getIndex(), matchB);
+            } else if(matchB == null) {
+                matchA.addTokenEntry(mark2);
+                matches.put(mark2.getIndex(), matchA);
            }
        }
-        return matchList;
    }
-
-    /**
-     * A greedy algorithm for determining non-overlapping matches
-     */
-    private void determineMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
+    
+    private void addNewMatch(TokenEntry mark1, TokenEntry mark2, int dupes, Map<Integer, Match> matches){
        Match match = new Match(dupes, mark1, mark2);
-        String fileKey = mark1.getTokenSrcID() + mark2.getTokenSrcID();
-        List<Match> pairMatches = fileMap.get(fileKey);
-        if (pairMatches == null) {
-            pairMatches = new ArrayList<Match>();
-            fileMap.put(fileKey, pairMatches);
-        }
-        boolean add = true;
-        for (int i = 0; i < pairMatches.size(); i++) {
-            Match other = pairMatches.get(i);
-            if (other.getFirstMark().getIndex() + other.getTokenCount() - mark1.getIndex()
-                    > 0) {
-                boolean ordered = other.getSecondMark().getIndex() - mark2.getIndex() < 0;
-                if ((ordered && (other.getEndIndex() - mark2.getIndex() > 0))
-                        || (!ordered && (match.getEndIndex() - other.getSecondMark().getIndex()) > 0)) {
-                    if (other.getTokenCount() >= match.getTokenCount()) {
-                        add = false;
-                        break;
-                    } else {
-                        pairMatches.remove(i);
-                        startMap.remove(other.getMatchCode());
-                    }
-                }
-            }
-        }
-        if (add) {
-            pairMatches.add(match);
-            startMap.put(match.getMatchCode(), match);
-        }
+        matches.put(mark1.getIndex(), match);
+        matches.put(mark2.getIndex(), match);
+        matchList.add(match);        
    }

+    @SuppressWarnings("PMD.CompareObjectsWithEquals")
+    public List<Match> getMatches() {
+        Collections.sort(matchList);
+        return matchList;
+    }    
+
    private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) {
        if (mark1.getIndex() == 0) {
            return false;
--- a/pmd/xdocs/credits.xml
+++ b/pmd/xdocs/credits.xml
@ -58,7 +58,8 @@
        <subsection name="Contributors">
        <ul>
            <li>Nicolas Dordet - Fixed an issue on CloseResource</li>
-	    <li>Sergey Pariev - Fixed an ugly ArrayIndexOutOfBoundsException in CPD for Ruby</li>
+	    <li>Juan JesÃºs GarcÃa de Soria - rework CPD algorithm</li>
+            <li>Sergey Pariev - Fixed an ugly ArrayIndexOutOfBoundsException in CPD for Ruby</li>
            <li>Chris Heister - Reported and noted proper fix for bug in IDEAJ renderer operations</li>
            <li>Ralf Wagner - Reported bug in UselessOperationOnImmutable, reported and noted proper fix for broken XSLT</li>
            <li>Caroline Rioux - Reported bug in ImmutableField</li>