Tweaking this and that as I'm working through CPD

git-svn-id: https://pmd.svn.sourceforge.net/svnroot/pmd/trunk@3777 51baf565-9d33-0410-a72c-fc3788e3496d
2005-08-15 14:02:16 +00:00
parent c93c4c17aa
commit 417ae695a7
4 changed files with 89 additions and 86 deletions
--- a/pmd/src/net/sourceforge/pmd/cpd/JavaTokenizer.java
+++ b/pmd/src/net/sourceforge/pmd/cpd/JavaTokenizer.java
@ -25,46 +25,48 @@ public class JavaTokenizer implements Tokenizer {
    }

    public void tokenize(SourceCode tokens, Tokens tokenEntries) {
-        StringBuffer sb = tokens.getCodeBuffer();
+        StringBuffer buffer = tokens.getCodeBuffer();

        /*
        I'm doing a sort of State pattern thing here where
        this goes into "discarding" mode when it hits an import or package
-        keyword and goes back into "accumulate mode when it hits a semicolon.
+        keyword and goes back into "accumulate mode" when it hits a semicolon.
        This could probably be turned into some objects.
        */
-        JavaParserTokenManager tokenMgr = new TargetJDK1_4().createJavaParserTokenManager(new StringReader(sb.toString()));
-        Token currToken = tokenMgr.getNextToken();
-        boolean discarding = false;
-        while (currToken.image.length() > 0) {
-            if (currToken.kind == JavaParserConstants.IMPORT || currToken.kind == JavaParserConstants.PACKAGE) {
-                discarding = true;
-                currToken = tokenMgr.getNextToken();
+
+        // TODO - allow for JDK 1.5 selection
+        JavaParserTokenManager tokenMgr = new TargetJDK1_4().createJavaParserTokenManager(new StringReader(buffer.toString()));
+        Token currentToken = tokenMgr.getNextToken();
+        boolean inDiscardingState = false;
+        while (currentToken.image.length() > 0) {
+            if (currentToken.kind == JavaParserConstants.IMPORT || currentToken.kind == JavaParserConstants.PACKAGE) {
+                inDiscardingState = true;
+                currentToken = tokenMgr.getNextToken();
                continue;
            }

-            if (discarding && currToken.kind == JavaParserConstants.SEMICOLON) {
-                discarding = false;
+            if (inDiscardingState && currentToken.kind == JavaParserConstants.SEMICOLON) {
+                inDiscardingState = false;
            }

-            if (discarding) {
-                currToken = tokenMgr.getNextToken();
+            if (inDiscardingState) {
+                currentToken = tokenMgr.getNextToken();
                continue;
            }

-            if (currToken.kind != JavaParserConstants.SEMICOLON) {
-                String image = currToken.image;
-                if (ignoreLiterals && (currToken.kind == JavaParserConstants.STRING_LITERAL || currToken.kind == JavaParserConstants.CHARACTER_LITERAL 
-                        || currToken.kind == JavaParserConstants.DECIMAL_LITERAL || currToken.kind == JavaParserConstants.FLOATING_POINT_LITERAL)) {
-                    image = String.valueOf(currToken.kind);
+            if (currentToken.kind != JavaParserConstants.SEMICOLON) {
+                String image = currentToken.image;
+                if (ignoreLiterals && (currentToken.kind == JavaParserConstants.STRING_LITERAL || currentToken.kind == JavaParserConstants.CHARACTER_LITERAL
+                        || currentToken.kind == JavaParserConstants.DECIMAL_LITERAL || currentToken.kind == JavaParserConstants.FLOATING_POINT_LITERAL)) {
+                    image = String.valueOf(currentToken.kind);
                }
-                if (ignoreIdentifiers && currToken.kind == JavaParserConstants.IDENTIFIER) {
-                    image = String.valueOf(currToken.kind);
+                if (ignoreIdentifiers && currentToken.kind == JavaParserConstants.IDENTIFIER) {
+                    image = String.valueOf(currentToken.kind);
                }
-                tokenEntries.add(new TokenEntry(image, tokens.getFileName(), currToken.beginLine));
+                tokenEntries.add(new TokenEntry(image, tokens.getFileName(), currentToken.beginLine));
            }

-            currToken = tokenMgr.getNextToken();
+            currentToken = tokenMgr.getNextToken();
        }
        tokenEntries.add(TokenEntry.getEOF());
    }
--- a/pmd/src/net/sourceforge/pmd/cpd/MatchAlgorithm.java
+++ b/pmd/src/net/sourceforge/pmd/cpd/MatchAlgorithm.java
@ -42,26 +42,36 @@ public class MatchAlgorithm {
        this.cpdListener = listener;
    }

+    public Iterator matches() {
+        return matches.iterator();
+    }
+
+    public TokenEntry tokenAt(int offset, TokenEntry m) {
+        return (TokenEntry) code.get(offset + m.getIndex());
+    }
+
+    public int getMinimumTileSize() {
+        return this.min;
+    }
+
    public void findMatches() {
        cpdListener.phaseUpdate(CPDListener.HASH);
        Map markGroups = hash();

        cpdListener.phaseUpdate(CPDListener.MATCH);
-        MatchCollector coll = new MatchCollector(this);
+        MatchCollector matchCollector = new MatchCollector(this);
        for (Iterator i = markGroups.values().iterator(); i.hasNext();) {
            Object o = i.next();
            if (o instanceof List) {
                Collections.reverse((List) o);
-                coll.collect(min, (List) o);
+                matchCollector.collect((List) o);
            }
            i.remove();
        }
-
        cpdListener.phaseUpdate(CPDListener.GROUPING);
-        matches = coll.getMatches();
-        coll = null;
-
-        for (Iterator i = matches(); i.hasNext();) {
+        matches = matchCollector.getMatches();
+        matchCollector = null;
+        for (Iterator i = matches.iterator(); i.hasNext();) {
            Match match = (Match) i.next();
            for (Iterator occurrences = match.iterator(); occurrences.hasNext();) {
                TokenEntry mark = (TokenEntry) occurrences.next();
@ -86,6 +96,9 @@ public class MatchAlgorithm {
                lastHash = MOD * lastHash + token.getIdentifier() - lastMod * last;
                token.setHashCode(lastHash);
                Object o = markGroups.get(token);
+
+                // Note that this insertion method is worthwhile since the vast majority of
+                // markGroups keys will have only one value.
                if (o == null) {
                    markGroups.put(token, token);
                } else if (o instanceof TokenEntry) {
@ -110,13 +123,4 @@ public class MatchAlgorithm {
        }
        return markGroups;
    }
-
-    public Iterator matches() {
-        return matches.iterator();
-    }
-
-    public TokenEntry tokenAt(int offset, TokenEntry m) {
-        return (TokenEntry) code.get(offset + m.getIndex());
-    }
-
 }
--- a/pmd/src/net/sourceforge/pmd/cpd/MatchCollector.java
+++ b/pmd/src/net/sourceforge/pmd/cpd/MatchCollector.java
@ -22,25 +22,26 @@ public class MatchCollector {
        this.ma = ma;
    }

-    public void collect(int minimumLength, List marks) {
+    public void collect(List marks) {
        //first get a pairwise collection of all maximal matches
        for (int i = 0; i < marks.size() - 1; i++) {
            TokenEntry mark1 = (TokenEntry) marks.get(i);
            for (int j = i + 1; j < marks.size(); j++) {
                TokenEntry mark2 = (TokenEntry) marks.get(j);
                int diff = mark1.getIndex() - mark2.getIndex();
-                if (-diff < minimumLength) {
+                if (-diff < ma.getMinimumTileSize()) {
                    continue;
                }
                if (hasPreviousDupe(mark1, mark2)) {
                    continue;
                }
+
+                // "match too small" check
                int dupes = countDuplicateTokens(mark1, mark2);
-                //false positive check
-                if (dupes < minimumLength) {
+                if (dupes < ma.getMinimumTileSize()) {
                    continue;
                }
-                //is it still too close together
+                // is it still too close together?
                if (diff + dupes >= 1) {
                    continue;
                }
@ -50,49 +51,9 @@ public class MatchCollector {
    }

    public List getMatches() {
-        ArrayList matchList = new ArrayList(startMap.values());
-        groupMatches(matchList);
-        return matchList;
-    }
-
-    /**
-     * A greedy algorithm for determining non-overlapping matches
-     */
-    private void determineMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
-        Match match = new Match(dupes, mark1, mark2);
-        String fileKey = mark1.getTokenSrcID() + mark2.getTokenSrcID();
-        ArrayList pairMatches = (ArrayList) fileMap.get(fileKey);
-        if (pairMatches == null) {
-            pairMatches = new ArrayList();
-            fileMap.put(fileKey, pairMatches);
-        }
-        boolean add = true;
-        for (int k = 0; k < pairMatches.size(); k++) {
-            Match other = (Match) pairMatches.get(k);
-            if (other.getFirstMark().getIndex() + other.getTokenCount() - mark1.getIndex()
-                    > 0) {
-                boolean ordered = other.getSecondMark().getIndex() - mark2.getIndex() < 0;
-                if ((ordered && (other.getEndIndex() - mark2.getIndex() > 0))
-                        || (!ordered && (match.getEndIndex() - other.getSecondMark().getIndex()) > 0)) {
-                    if (other.getTokenCount() >= match.getTokenCount()) {
-                        add = false;
-                        break;
-                    } else {
-                        pairMatches.remove(k);
-                        startMap.remove(other.getMatchCode());
-                    }
-                }
-            }
-        }
-        if (add) {
-            pairMatches.add(match);
-            startMap.put(match.getMatchCode(), match);
-        }
-    }
-
-    private void groupMatches(ArrayList matchList) {
+        List matchList = new ArrayList(startMap.values());
        Collections.sort(matchList);
-        HashSet matchSet = new HashSet();
+        Set matchSet = new HashSet();
        Match.MatchCode matchCode = new Match.MatchCode();
        for (int i = matchList.size(); i > 1; i--) {
            Match match1 = (Match) matchList.get(i - 1);
@ -145,6 +106,42 @@ public class MatchCollector {
                }
            }
        }
+        return matchList;
+    }
+
+    /**
+     * A greedy algorithm for determining non-overlapping matches
+     */
+    private void determineMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
+        Match match = new Match(dupes, mark1, mark2);
+        String fileKey = mark1.getTokenSrcID() + mark2.getTokenSrcID();
+        List pairMatches = (ArrayList) fileMap.get(fileKey);
+        if (pairMatches == null) {
+            pairMatches = new ArrayList();
+            fileMap.put(fileKey, pairMatches);
+        }
+        boolean add = true;
+        for (int i = 0; i < pairMatches.size(); i++) {
+            Match other = (Match) pairMatches.get(i);
+            if (other.getFirstMark().getIndex() + other.getTokenCount() - mark1.getIndex()
+                    > 0) {
+                boolean ordered = other.getSecondMark().getIndex() - mark2.getIndex() < 0;
+                if ((ordered && (other.getEndIndex() - mark2.getIndex() > 0))
+                        || (!ordered && (match.getEndIndex() - other.getSecondMark().getIndex()) > 0)) {
+                    if (other.getTokenCount() >= match.getTokenCount()) {
+                        add = false;
+                        break;
+                    } else {
+                        pairMatches.remove(i);
+                        startMap.remove(other.getMatchCode());
+                    }
+                }
+            }
+        }
+        if (add) {
+            pairMatches.add(match);
+            startMap.put(match.getMatchCode(), match);
+        }
    }

    private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) {
--- a/pmd/src/net/sourceforge/pmd/dfa/DataFlowFacade.java
+++ b/pmd/src/net/sourceforge/pmd/dfa/DataFlowFacade.java
@ -12,7 +12,7 @@ import net.sourceforge.pmd.dfa.variableaccess.VariableAccessVisitor;
 /**
 * @author raik
 *
- * What about initializers?  This only process methods and constructors
+ * What about initializers?  This only processes methods and constructors
 */
 public class DataFlowFacade extends JavaParserVisitorAdapter {