Properly handle multiple matches once again
This commit is contained in:
@@ -6,15 +6,19 @@ package net.sourceforge.pmd.cpd;
|
|||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
|
|
||||||
class MatchCollector {
|
class MatchCollector {
|
||||||
|
|
||||||
private final List<Match> matchList = new ArrayList<>();
|
private final Map<Integer, List<Match>> matchTree = new TreeMap<>();
|
||||||
private final Map<Integer, List<Match>> matchTree = new HashMap<>();
|
|
||||||
|
private final Map<Integer, Set<Integer>> tokenMatchSets = new HashMap<>();
|
||||||
|
|
||||||
private final MatchAlgorithm ma;
|
private final MatchAlgorithm ma;
|
||||||
|
|
||||||
MatchCollector(MatchAlgorithm ma) {
|
MatchCollector(MatchAlgorithm ma) {
|
||||||
@@ -50,42 +54,60 @@ class MatchCollector {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private void reportMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
|
private void reportMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
|
||||||
matchTree.compute(mark1.getIndex(), (m1Index, matches) -> {
|
/*
|
||||||
if (matches == null) {
|
* Check if the match is already known. This can happen when a snippet is duplicated more than once.
|
||||||
matches = new ArrayList<>();
|
* If A, B and C are identical snippets, MatchAlgorithm will find the matching pairs:
|
||||||
addNewMatch(mark1, mark2, dupes, matches);
|
* - AB
|
||||||
} else {
|
* - AC
|
||||||
Iterator<Match> matchIterator = matches.iterator();
|
* - BC
|
||||||
while (matchIterator.hasNext()) {
|
* It should be reduced to a single match with 3 marks
|
||||||
Match m = matchIterator.next();
|
*/
|
||||||
TokenEntry otherEnd = m.getSecondMark().getToken();
|
if (tokenMatchSets.computeIfAbsent(mark1.getIndex(), HashSet::new).contains(mark2.getIndex())) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// does the new match supersede this one?
|
List<Match> matches = matchTree.computeIfAbsent(mark1.getIndex(), ArrayList::new);
|
||||||
if (otherEnd.getIndex() < mark2.getIndex() && otherEnd.getIndex() + m.getTokenCount() >= mark2.getIndex() + dupes) {
|
Iterator<Match> matchIterator = matches.iterator();
|
||||||
// this match is embedded in the previous one… ignore it.
|
while (matchIterator.hasNext()) {
|
||||||
return matches;
|
Match m = matchIterator.next();
|
||||||
} else if (mark2.getIndex() < otherEnd.getIndex() && mark2.getIndex() + dupes >= otherEnd.getIndex() + m.getTokenCount()) {
|
TokenEntry otherEnd = m.getSecondMark().getToken(); // TODO : this only works for mark1 being the key
|
||||||
// the new match is longer and overlaps with the old one - replace it
|
|
||||||
matchIterator.remove();
|
|
||||||
matchList.remove(m);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
addNewMatch(mark1, mark2, dupes, matches);
|
// does the new match supersede this one?
|
||||||
|
if (otherEnd.getIndex() < mark2.getIndex() && otherEnd.getIndex() + m.getTokenCount() >= mark2.getIndex() + dupes) {
|
||||||
|
// this match is embedded in the previous one… ignore it.
|
||||||
|
return;
|
||||||
|
} else if (mark2.getIndex() < otherEnd.getIndex() && mark2.getIndex() + dupes >= otherEnd.getIndex() + m.getTokenCount()) {
|
||||||
|
// the new match is longer and overlaps with the old one - replace it
|
||||||
|
matchIterator.remove();
|
||||||
|
break;
|
||||||
|
} else if (dupes == m.getTokenCount()) {
|
||||||
|
// we found yet another exact match of the same snippet. Roll it into the existing match
|
||||||
|
|
||||||
|
// Add this adjacency to all combinations
|
||||||
|
m.iterator().forEachRemaining(other -> registerTokenMatch(other.getToken(), mark2));
|
||||||
|
|
||||||
|
m.addMark(mark2);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
return matches;
|
}
|
||||||
});
|
|
||||||
|
// this is a new match, add it
|
||||||
|
matches.add(new Match(dupes, mark1, mark2));
|
||||||
|
|
||||||
|
// add matches in both directions
|
||||||
|
registerTokenMatch(mark1, mark2);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addNewMatch(TokenEntry mark1, TokenEntry mark2, int dupes, List<Match> matches) {
|
private void registerTokenMatch(TokenEntry mark1, TokenEntry mark2) {
|
||||||
Match match = new Match(dupes, mark1, mark2);
|
tokenMatchSets.computeIfAbsent(mark1.getIndex(), HashSet::new).add(mark2.getIndex());
|
||||||
matches.add(match);
|
tokenMatchSets.computeIfAbsent(mark2.getIndex(), HashSet::new).add(mark1.getIndex());
|
||||||
matchList.add(match);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
List<Match> getMatches() {
|
List<Match> getMatches() {
|
||||||
return matchList;
|
return matchTree.values().stream().reduce(new ArrayList<>(), (acc, matches) -> {
|
||||||
|
acc.addAll(matches);
|
||||||
|
return acc;
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) {
|
private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) {
|
||||||
|
Reference in New Issue
Block a user