Properly handle multiple matches once again
This commit is contained in:
@ -6,15 +6,19 @@ package net.sourceforge.pmd.cpd;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
|
||||
class MatchCollector {
|
||||
|
||||
private final List<Match> matchList = new ArrayList<>();
|
||||
private final Map<Integer, List<Match>> matchTree = new HashMap<>();
|
||||
private final Map<Integer, List<Match>> matchTree = new TreeMap<>();
|
||||
|
||||
private final Map<Integer, Set<Integer>> tokenMatchSets = new HashMap<>();
|
||||
|
||||
private final MatchAlgorithm ma;
|
||||
|
||||
MatchCollector(MatchAlgorithm ma) {
|
||||
@ -50,42 +54,60 @@ class MatchCollector {
|
||||
}
|
||||
|
||||
private void reportMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
|
||||
matchTree.compute(mark1.getIndex(), (m1Index, matches) -> {
|
||||
if (matches == null) {
|
||||
matches = new ArrayList<>();
|
||||
addNewMatch(mark1, mark2, dupes, matches);
|
||||
} else {
|
||||
Iterator<Match> matchIterator = matches.iterator();
|
||||
while (matchIterator.hasNext()) {
|
||||
Match m = matchIterator.next();
|
||||
TokenEntry otherEnd = m.getSecondMark().getToken();
|
||||
/*
|
||||
* Check if the match is previously know. This can happen when a snippet is duplicated more than once.
|
||||
* If A, B and C are identical snippets, MatchAlgorithm will find the matching pairs:
|
||||
* - AB
|
||||
* - AC
|
||||
* - BC
|
||||
* It should be reduced to a single match with 3 marks
|
||||
*/
|
||||
if (tokenMatchSets.computeIfAbsent(mark1.getIndex(), HashSet::new).contains(mark2.getIndex())) {
|
||||
return;
|
||||
}
|
||||
|
||||
// does the new match supersedes this one?
|
||||
if (otherEnd.getIndex() < mark2.getIndex() && otherEnd.getIndex() + m.getTokenCount() >= mark2.getIndex() + dupes) {
|
||||
// this match is embedded in the previous one… ignore it.
|
||||
return matches;
|
||||
} else if (mark2.getIndex() < otherEnd.getIndex() && mark2.getIndex() + dupes >= otherEnd.getIndex() + m.getTokenCount()) {
|
||||
// the new match is longer and overlaps with the old one - replace it
|
||||
matchIterator.remove();
|
||||
matchList.remove(m);
|
||||
break;
|
||||
}
|
||||
}
|
||||
List<Match> matches = matchTree.computeIfAbsent(mark1.getIndex(), ArrayList::new);
|
||||
Iterator<Match> matchIterator = matches.iterator();
|
||||
while (matchIterator.hasNext()) {
|
||||
Match m = matchIterator.next();
|
||||
TokenEntry otherEnd = m.getSecondMark().getToken(); // TODO : this only works for mark1 being the key
|
||||
|
||||
addNewMatch(mark1, mark2, dupes, matches);
|
||||
// does the new match supersedes this one?
|
||||
if (otherEnd.getIndex() < mark2.getIndex() && otherEnd.getIndex() + m.getTokenCount() >= mark2.getIndex() + dupes) {
|
||||
// this match is embedded in the previous one… ignore it.
|
||||
return;
|
||||
} else if (mark2.getIndex() < otherEnd.getIndex() && mark2.getIndex() + dupes >= otherEnd.getIndex() + m.getTokenCount()) {
|
||||
// the new match is longer and overlaps with the old one - replace it
|
||||
matchIterator.remove();
|
||||
break;
|
||||
} else if (dupes == m.getTokenCount()) {
|
||||
// we found yet another exact match of the same snippet. Roll it together
|
||||
|
||||
// Add this adjacency to all combinations
|
||||
m.iterator().forEachRemaining(other -> registerTokenMatch(other.getToken(), mark2));
|
||||
|
||||
m.addMark(mark2);
|
||||
return;
|
||||
}
|
||||
return matches;
|
||||
});
|
||||
}
|
||||
|
||||
// this is a new match, add it
|
||||
matches.add(new Match(dupes, mark1, mark2));
|
||||
|
||||
// add matches in both directions
|
||||
registerTokenMatch(mark1, mark2);
|
||||
}
|
||||
|
||||
private void addNewMatch(TokenEntry mark1, TokenEntry mark2, int dupes, List<Match> matches) {
|
||||
Match match = new Match(dupes, mark1, mark2);
|
||||
matches.add(match);
|
||||
matchList.add(match);
|
||||
private void registerTokenMatch(TokenEntry mark1, TokenEntry mark2) {
|
||||
tokenMatchSets.computeIfAbsent(mark1.getIndex(), HashSet::new).add(mark2.getIndex());
|
||||
tokenMatchSets.computeIfAbsent(mark2.getIndex(), HashSet::new).add(mark1.getIndex());
|
||||
}
|
||||
|
||||
List<Match> getMatches() {
|
||||
return matchList;
|
||||
return matchTree.values().stream().reduce(new ArrayList<>(), (acc, matches) -> {
|
||||
acc.addAll(matches);
|
||||
return acc;
|
||||
});
|
||||
}
|
||||
|
||||
private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) {
|
||||
|
Reference in New Issue
Block a user