Tweaking this and that as I'm working through CPD
git-svn-id: https://pmd.svn.sourceforge.net/svnroot/pmd/trunk@3777 51baf565-9d33-0410-a72c-fc3788e3496d
This commit is contained in:
@ -25,46 +25,48 @@ public class JavaTokenizer implements Tokenizer {
|
||||
}
|
||||
|
||||
public void tokenize(SourceCode tokens, Tokens tokenEntries) {
|
||||
StringBuffer sb = tokens.getCodeBuffer();
|
||||
StringBuffer buffer = tokens.getCodeBuffer();
|
||||
|
||||
/*
|
||||
I'm doing a sort of State pattern thing here where
|
||||
this goes into "discarding" mode when it hits an import or package
|
||||
keyword and goes back into "accumulate mode when it hits a semicolon.
|
||||
keyword and goes back into "accumulate mode" when it hits a semicolon.
|
||||
This could probably be turned into some objects.
|
||||
*/
|
||||
JavaParserTokenManager tokenMgr = new TargetJDK1_4().createJavaParserTokenManager(new StringReader(sb.toString()));
|
||||
Token currToken = tokenMgr.getNextToken();
|
||||
boolean discarding = false;
|
||||
while (currToken.image.length() > 0) {
|
||||
if (currToken.kind == JavaParserConstants.IMPORT || currToken.kind == JavaParserConstants.PACKAGE) {
|
||||
discarding = true;
|
||||
currToken = tokenMgr.getNextToken();
|
||||
|
||||
// TODO - allow for JDK 1.5 selection
|
||||
JavaParserTokenManager tokenMgr = new TargetJDK1_4().createJavaParserTokenManager(new StringReader(buffer.toString()));
|
||||
Token currentToken = tokenMgr.getNextToken();
|
||||
boolean inDiscardingState = false;
|
||||
while (currentToken.image.length() > 0) {
|
||||
if (currentToken.kind == JavaParserConstants.IMPORT || currentToken.kind == JavaParserConstants.PACKAGE) {
|
||||
inDiscardingState = true;
|
||||
currentToken = tokenMgr.getNextToken();
|
||||
continue;
|
||||
}
|
||||
|
||||
if (discarding && currToken.kind == JavaParserConstants.SEMICOLON) {
|
||||
discarding = false;
|
||||
if (inDiscardingState && currentToken.kind == JavaParserConstants.SEMICOLON) {
|
||||
inDiscardingState = false;
|
||||
}
|
||||
|
||||
if (discarding) {
|
||||
currToken = tokenMgr.getNextToken();
|
||||
if (inDiscardingState) {
|
||||
currentToken = tokenMgr.getNextToken();
|
||||
continue;
|
||||
}
|
||||
|
||||
if (currToken.kind != JavaParserConstants.SEMICOLON) {
|
||||
String image = currToken.image;
|
||||
if (ignoreLiterals && (currToken.kind == JavaParserConstants.STRING_LITERAL || currToken.kind == JavaParserConstants.CHARACTER_LITERAL
|
||||
|| currToken.kind == JavaParserConstants.DECIMAL_LITERAL || currToken.kind == JavaParserConstants.FLOATING_POINT_LITERAL)) {
|
||||
image = String.valueOf(currToken.kind);
|
||||
if (currentToken.kind != JavaParserConstants.SEMICOLON) {
|
||||
String image = currentToken.image;
|
||||
if (ignoreLiterals && (currentToken.kind == JavaParserConstants.STRING_LITERAL || currentToken.kind == JavaParserConstants.CHARACTER_LITERAL
|
||||
|| currentToken.kind == JavaParserConstants.DECIMAL_LITERAL || currentToken.kind == JavaParserConstants.FLOATING_POINT_LITERAL)) {
|
||||
image = String.valueOf(currentToken.kind);
|
||||
}
|
||||
if (ignoreIdentifiers && currToken.kind == JavaParserConstants.IDENTIFIER) {
|
||||
image = String.valueOf(currToken.kind);
|
||||
if (ignoreIdentifiers && currentToken.kind == JavaParserConstants.IDENTIFIER) {
|
||||
image = String.valueOf(currentToken.kind);
|
||||
}
|
||||
tokenEntries.add(new TokenEntry(image, tokens.getFileName(), currToken.beginLine));
|
||||
tokenEntries.add(new TokenEntry(image, tokens.getFileName(), currentToken.beginLine));
|
||||
}
|
||||
|
||||
currToken = tokenMgr.getNextToken();
|
||||
currentToken = tokenMgr.getNextToken();
|
||||
}
|
||||
tokenEntries.add(TokenEntry.getEOF());
|
||||
}
|
||||
|
@ -42,26 +42,36 @@ public class MatchAlgorithm {
|
||||
this.cpdListener = listener;
|
||||
}
|
||||
|
||||
public Iterator matches() {
|
||||
return matches.iterator();
|
||||
}
|
||||
|
||||
public TokenEntry tokenAt(int offset, TokenEntry m) {
|
||||
return (TokenEntry) code.get(offset + m.getIndex());
|
||||
}
|
||||
|
||||
public int getMinimumTileSize() {
|
||||
return this.min;
|
||||
}
|
||||
|
||||
public void findMatches() {
|
||||
cpdListener.phaseUpdate(CPDListener.HASH);
|
||||
Map markGroups = hash();
|
||||
|
||||
cpdListener.phaseUpdate(CPDListener.MATCH);
|
||||
MatchCollector coll = new MatchCollector(this);
|
||||
MatchCollector matchCollector = new MatchCollector(this);
|
||||
for (Iterator i = markGroups.values().iterator(); i.hasNext();) {
|
||||
Object o = i.next();
|
||||
if (o instanceof List) {
|
||||
Collections.reverse((List) o);
|
||||
coll.collect(min, (List) o);
|
||||
matchCollector.collect((List) o);
|
||||
}
|
||||
i.remove();
|
||||
}
|
||||
|
||||
cpdListener.phaseUpdate(CPDListener.GROUPING);
|
||||
matches = coll.getMatches();
|
||||
coll = null;
|
||||
|
||||
for (Iterator i = matches(); i.hasNext();) {
|
||||
matches = matchCollector.getMatches();
|
||||
matchCollector = null;
|
||||
for (Iterator i = matches.iterator(); i.hasNext();) {
|
||||
Match match = (Match) i.next();
|
||||
for (Iterator occurrences = match.iterator(); occurrences.hasNext();) {
|
||||
TokenEntry mark = (TokenEntry) occurrences.next();
|
||||
@ -86,6 +96,9 @@ public class MatchAlgorithm {
|
||||
lastHash = MOD * lastHash + token.getIdentifier() - lastMod * last;
|
||||
token.setHashCode(lastHash);
|
||||
Object o = markGroups.get(token);
|
||||
|
||||
// Note that this insertion method is worthwhile since the vast majority
|
||||
// of markGroup keys will have only one value.
|
||||
if (o == null) {
|
||||
markGroups.put(token, token);
|
||||
} else if (o instanceof TokenEntry) {
|
||||
@ -110,13 +123,4 @@ public class MatchAlgorithm {
|
||||
}
|
||||
return markGroups;
|
||||
}
|
||||
|
||||
public Iterator matches() {
|
||||
return matches.iterator();
|
||||
}
|
||||
|
||||
public TokenEntry tokenAt(int offset, TokenEntry m) {
|
||||
return (TokenEntry) code.get(offset + m.getIndex());
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -22,25 +22,26 @@ public class MatchCollector {
|
||||
this.ma = ma;
|
||||
}
|
||||
|
||||
public void collect(int minimumLength, List marks) {
|
||||
public void collect(List marks) {
|
||||
//first get a pairwise collection of all maximal matches
|
||||
for (int i = 0; i < marks.size() - 1; i++) {
|
||||
TokenEntry mark1 = (TokenEntry) marks.get(i);
|
||||
for (int j = i + 1; j < marks.size(); j++) {
|
||||
TokenEntry mark2 = (TokenEntry) marks.get(j);
|
||||
int diff = mark1.getIndex() - mark2.getIndex();
|
||||
if (-diff < minimumLength) {
|
||||
if (-diff < ma.getMinimumTileSize()) {
|
||||
continue;
|
||||
}
|
||||
if (hasPreviousDupe(mark1, mark2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// "match too small" check
|
||||
int dupes = countDuplicateTokens(mark1, mark2);
|
||||
//false positive check
|
||||
if (dupes < minimumLength) {
|
||||
if (dupes < ma.getMinimumTileSize()) {
|
||||
continue;
|
||||
}
|
||||
//is it still too close together
|
||||
// is it still too close together
|
||||
if (diff + dupes >= 1) {
|
||||
continue;
|
||||
}
|
||||
@ -50,49 +51,9 @@ public class MatchCollector {
|
||||
}
|
||||
|
||||
public List getMatches() {
|
||||
ArrayList matchList = new ArrayList(startMap.values());
|
||||
groupMatches(matchList);
|
||||
return matchList;
|
||||
}
|
||||
|
||||
/**
|
||||
* A greedy algorithm for determining non-overlapping matches
|
||||
*/
|
||||
private void determineMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
|
||||
Match match = new Match(dupes, mark1, mark2);
|
||||
String fileKey = mark1.getTokenSrcID() + mark2.getTokenSrcID();
|
||||
ArrayList pairMatches = (ArrayList) fileMap.get(fileKey);
|
||||
if (pairMatches == null) {
|
||||
pairMatches = new ArrayList();
|
||||
fileMap.put(fileKey, pairMatches);
|
||||
}
|
||||
boolean add = true;
|
||||
for (int k = 0; k < pairMatches.size(); k++) {
|
||||
Match other = (Match) pairMatches.get(k);
|
||||
if (other.getFirstMark().getIndex() + other.getTokenCount() - mark1.getIndex()
|
||||
> 0) {
|
||||
boolean ordered = other.getSecondMark().getIndex() - mark2.getIndex() < 0;
|
||||
if ((ordered && (other.getEndIndex() - mark2.getIndex() > 0))
|
||||
|| (!ordered && (match.getEndIndex() - other.getSecondMark().getIndex()) > 0)) {
|
||||
if (other.getTokenCount() >= match.getTokenCount()) {
|
||||
add = false;
|
||||
break;
|
||||
} else {
|
||||
pairMatches.remove(k);
|
||||
startMap.remove(other.getMatchCode());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (add) {
|
||||
pairMatches.add(match);
|
||||
startMap.put(match.getMatchCode(), match);
|
||||
}
|
||||
}
|
||||
|
||||
private void groupMatches(ArrayList matchList) {
|
||||
List matchList = new ArrayList(startMap.values());
|
||||
Collections.sort(matchList);
|
||||
HashSet matchSet = new HashSet();
|
||||
Set matchSet = new HashSet();
|
||||
Match.MatchCode matchCode = new Match.MatchCode();
|
||||
for (int i = matchList.size(); i > 1; i--) {
|
||||
Match match1 = (Match) matchList.get(i - 1);
|
||||
@ -145,6 +106,42 @@ public class MatchCollector {
|
||||
}
|
||||
}
|
||||
}
|
||||
return matchList;
|
||||
}
|
||||
|
||||
/**
|
||||
* A greedy algorithm for determining non-overlapping matches
|
||||
*/
|
||||
private void determineMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
|
||||
Match match = new Match(dupes, mark1, mark2);
|
||||
String fileKey = mark1.getTokenSrcID() + mark2.getTokenSrcID();
|
||||
List pairMatches = (ArrayList) fileMap.get(fileKey);
|
||||
if (pairMatches == null) {
|
||||
pairMatches = new ArrayList();
|
||||
fileMap.put(fileKey, pairMatches);
|
||||
}
|
||||
boolean add = true;
|
||||
for (int i = 0; i < pairMatches.size(); i++) {
|
||||
Match other = (Match) pairMatches.get(i);
|
||||
if (other.getFirstMark().getIndex() + other.getTokenCount() - mark1.getIndex()
|
||||
> 0) {
|
||||
boolean ordered = other.getSecondMark().getIndex() - mark2.getIndex() < 0;
|
||||
if ((ordered && (other.getEndIndex() - mark2.getIndex() > 0))
|
||||
|| (!ordered && (match.getEndIndex() - other.getSecondMark().getIndex()) > 0)) {
|
||||
if (other.getTokenCount() >= match.getTokenCount()) {
|
||||
add = false;
|
||||
break;
|
||||
} else {
|
||||
pairMatches.remove(i);
|
||||
startMap.remove(other.getMatchCode());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (add) {
|
||||
pairMatches.add(match);
|
||||
startMap.put(match.getMatchCode(), match);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) {
|
||||
|
@ -12,7 +12,7 @@ import net.sourceforge.pmd.dfa.variableaccess.VariableAccessVisitor;
|
||||
/**
|
||||
* @author raik
|
||||
*
|
||||
* What about initializers? This only process methods and constructors
|
||||
* What about initializers? This only processes methods and constructors
|
||||
*/
|
||||
public class DataFlowFacade extends JavaParserVisitorAdapter {
|
||||
|
||||
|
Reference in New Issue
Block a user