Tweaking this and that as I'm working thru CPD

git-svn-id: https://pmd.svn.sourceforge.net/svnroot/pmd/trunk@3777 51baf565-9d33-0410-a72c-fc3788e3496d
This commit is contained in:
Tom Copeland
2005-08-15 14:02:16 +00:00
parent c93c4c17aa
commit 417ae695a7
4 changed files with 89 additions and 86 deletions

View File

@ -25,46 +25,48 @@ public class JavaTokenizer implements Tokenizer {
}
public void tokenize(SourceCode tokens, Tokens tokenEntries) {
StringBuffer sb = tokens.getCodeBuffer();
StringBuffer buffer = tokens.getCodeBuffer();
/*
I'm doing a sort of State pattern thing here where
this goes into "discarding" mode when it hits an import or package
keyword and goes back into "accumulate mode when it hits a semicolon.
keyword and goes back into "accumulate mode" when it hits a semicolon.
This could probably be turned into some objects.
*/
JavaParserTokenManager tokenMgr = new TargetJDK1_4().createJavaParserTokenManager(new StringReader(sb.toString()));
Token currToken = tokenMgr.getNextToken();
boolean discarding = false;
while (currToken.image.length() > 0) {
if (currToken.kind == JavaParserConstants.IMPORT || currToken.kind == JavaParserConstants.PACKAGE) {
discarding = true;
currToken = tokenMgr.getNextToken();
// TODO - allow for JDK 1.5 selection
JavaParserTokenManager tokenMgr = new TargetJDK1_4().createJavaParserTokenManager(new StringReader(buffer.toString()));
Token currentToken = tokenMgr.getNextToken();
boolean inDiscardingState = false;
while (currentToken.image.length() > 0) {
if (currentToken.kind == JavaParserConstants.IMPORT || currentToken.kind == JavaParserConstants.PACKAGE) {
inDiscardingState = true;
currentToken = tokenMgr.getNextToken();
continue;
}
if (discarding && currToken.kind == JavaParserConstants.SEMICOLON) {
discarding = false;
if (inDiscardingState && currentToken.kind == JavaParserConstants.SEMICOLON) {
inDiscardingState = false;
}
if (discarding) {
currToken = tokenMgr.getNextToken();
if (inDiscardingState) {
currentToken = tokenMgr.getNextToken();
continue;
}
if (currToken.kind != JavaParserConstants.SEMICOLON) {
String image = currToken.image;
if (ignoreLiterals && (currToken.kind == JavaParserConstants.STRING_LITERAL || currToken.kind == JavaParserConstants.CHARACTER_LITERAL
|| currToken.kind == JavaParserConstants.DECIMAL_LITERAL || currToken.kind == JavaParserConstants.FLOATING_POINT_LITERAL)) {
image = String.valueOf(currToken.kind);
if (currentToken.kind != JavaParserConstants.SEMICOLON) {
String image = currentToken.image;
if (ignoreLiterals && (currentToken.kind == JavaParserConstants.STRING_LITERAL || currentToken.kind == JavaParserConstants.CHARACTER_LITERAL
|| currentToken.kind == JavaParserConstants.DECIMAL_LITERAL || currentToken.kind == JavaParserConstants.FLOATING_POINT_LITERAL)) {
image = String.valueOf(currentToken.kind);
}
if (ignoreIdentifiers && currToken.kind == JavaParserConstants.IDENTIFIER) {
image = String.valueOf(currToken.kind);
if (ignoreIdentifiers && currentToken.kind == JavaParserConstants.IDENTIFIER) {
image = String.valueOf(currentToken.kind);
}
tokenEntries.add(new TokenEntry(image, tokens.getFileName(), currToken.beginLine));
tokenEntries.add(new TokenEntry(image, tokens.getFileName(), currentToken.beginLine));
}
currToken = tokenMgr.getNextToken();
currentToken = tokenMgr.getNextToken();
}
tokenEntries.add(TokenEntry.getEOF());
}

View File

@ -42,26 +42,36 @@ public class MatchAlgorithm {
this.cpdListener = listener;
}
/**
 * Returns an iterator over the <code>matches</code> collection, which
 * <code>findMatches()</code> assigns from the match collector's results.
 *
 * @return an iterator over the current contents of <code>matches</code>
 */
public Iterator matches() {
return matches.iterator();
}
/**
 * Looks up the token stored in <code>code</code> at position
 * <code>offset + m.getIndex()</code>.
 *
 * @param offset distance to add to the index of <code>m</code>
 * @param m      the token entry whose index is used as the base position
 * @return the element of <code>code</code> at that position, cast to TokenEntry
 */
public TokenEntry tokenAt(int offset, TokenEntry m) {
return (TokenEntry) code.get(offset + m.getIndex());
}
/**
 * Returns the value of the <code>min</code> field. MatchCollector.collect()
 * uses this value as the threshold below which a candidate pair is skipped.
 *
 * @return the value of <code>min</code>
 */
public int getMinimumTileSize() {
return this.min;
}
public void findMatches() {
cpdListener.phaseUpdate(CPDListener.HASH);
Map markGroups = hash();
cpdListener.phaseUpdate(CPDListener.MATCH);
MatchCollector coll = new MatchCollector(this);
MatchCollector matchCollector = new MatchCollector(this);
for (Iterator i = markGroups.values().iterator(); i.hasNext();) {
Object o = i.next();
if (o instanceof List) {
Collections.reverse((List) o);
coll.collect(min, (List) o);
matchCollector.collect((List) o);
}
i.remove();
}
cpdListener.phaseUpdate(CPDListener.GROUPING);
matches = coll.getMatches();
coll = null;
for (Iterator i = matches(); i.hasNext();) {
matches = matchCollector.getMatches();
matchCollector = null;
for (Iterator i = matches.iterator(); i.hasNext();) {
Match match = (Match) i.next();
for (Iterator occurrences = match.iterator(); occurrences.hasNext();) {
TokenEntry mark = (TokenEntry) occurrences.next();
@ -86,6 +96,9 @@ public class MatchAlgorithm {
lastHash = MOD * lastHash + token.getIdentifier() - lastMod * last;
token.setHashCode(lastHash);
Object o = markGroups.get(token);
// Note that this insertion method is worthwhile since the vast majority of
// markGroup keys will have only one value.
if (o == null) {
markGroups.put(token, token);
} else if (o instanceof TokenEntry) {
@ -110,13 +123,4 @@ public class MatchAlgorithm {
}
return markGroups;
}
public Iterator matches() {
return matches.iterator();
}
public TokenEntry tokenAt(int offset, TokenEntry m) {
return (TokenEntry) code.get(offset + m.getIndex());
}
}

View File

@ -22,25 +22,26 @@ public class MatchCollector {
this.ma = ma;
}
public void collect(int minimumLength, List marks) {
public void collect(List marks) {
//first get a pairwise collection of all maximal matches
for (int i = 0; i < marks.size() - 1; i++) {
TokenEntry mark1 = (TokenEntry) marks.get(i);
for (int j = i + 1; j < marks.size(); j++) {
TokenEntry mark2 = (TokenEntry) marks.get(j);
int diff = mark1.getIndex() - mark2.getIndex();
if (-diff < minimumLength) {
if (-diff < ma.getMinimumTileSize()) {
continue;
}
if (hasPreviousDupe(mark1, mark2)) {
continue;
}
// "match too small" check
int dupes = countDuplicateTokens(mark1, mark2);
//false positive check
if (dupes < minimumLength) {
if (dupes < ma.getMinimumTileSize()) {
continue;
}
//is it still too close together
// is it still too close together
if (diff + dupes >= 1) {
continue;
}
@ -50,49 +51,9 @@ public class MatchCollector {
}
public List getMatches() {
ArrayList matchList = new ArrayList(startMap.values());
groupMatches(matchList);
return matchList;
}
/**
* A greedy algorithm for determining non-overlapping matches
*/
private void determineMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
Match match = new Match(dupes, mark1, mark2);
String fileKey = mark1.getTokenSrcID() + mark2.getTokenSrcID();
ArrayList pairMatches = (ArrayList) fileMap.get(fileKey);
if (pairMatches == null) {
pairMatches = new ArrayList();
fileMap.put(fileKey, pairMatches);
}
boolean add = true;
for (int k = 0; k < pairMatches.size(); k++) {
Match other = (Match) pairMatches.get(k);
if (other.getFirstMark().getIndex() + other.getTokenCount() - mark1.getIndex()
> 0) {
boolean ordered = other.getSecondMark().getIndex() - mark2.getIndex() < 0;
if ((ordered && (other.getEndIndex() - mark2.getIndex() > 0))
|| (!ordered && (match.getEndIndex() - other.getSecondMark().getIndex()) > 0)) {
if (other.getTokenCount() >= match.getTokenCount()) {
add = false;
break;
} else {
pairMatches.remove(k);
startMap.remove(other.getMatchCode());
}
}
}
}
if (add) {
pairMatches.add(match);
startMap.put(match.getMatchCode(), match);
}
}
private void groupMatches(ArrayList matchList) {
List matchList = new ArrayList(startMap.values());
Collections.sort(matchList);
HashSet matchSet = new HashSet();
Set matchSet = new HashSet();
Match.MatchCode matchCode = new Match.MatchCode();
for (int i = matchList.size(); i > 1; i--) {
Match match1 = (Match) matchList.get(i - 1);
@ -145,6 +106,42 @@ public class MatchCollector {
}
}
}
return matchList;
}
/**
 * A greedy algorithm for determining non-overlapping matches.
 *
 * Builds a candidate Match from the two marks and compares it against the
 * matches already recorded under the same file-pair key. When the candidate
 * overlaps an existing match, whichever has the larger token count is kept;
 * on a tie the existing match wins and the candidate is dropped.
 *
 * @param mark1 first occurrence of the candidate match
 * @param mark2 second occurrence of the candidate match
 * @param dupes passed to the Match constructor; presumably the number of
 *              duplicated tokens counted by the caller (TODO confirm)
 */
private void determineMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
Match match = new Match(dupes, mark1, mark2);
// Matches are bucketed by the concatenated source IDs of the two marks.
String fileKey = mark1.getTokenSrcID() + mark2.getTokenSrcID();
List pairMatches = (ArrayList) fileMap.get(fileKey);
if (pairMatches == null) {
// First match seen for this key: create and register its bucket.
pairMatches = new ArrayList();
fileMap.put(fileKey, pairMatches);
}
boolean add = true;
for (int i = 0; i < pairMatches.size(); i++) {
Match other = (Match) pairMatches.get(i);
// Only consider existing matches whose first occurrence extends past
// the index at which mark1 starts.
if (other.getFirstMark().getIndex() + other.getTokenCount() - mark1.getIndex()
> 0) {
// ordered == true when other's second mark comes before mark2.
boolean ordered = other.getSecondMark().getIndex() - mark2.getIndex() < 0;
// Overlap test on the second occurrences: the earlier one's end index
// must reach past the start of the later one.
// (presumably getEndIndex() is the end of the second occurrence - TODO confirm)
if ((ordered && (other.getEndIndex() - mark2.getIndex() > 0))
|| (!ordered && (match.getEndIndex() - other.getSecondMark().getIndex()) > 0)) {
if (other.getTokenCount() >= match.getTokenCount()) {
// Existing match is at least as long: drop the candidate and stop.
add = false;
break;
} else {
// Candidate is longer: evict the existing match from both the bucket
// and startMap.
// NOTE(review): remove(i) shifts the following elements left while the
// loop still increments i, so the element that moves into slot i is
// never examined - confirm whether that is intended.
pairMatches.remove(i);
startMap.remove(other.getMatchCode());
}
}
}
}
if (add) {
// Candidate survived every comparison: record it in the bucket and in
// startMap under its match code.
pairMatches.add(match);
startMap.put(match.getMatchCode(), match);
}
}
private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) {

View File

@ -12,7 +12,7 @@ import net.sourceforge.pmd.dfa.variableaccess.VariableAccessVisitor;
/**
* @author raik
*
* What about initializers? This only process methods and constructors
* What about initializers? This only processes methods and constructors
*/
public class DataFlowFacade extends JavaParserVisitorAdapter {