[scala] adding support for CPD-ON and CPD-OFF special comments

Andy Robinson 2020-11-16 12:08:47 +00:00
parent bc4938087a
commit cb5ce5d9d1
5 changed files with 194 additions and 14 deletions
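For context, CPD's existing CPD-OFF / CPD-ON suppression comments now also apply to Scala sources. The sketch below shows roughly how that could be exercised through CPD's programmatic Java API (the file name and minimum-token threshold are arbitrary, and the exact configuration calls are written from memory, so treat them as an assumption rather than a verbatim recipe): code fenced by // CPD-OFF ... // CPD-ON in the Scala file should no longer contribute to duplication matches.

import java.io.File;

import net.sourceforge.pmd.cpd.CPD;
import net.sourceforge.pmd.cpd.CPDConfiguration;
import net.sourceforge.pmd.cpd.LanguageFactory;

public class ScalaCpdSuppressionSketch {
    public static void main(String[] args) throws Exception {
        CPDConfiguration config = new CPDConfiguration();
        config.setLanguage(LanguageFactory.createLanguage("scala"));
        config.setMinimumTileSize(10); // arbitrary threshold for the example

        CPD cpd = new CPD(config);
        cpd.add(new File("SomeScalaSource.scala")); // hypothetical input file
        cpd.go();

        // Anything between // CPD-OFF and // CPD-ON never reaches the token
        // stream, so it cannot appear in a reported match.
        System.out.println("Duplicates found: " + cpd.getMatches().hasNext());
    }
}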


@@ -0,0 +1,59 @@
/**
 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
 */

package net.sourceforge.pmd.cpd;

import net.sourceforge.pmd.lang.ast.GenericToken;

import scala.meta.tokens.Token;

/**
 * Adapts the scala.meta.tokens.Token so that it can be used with the generic BaseTokenFilter
 */
public class ScalaTokenAdapter implements GenericToken {

    private Token token;
    private GenericToken previousComment;

    ScalaTokenAdapter(Token token, GenericToken comment) {
        this.token = token;
        this.previousComment = comment;
    }

    @Override
    public GenericToken getNext() {
        // not required
        return null;
    }

    @Override
    public GenericToken getPreviousComment() {
        return previousComment;
    }

    @Override
    public String getImage() {
        return token.text();
    }

    @Override
    public int getBeginLine() {
        return token.pos().startLine();
    }

    @Override
    public int getEndLine() {
        return token.pos().endLine();
    }

    @Override
    public int getBeginColumn() {
        return token.pos().startColumn();
    }

    @Override
    public int getEndColumn() {
        return token.pos().endColumn();
    }
}
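The adapter's main addition over a plain scala.meta token is the previousComment link, which lets a downstream filter look back at the comment that immediately precedes a token. A minimal sketch of that kind of check (a hypothetical helper, not part of this commit; only GenericToken, getImage() and getPreviousComment() come from the code above):

import net.sourceforge.pmd.lang.ast.GenericToken;

final class SuppressionCheckSketch {
    // Hypothetical helper: inspects the comment recorded before a token to see
    // whether a CPD-OFF suppression marker precedes it.
    static boolean precededByCpdOff(GenericToken token) {
        GenericToken comment = token.getPreviousComment();
        return comment != null && comment.getImage().contains("CPD-OFF");
    }
}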


@@ -9,8 +9,11 @@ import java.util.Properties;
 import org.apache.commons.lang3.StringUtils;
+import net.sourceforge.pmd.cpd.token.internal.BaseTokenFilter;
 import net.sourceforge.pmd.lang.LanguageRegistry;
 import net.sourceforge.pmd.lang.LanguageVersion;
+import net.sourceforge.pmd.lang.TokenManager;
+import net.sourceforge.pmd.lang.ast.GenericToken;
 import net.sourceforge.pmd.lang.ast.TokenMgrError;
 import net.sourceforge.pmd.lang.scala.ScalaLanguageHandler;
 import net.sourceforge.pmd.lang.scala.ScalaLanguageModule;
@@ -24,7 +27,7 @@ import scala.meta.tokenizers.TokenizeException;
 import scala.meta.tokens.Token;
 /**
- * Scala Tokenizer class. Uses the Scala Meta Tokenizer.
+ * Scala Tokenizer class. Uses the Scala Meta Tokenizer, but adapts it for use with generic filtering
  */
 public class ScalaTokenizer implements Tokenizer {
@@ -72,19 +75,21 @@ public class ScalaTokenizer implements Tokenizer {
         // tokenize with a filter
         try {
             scala.meta.tokens.Tokens tokens = tokenizer.tokenize();
-            ScalaTokenFilter filter = new ScalaTokenFilter(tokens.iterator());
-            Token token;
+            // use extensions to the standard PMD TokenManager and Filter
+            ScalaTokenManager scalaTokenManager = new ScalaTokenManager(tokens.iterator());
+            ScalaTokenFilter filter = new ScalaTokenFilter(scalaTokenManager);
+            GenericToken token;
             while ((token = filter.getNextToken()) != null) {
-                if (StringUtils.isEmpty(token.text())) {
+                if (StringUtils.isEmpty(token.getImage())) {
                     continue;
                 }
-                Position pos = token.pos();
-                TokenEntry cpdToken = new TokenEntry(token.text(),
+                TokenEntry cpdToken = new TokenEntry(token.getImage(),
                         filename,
-                        pos.startLine() + 1,
-                        pos.startColumn() + 1,
-                        pos.endColumn() + 1);
+                        token.getBeginLine() + 1,
+                        token.getBeginColumn() + 1,
+                        token.getEndColumn() + 1);
                 tokenEntries.add(cpdToken);
             }
         } catch (Exception e) {
@@ -103,19 +108,26 @@ public class ScalaTokenizer implements Tokenizer {
     }
     /**
-     * Token Filter skips un-helpful tokens to only register important tokens
+     * Implementation of the generic Token Manager, also skips un-helpful tokens to only register important tokens
      * and patterns.
+     *
+     * Keeps track of comments, for special comment processing
      */
-    private static class ScalaTokenFilter {
+    private class ScalaTokenManager implements TokenManager {
         Iterator<Token> tokenIter;
         Class<?>[] skippableTokens = new Class<?>[] { Token.Space.class, Token.Tab.class, Token.CR.class,
             Token.LF.class, Token.FF.class, Token.LFLF.class, Token.EOF.class };
-        ScalaTokenFilter(Iterator<Token> iterator) {
+        GenericToken previousComment = null;
+        GenericToken result = null;
+        ScalaTokenManager(Iterator<Token> iterator) {
             this.tokenIter = iterator;
         }
-        Token getNextToken() {
+        @Override
+        public GenericToken getNextToken() {
             if (!tokenIter.hasNext()) {
                 return null;
             }
@@ -125,7 +137,13 @@ public class ScalaTokenizer implements Tokenizer {
                 token = tokenIter.next();
             } while (token != null && skipToken(token) && tokenIter.hasNext());
-            return token;
+            result = new ScalaTokenAdapter(token, previousComment);
+            if (isComment(token)) {
+                previousComment = result;
+            }
+            return result;
         }
         private boolean skipToken(Token token) {
@@ -137,5 +155,27 @@ public class ScalaTokenizer implements Tokenizer {
             }
             return skip;
         }
+        private boolean isComment(Token token) {
+            return token != null && (token.text().startsWith("//") || token.text().startsWith("/*"));
+        }
+        @Override
+        public void setFileName(String fileName) {
+            throw new UnsupportedOperationException("setFileName deprecated");
+        }
     }
+    private class ScalaTokenFilter extends BaseTokenFilter<ScalaTokenAdapter> {
+        ScalaTokenFilter(TokenManager tokenManager) {
+            super(tokenManager);
+        }
+        @Override
+        protected boolean shouldStopProcessing(ScalaTokenAdapter currentToken) {
+            return currentToken == null;
+        }
+    }
 }
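A rough usage sketch of the reworked tokenizer follows (assuming ScalaTokenizer's no-argument constructor and CPD's SourceCode / Tokens helpers; the class name and string literal are made up for illustration). Tokens between a CPD-OFF comment and the next CPD-ON comment should never reach tokenEntries:

import net.sourceforge.pmd.cpd.SourceCode;
import net.sourceforge.pmd.cpd.Tokens;

public class SuppressionTokenizeSketch {
    public static void main(String[] args) throws Exception {
        String code = "// CPD-OFF\n"
                + "case class Hidden(i: Int)\n"
                + "// CPD-ON\n"
                + "case class Visible(i: Int)\n";

        Tokens tokenEntries = new Tokens();
        new ScalaTokenizer().tokenize(
                new SourceCode(new SourceCode.StringCodeLoader(code)), tokenEntries);

        // Expected: the CPD-OFF comment and the tokens of Visible remain, while
        // everything between CPD-OFF and CPD-ON (including CPD-ON itself) is dropped.
        System.out.println(tokenEntries.getTokens().size() + " tokens kept");
    }
}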


@@ -36,6 +36,11 @@ public class ScalaTokenizerTest extends CpdTextComparisonTest {
         doTest("sample-LiftActor");
     }
+    @Test
+    public void testSuppressionComments() {
+        doTest("special_comments");
+    }
     @Test
     public void tokenizeFailTest() {
         ex.expect(TokenMgrError.class);


@@ -0,0 +1,24 @@
// Testing CPD suppression
// Irrelevant comment
// CPD-OFF
// CPD-ON
case class Foo() { // special multiline comments
/* CPD-OFF
*
*
* */ val hi = "Hello" /* This is a comment ending in CPD-ON */
private def bar(i: Int) : Int = {
val CPD = 40
val OFF = 60
CPD-OFF // This should tokenize
}
/* CPD-OFF */
def bar2(s: String): String = "bar2"
/* CPD-ON */
}


@@ -0,0 +1,52 @@
[Image] or [Truncated image[ Bcol Ecol
L1
[// Testing CPD suppression] 1 27
L3
[// Irrelevant comment] 1 22
L4
[// CPD-OFF] 1 11
L7
[case] 1 5
[class] 6 11
[Foo] 12 15
[(] 15 16
[)] 16 17
[{] 18 19
[// special multiline comments] 20 49
L9
[/* CPD-OFF\n *\n *\n * */] 3 7
L14
[private] 3 10
[def] 11 14
[bar] 15 18
[(] 18 19
[i] 19 20
[:] 20 21
[Int] 22 25
[)] 25 26
[:] 27 28
[Int] 29 32
[=] 33 34
[{] 35 36
L15
[val] 5 8
[CPD] 9 12
[=] 13 14
[40] 15 17
L16
[val] 5 8
[OFF] 9 12
[=] 13 14
[60] 15 17
L17
[CPD] 5 8
[-] 8 9
[OFF] 9 12
[// This should tokenize] 15 38
L18
[}] 3 4
L20
[/* CPD-OFF */] 3 16
L24
[}] 1 2
EOF