diff --git a/pmd-python/etc/grammar/python.jj b/pmd-python/etc/grammar/python.jj index e69d5886bd..60f723870c 100644 --- a/pmd-python/etc/grammar/python.jj +++ b/pmd-python/etc/grammar/python.jj @@ -33,7 +33,11 @@ SKIP : | "\014" | | -| +} + +SPECIAL_TOKEN : +{ + } TOKEN : /* SEPARATORS */ diff --git a/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java b/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java index 9ab7965b0b..1783278024 100644 --- a/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java +++ b/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java @@ -9,9 +9,10 @@ import java.io.StringReader; import org.apache.commons.io.IOUtils; +import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter; +import net.sourceforge.pmd.cpd.token.TokenFilter; import net.sourceforge.pmd.lang.LanguageRegistry; import net.sourceforge.pmd.lang.LanguageVersionHandler; -import net.sourceforge.pmd.lang.TokenManager; import net.sourceforge.pmd.lang.ast.TokenMgrError; import net.sourceforge.pmd.lang.python.PythonLanguageModule; import net.sourceforge.pmd.lang.python.ast.Token; @@ -31,13 +32,13 @@ public class PythonTokenizer implements Tokenizer { .getDefaultVersion().getLanguageVersionHandler(); reader = new StringReader(buffer.toString()); reader = IOUtil.skipBOM(reader); - TokenManager tokenManager = languageVersionHandler + TokenFilter tokenFilter = new JavaCCTokenFilter(languageVersionHandler .getParser(languageVersionHandler.getDefaultParserOptions()) - .getTokenManager(sourceCode.getFileName(), reader); - Token currentToken = (Token) tokenManager.getNextToken(); - while (currentToken.image.length() > 0) { + .getTokenManager(sourceCode.getFileName(), reader)); + Token currentToken = (Token) tokenFilter.getNextToken(); + while (currentToken != null) { tokenEntries.add(new TokenEntry(currentToken.image, sourceCode.getFileName(), currentToken.beginLine)); - currentToken = (Token) tokenManager.getNextToken(); + currentToken = (Token) tokenFilter.getNextToken(); } tokenEntries.add(TokenEntry.getEOF()); System.err.println("Added " + sourceCode); diff --git a/pmd-python/src/test/java/net/sourceforge/pmd/cpd/PythonTokenizerTest.java b/pmd-python/src/test/java/net/sourceforge/pmd/cpd/PythonTokenizerTest.java index 16798c6e64..eaa8cc1154 100644 --- a/pmd-python/src/test/java/net/sourceforge/pmd/cpd/PythonTokenizerTest.java +++ b/pmd-python/src/test/java/net/sourceforge/pmd/cpd/PythonTokenizerTest.java @@ -4,12 +4,15 @@ package net.sourceforge.pmd.cpd; +import static org.junit.Assert.assertEquals; + import java.io.IOException; import org.apache.commons.io.IOUtils; import org.junit.Before; import org.junit.Test; +import net.sourceforge.pmd.PMD; import net.sourceforge.pmd.testframework.AbstractTokenizerTest; public class PythonTokenizerTest extends AbstractTokenizerTest { @@ -33,4 +36,20 @@ public class PythonTokenizerTest extends AbstractTokenizerTest { this.expectedTokenCount = 1218; super.tokenizeTest(); } + + @Test + public void testIgnoreBetweenSpecialComments() throws IOException { + SourceCode sourceCode = new SourceCode(new SourceCode.StringCodeLoader("import logging" + PMD.EOL + + "# CPD-OFF" + PMD.EOL + + "logger = logging.getLogger('django.request')" + PMD.EOL + + "class BaseHandler(object):" + PMD.EOL + + " def __init__(self):" + PMD.EOL + + " self._request_middleware = None" + PMD.EOL + + " # CPD-ON" + PMD.EOL + )); + Tokens tokens = new Tokens(); + tokenizer.tokenize(sourceCode, tokens); + TokenEntry.getEOF(); + assertEquals(3, tokens.size()); // 3 tokens: "import" + "logging" + EOF + } }