From 43750e37999113643e3e08cbb8fd6f855d220ddf Mon Sep 17 00:00:00 2001 From: Jan van Nunen Date: Tue, 6 Jan 2015 17:02:02 +0100 Subject: [PATCH] Added support for Python to CPD. --- pmd-python/etc/grammar/python.jj | 279 ++++++++++++++++++ pmd-python/pom.xml | 96 ++++++ pmd-python/src/main/ant/alljavacc.xml | 45 +++ .../sourceforge/pmd/cpd/PythonLanguage.java | 18 ++ .../sourceforge/pmd/cpd/PythonTokenizer.java | 46 +++ .../pmd/lang/python/PythonHandler.java | 25 ++ .../pmd/lang/python/PythonLanguageModule.java | 25 ++ .../pmd/lang/python/PythonParser.java | 49 +++ .../pmd/lang/python/PythonTokenManager.java | 34 +++ .../services/net.sourceforge.pmd.cpd.Language | 1 + .../net.sourceforge.pmd.lang.Language | 1 + pmd-python/src/site/markdown/index.md | 3 + pmd-python/src/site/site.xml | 12 + .../pmd/LanguageVersionDiscovererTest.java | 32 ++ .../sourceforge/pmd/LanguageVersionTest.java | 27 ++ .../pmd/cpd/PythonTokenizerTest.java | 40 +++ .../net/sourceforge/pmd/cpd/sample-python.py | 279 ++++++++++++++++++ pom.xml | 1 + 18 files changed, 1013 insertions(+) create mode 100644 pmd-python/etc/grammar/python.jj create mode 100644 pmd-python/pom.xml create mode 100644 pmd-python/src/main/ant/alljavacc.xml create mode 100644 pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonLanguage.java create mode 100644 pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java create mode 100644 pmd-python/src/main/java/net/sourceforge/pmd/lang/python/PythonHandler.java create mode 100644 pmd-python/src/main/java/net/sourceforge/pmd/lang/python/PythonLanguageModule.java create mode 100644 pmd-python/src/main/java/net/sourceforge/pmd/lang/python/PythonParser.java create mode 100644 pmd-python/src/main/java/net/sourceforge/pmd/lang/python/PythonTokenManager.java create mode 100644 pmd-python/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language create mode 100644 pmd-python/src/main/resources/META-INF/services/net.sourceforge.pmd.lang.Language create mode 
100644 pmd-python/src/site/markdown/index.md create mode 100644 pmd-python/src/site/site.xml create mode 100644 pmd-python/src/test/java/net/sourceforge/pmd/LanguageVersionDiscovererTest.java create mode 100644 pmd-python/src/test/java/net/sourceforge/pmd/LanguageVersionTest.java create mode 100644 pmd-python/src/test/java/net/sourceforge/pmd/cpd/PythonTokenizerTest.java create mode 100644 pmd-python/src/test/resources/net/sourceforge/pmd/cpd/sample-python.py diff --git a/pmd-python/etc/grammar/python.jj b/pmd-python/etc/grammar/python.jj new file mode 100644 index 0000000000..e69d5886bd --- /dev/null +++ b/pmd-python/etc/grammar/python.jj @@ -0,0 +1,279 @@ +/** + * This Python 2.7 grammar was copied from the PyDev Project. (http://www.pydev.org/) + * + * Original source file: + * https://github.com/aptana/Pydev/blob/development/plugins/org.python.pydev.parser/src/org/python/pydev/parser/grammar27/python.jjt (commit 32950d534139f286e03d34795aec99edab09c04c) + */ + +options { + BUILD_PARSER=false; + CACHE_TOKENS=true; + STATIC=false; + UNICODE_INPUT = true; + USER_CHAR_STREAM=true; +} + +PARSER_BEGIN(PythonParser) + +package net.sourceforge.pmd.lang.python.ast; + +import net.sourceforge.pmd.lang.ast.CharStream; +import net.sourceforge.pmd.lang.ast.TokenMgrError; + +public class PythonParser { + +} + +PARSER_END(PythonParser) + +SKIP : +{ + +| "\t" +| "\014" +| +| +| +} + +TOKEN : /* SEPARATORS */ +{ + < LPAREN: "(" > +| < RPAREN: ")" > +| < LBRACE: "{" > +| < RBRACE: "}" > +| < LBRACKET: "[" > +| < RBRACKET: "]" > +| < SEMICOLON: ";" > +| < COMMA: "," > +| < DOT: "." 
> +| < COLON: ":" > +} + + +TOKEN : /* OPERATORS */ +{ + < PLUS: "+" > +| < MINUS: "-" > +| < MULTIPLY: "*" > +| < DIVIDE: "/" > +| < FLOORDIVIDE: "//" > +| < POWER: "**" > +| < LSHIFT: "<<" > +| < RSHIFT: ">>" > +| < MODULO: "%" > +| < NOT: "~" > +| < XOR: "^" > +| < OR: "|" > +| < AND: "&" > +| < EQUAL: "=" > +| < GREATER: ">" > +| < LESS: "<" > +| < EQEQUAL: "==" > +| < EQLESS: "<=" > +| < EQGREATER: ">=" > +| < LESSGREATER: "<>" > +| < NOTEQUAL: "!=" > +| < PLUSEQ: "+=" > +| < MINUSEQ: "-=" > +| < MULTIPLYEQ: "*=" > +| < DIVIDEEQ: "/=" > +| < FLOORDIVIDEEQ: "//=" > +| < MODULOEQ: "%=" > +| < ANDEQ: "&=" > +| < OREQ: "|=" > +| < XOREQ: "^=" > +| < LSHIFTEQ: "<<=" > +| < RSHIFTEQ: ">>=" > +| < POWEREQ: "**=" > +} + +TOKEN : /* KEYWORDS */ +{ + < OR_BOOL: "or" > +| < AND_BOOL: "and" > +| < NOT_BOOL: "not" > +| < IS: "is" > +| < IN: "in" > +| < LAMBDA: "lambda" > +| < IF: "if" > +| < ELSE: "else" > +| < ELIF: "elif" > +| < WHILE: "while" > +| < FOR: "for" > +| < TRY: "try" > +| < EXCEPT: "except" > +| < DEF: "def" > +| < CLASS: "class" > +| < FINALLY: "finally" > +| < PRINT: "print" > +| < PASS: "pass" > +| < BREAK: "break" > +| < CONTINUE: "continue" > +| < RETURN: "return" > +| < YIELD: "yield" > +| < IMPORT: "import" > +| < FROM: "from" > +| < DEL: "del" > +| < RAISE: "raise" > +| < GLOBAL: "global" > +| < EXEC: "exec" > +| < ASSERT: "assert" > +| < AS: "as" > +| +| < AT: "@" > +} + + +TOKEN : /* Python identifiers */ +{ + < NAME: ( | )* > +| < #LETTER: ["_","a"-"z","A"-"Z"] > +} + + +TOKEN : /* Numeric literals */ +{ + < DECNUMBER: + ["1"-"9"] (["0"-"9"])* (["l", "L"])? + | "0" + > +| < HEXNUMBER: "0" ["x","X"] (["0"-"9","a"-"f","A"-"F"])+ (["l","L"])? > +| < OCTNUMBER: "0" (["o","O"])? (["0"-"7"])* (["l","L"])? > +| < BINNUMBER: "0" (["b","B"])? (["0"-"1"])* (["l","L"])? > +| + < FLOAT: + (["0"-"9"])+ "." (["0"-"9"])* ()? + | "." (["0"-"9"])+ ()? + | (["0"-"9"])+ + > +| < COMPLEX: ( | | "0" ) ["j", "J"]> +| < #EXPONENT: ["e","E"] (["+","-"])? 
(["0"-"9"])+ > +| < #DIGIT: ["0" - "9"] > +} + +MORE : /* Strings */ +{ + < (["u", "U"]) (["r", "R"])? "'" > : IN_USTRING11 +| < (["u", "U"]) (["r", "R"])? "\"" > : IN_USTRING21 +| < (["u", "U"]) (["r", "R"])? "'''" > : IN_USTRING13 +| < (["u", "U"]) (["r", "R"])? "\"\"\"" > : IN_USTRING23 + +| < (["b", "B"]) (["r", "R"])? "'" > : IN_BSTRING11 +| < (["b", "B"]) (["r", "R"])? "\"" > : IN_BSTRING21 +| < (["b", "B"]) (["r", "R"])? "'''" > : IN_BSTRING13 +| < (["b", "B"]) (["r", "R"])? "\"\"\"" > : IN_BSTRING23 + +| < (["r", "R"])? "'" > : IN_STRING11 +| < (["r", "R"])? "\"" > : IN_STRING21 +| < (["r", "R"])? "'''" > : IN_STRING13 +| < (["r", "R"])? "\"\"\"" > : IN_STRING23 +} + + TOKEN : { { + matchedToken.image = image.toString(); } : DEFAULT} + TOKEN : { { + matchedToken.image = image.toString(); } : DEFAULT} + TOKEN : { { + matchedToken.image = image.toString(); } : DEFAULT} + TOKEN : { { + matchedToken.image = image.toString(); } : DEFAULT} + + TOKEN : { { + matchedToken.image = image.toString(); } : DEFAULT} + TOKEN : { { + matchedToken.image = image.toString(); } : DEFAULT} + TOKEN : { { + matchedToken.image = image.toString(); } : DEFAULT} + TOKEN : { { + matchedToken.image = image.toString(); } : DEFAULT} + + TOKEN : { { + matchedToken.image = image.toString(); } : DEFAULT} + TOKEN : { { + matchedToken.image = image.toString(); } : DEFAULT} + TOKEN : { { + matchedToken.image = image.toString(); } : DEFAULT} + TOKEN : { { + matchedToken.image = image.toString(); } : DEFAULT} + + MORE: +{ + <"\\\r\n"> { image.setLength(image.length()-3); } : IN_STRING1NLC +| <("\\" ("\n"|"\r"))> { image.setLength(image.length()-2); } : IN_STRING1NLC +} + + MORE: +{ + <"\\\r\n"> { image.setLength(image.length()-3); } : IN_STRING2NLC +| <("\\" ("\n"|"\r"))> { image.setLength(image.length()-2); } : IN_STRING2NLC +} + + MORE: +{ + <"\\\r\n"> { image.setLength(image.length()-3); } : IN_USTRING1NLC +| <("\\" ("\n"|"\r"))> { image.setLength(image.length()-2); } : IN_USTRING1NLC +} + + 
MORE: +{ + <"\\\r\n"> { image.setLength(image.length()-3); } : IN_USTRING2NLC +| <("\\" ("\n"|"\r"))> { image.setLength(image.length()-2); } : IN_USTRING2NLC +} + + MORE: +{ + <"\\\r\n"> { image.setLength(image.length()-3); } : IN_BSTRING1NLC +| <("\\" ("\n"|"\r"))> { image.setLength(image.length()-2); } : IN_BSTRING1NLC +} + + MORE: +{ + <"\\\r\n"> { image.setLength(image.length()-3); } : IN_BSTRING2NLC +| <("\\" ("\n"|"\r"))> { image.setLength(image.length()-2); } : IN_BSTRING2NLC +} + + MORE: +{ + <""> : IN_STRING11 +} + + MORE: +{ + <""> : IN_STRING21 +} + + MORE: +{ + <""> : IN_USTRING11 +} + + MORE: +{ + <""> : IN_USTRING21 +} + + MORE: +{ + <""> : IN_BSTRING11 +} + + MORE: +{ + <""> : IN_BSTRING21 +} + + MORE: { <("\\" ("\\"|"'")) | ~["\n","\r"]> } + MORE: { <("\\" ("\\"|"\"")) | ~["\n","\r"]> } + MORE: +{ + <"\r\n"> { + int l = image.length(); + image.setLength(l-1); + image.setCharAt(l-2, '\n'); + } +| <"\n"> +| <"\r"> { image.setCharAt(image.length()-1, '\n'); } +| <~["\n","\r"]> +| <"\\" ~["\n","\r"]> +} \ No newline at end of file diff --git a/pmd-python/pom.xml b/pmd-python/pom.xml new file mode 100644 index 0000000000..694fe68373 --- /dev/null +++ b/pmd-python/pom.xml @@ -0,0 +1,96 @@ + + + 4.0.0 + pmd-python + PMD Python + + + net.sourceforge.pmd + pmd + 5.2.4-SNAPSHOT + + + + ${basedir}/../pmd-core + + + + + + maven-resources-plugin + + false + + ${*} + + + + + + org.apache.maven.plugins + maven-antrun-plugin + true + + + generate-sources + generate-sources + + + + + + + + + + run + + + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-javacc-generated-sources + + add-source + + + + ${project.build.directory}/generated-sources/javacc + + + + + + + + org.apache.maven.plugins + maven-site-plugin + + ${project.build.directory}/generated-xdocs + + + + + + + net.sourceforge.pmd + pmd-core + + + + junit + junit + test + + + net.sourceforge.pmd + pmd-test + test + + + diff --git a/pmd-python/src/main/ant/alljavacc.xml 
b/pmd-python/src/main/ant/alljavacc.xml new file mode 100644 index 0000000000..06b4bfddcc --- /dev/null +++ b/pmd-python/src/main/ant/alljavacc.xml @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonLanguage.java b/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonLanguage.java new file mode 100644 index 0000000000..ec54cc1dee --- /dev/null +++ b/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonLanguage.java @@ -0,0 +1,18 @@ +/** + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ +package net.sourceforge.pmd.cpd; + + +/** + * Defines the Language module for Python + */ +public class PythonLanguage extends AbstractLanguage { + + /** + * Creates a new instance of {@link PythonLanguage} with the default extensions for python files. + */ + public PythonLanguage() { + super("Python", "python", new PythonTokenizer(), ".py"); + } +} diff --git a/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java b/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java new file mode 100644 index 0000000000..e940ca7d23 --- /dev/null +++ b/pmd-python/src/main/java/net/sourceforge/pmd/cpd/PythonTokenizer.java @@ -0,0 +1,46 @@ +/** + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ +package net.sourceforge.pmd.cpd; + +import java.io.Reader; +import java.io.StringReader; + +import net.sourceforge.pmd.lang.LanguageRegistry; +import net.sourceforge.pmd.lang.LanguageVersionHandler; +import net.sourceforge.pmd.lang.TokenManager; +import net.sourceforge.pmd.lang.ast.TokenMgrError; +import net.sourceforge.pmd.lang.python.PythonLanguageModule; +import net.sourceforge.pmd.lang.python.ast.Token; +import net.sourceforge.pmd.util.IOUtil; + +import org.apache.commons.io.IOUtils; + +public class PythonTokenizer implements Tokenizer { + + public void tokenize(SourceCode sourceCode, Tokens 
tokenEntries) { + StringBuilder buffer = sourceCode.getCodeBuffer(); + Reader reader = null; + try { + LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(PythonLanguageModule.NAME) + .getDefaultVersion().getLanguageVersionHandler(); + reader = new StringReader(buffer.toString()); + reader = IOUtil.skipBOM(reader); + TokenManager tokenManager = languageVersionHandler.getParser( + languageVersionHandler.getDefaultParserOptions()).getTokenManager(sourceCode.getFileName(), reader); + Token currentToken = (Token) tokenManager.getNextToken(); + while (currentToken.image.length() > 0) { + tokenEntries.add(new TokenEntry(currentToken.image, sourceCode.getFileName(), currentToken.beginLine)); + currentToken = (Token) tokenManager.getNextToken(); + } + tokenEntries.add(TokenEntry.getEOF()); + System.err.println("Added " + sourceCode); + } catch (TokenMgrError err) { + err.printStackTrace(); + System.err.println("Skipping " + sourceCode + " due to parse error"); + tokenEntries.add(TokenEntry.getEOF()); + } finally { + IOUtils.closeQuietly(reader); + } + } +} diff --git a/pmd-python/src/main/java/net/sourceforge/pmd/lang/python/PythonHandler.java b/pmd-python/src/main/java/net/sourceforge/pmd/lang/python/PythonHandler.java new file mode 100644 index 0000000000..f7e586d1ee --- /dev/null +++ b/pmd-python/src/main/java/net/sourceforge/pmd/lang/python/PythonHandler.java @@ -0,0 +1,25 @@ +/** + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ +package net.sourceforge.pmd.lang.python; + +import net.sourceforge.pmd.lang.AbstractLanguageVersionHandler; +import net.sourceforge.pmd.lang.Parser; +import net.sourceforge.pmd.lang.ParserOptions; +import net.sourceforge.pmd.lang.rule.RuleViolationFactory; + +/** + * Implementation of LanguageVersionHandler for the Python Language. 
+ */ +public class PythonHandler extends AbstractLanguageVersionHandler { + + @Override + public RuleViolationFactory getRuleViolationFactory() { + throw new UnsupportedOperationException("getRuleViolationFactory() is not supported for Python"); + } + + @Override + public Parser getParser(ParserOptions parserOptions) { + return new PythonParser(parserOptions); + } +} diff --git a/pmd-python/src/main/java/net/sourceforge/pmd/lang/python/PythonLanguageModule.java b/pmd-python/src/main/java/net/sourceforge/pmd/lang/python/PythonLanguageModule.java new file mode 100644 index 0000000000..40a3506161 --- /dev/null +++ b/pmd-python/src/main/java/net/sourceforge/pmd/lang/python/PythonLanguageModule.java @@ -0,0 +1,25 @@ +/** + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ +package net.sourceforge.pmd.lang.python; + +import net.sourceforge.pmd.lang.BaseLanguageModule; + +/** + * Implementation of the Python Language Module. + */ +public class PythonLanguageModule extends BaseLanguageModule { + + /** The name, that can be used to display the language in UI. */ + public static final String NAME = "Python"; + /** The internal name. */ + public static final String TERSE_NAME = "python"; + + /** + * Creates a new instance of {@link PythonLanguageModule} with the default file extensions for Python. 
+ */ + public PythonLanguageModule() { + super(NAME, null, TERSE_NAME, null, "py"); + addVersion("", new PythonHandler(), true); + } +} diff --git a/pmd-python/src/main/java/net/sourceforge/pmd/lang/python/PythonParser.java b/pmd-python/src/main/java/net/sourceforge/pmd/lang/python/PythonParser.java new file mode 100644 index 0000000000..3c68d3957e --- /dev/null +++ b/pmd-python/src/main/java/net/sourceforge/pmd/lang/python/PythonParser.java @@ -0,0 +1,49 @@ +/** + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ +package net.sourceforge.pmd.lang.python; + +import java.io.Reader; +import java.util.Map; + +import net.sourceforge.pmd.lang.AbstractParser; +import net.sourceforge.pmd.lang.ParserOptions; +import net.sourceforge.pmd.lang.TokenManager; +import net.sourceforge.pmd.lang.ast.AbstractTokenManager; +import net.sourceforge.pmd.lang.ast.Node; +import net.sourceforge.pmd.lang.ast.ParseException; + +/** + * Adapter for the Python Parser. + */ +public class PythonParser extends AbstractParser { + + /** + * Creates a new Python Parser. 
+ * @param parserOptions the options + */ + public PythonParser(ParserOptions parserOptions) { + super(parserOptions); + } + + @Override + public TokenManager createTokenManager(Reader source) { + return new PythonTokenManager(source); + } + + @Override + public boolean canParse() { + return false; + } + + @Override + public Node parse(String fileName, Reader source) throws ParseException { + AbstractTokenManager.setFileName(fileName); + throw new UnsupportedOperationException("parse(Reader) is not supported for Python"); + } + + @Override + public Map getSuppressMap() { + throw new UnsupportedOperationException("getSuppressMap() is not supported for Python"); + } +} diff --git a/pmd-python/src/main/java/net/sourceforge/pmd/lang/python/PythonTokenManager.java b/pmd-python/src/main/java/net/sourceforge/pmd/lang/python/PythonTokenManager.java new file mode 100644 index 0000000000..bdc6e2b8de --- /dev/null +++ b/pmd-python/src/main/java/net/sourceforge/pmd/lang/python/PythonTokenManager.java @@ -0,0 +1,34 @@ +/** + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ +package net.sourceforge.pmd.lang.python; + +import java.io.Reader; + +import net.sourceforge.pmd.lang.TokenManager; +import net.sourceforge.pmd.lang.ast.SimpleCharStream; +import net.sourceforge.pmd.lang.python.ast.PythonParserTokenManager; + +/** + * Python Token Manager implementation. + */ +public class PythonTokenManager implements TokenManager { + private final PythonParserTokenManager tokenManager; + + /** + * Creates a new Python Token Manager from the given source code. 
+ * @param source the source code + */ + public PythonTokenManager(Reader source) { + tokenManager = new PythonParserTokenManager(new SimpleCharStream(source)); + } + + public Object getNextToken() { + return tokenManager.getNextToken(); + } + + @Override + public void setFileName(String fileName) { + PythonParserTokenManager.setFileName(fileName); + } +} diff --git a/pmd-python/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language b/pmd-python/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language new file mode 100644 index 0000000000..29b0e80274 --- /dev/null +++ b/pmd-python/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language @@ -0,0 +1 @@ +net.sourceforge.pmd.cpd.PythonLanguage diff --git a/pmd-python/src/main/resources/META-INF/services/net.sourceforge.pmd.lang.Language b/pmd-python/src/main/resources/META-INF/services/net.sourceforge.pmd.lang.Language new file mode 100644 index 0000000000..3a3c036b90 --- /dev/null +++ b/pmd-python/src/main/resources/META-INF/services/net.sourceforge.pmd.lang.Language @@ -0,0 +1 @@ +net.sourceforge.pmd.lang.python.PythonLanguageModule diff --git a/pmd-python/src/site/markdown/index.md b/pmd-python/src/site/markdown/index.md new file mode 100644 index 0000000000..68427c8367 --- /dev/null +++ b/pmd-python/src/site/markdown/index.md @@ -0,0 +1,3 @@ +# PMD Python + +Only CPD is supported. There are no PMD rules for Python. 
diff --git a/pmd-python/src/site/site.xml b/pmd-python/src/site/site.xml new file mode 100644 index 0000000000..aff4020887 --- /dev/null +++ b/pmd-python/src/site/site.xml @@ -0,0 +1,12 @@ + + + + + + + + diff --git a/pmd-python/src/test/java/net/sourceforge/pmd/LanguageVersionDiscovererTest.java b/pmd-python/src/test/java/net/sourceforge/pmd/LanguageVersionDiscovererTest.java new file mode 100644 index 0000000000..174281de97 --- /dev/null +++ b/pmd-python/src/test/java/net/sourceforge/pmd/LanguageVersionDiscovererTest.java @@ -0,0 +1,32 @@ +package net.sourceforge.pmd; + +import static org.junit.Assert.assertEquals; + +import java.io.File; + +import junit.framework.JUnit4TestAdapter; +import net.sourceforge.pmd.lang.LanguageRegistry; +import net.sourceforge.pmd.lang.LanguageVersion; +import net.sourceforge.pmd.lang.LanguageVersionDiscoverer; +import net.sourceforge.pmd.lang.python.PythonLanguageModule; + +import org.junit.Test; + +public class LanguageVersionDiscovererTest { + + /** + * Checks that a .py file resolves to the default Python language version. + */ + @Test + public void testPython() { + LanguageVersionDiscoverer discoverer = new LanguageVersionDiscoverer(); + File pythonFile = new File("/path/to/MY_PACKAGE.py"); + + LanguageVersion languageVersion = discoverer.getDefaultLanguageVersionForFile(pythonFile); + assertEquals("LanguageVersion must be Python!", LanguageRegistry.getLanguage(PythonLanguageModule.NAME).getDefaultVersion(), languageVersion); + } + + public static junit.framework.Test suite() { + return new JUnit4TestAdapter(LanguageVersionDiscovererTest.class); + } +} diff --git a/pmd-python/src/test/java/net/sourceforge/pmd/LanguageVersionTest.java b/pmd-python/src/test/java/net/sourceforge/pmd/LanguageVersionTest.java new file mode 100644 index 0000000000..0412c99a50 --- /dev/null +++ b/pmd-python/src/test/java/net/sourceforge/pmd/LanguageVersionTest.java @@ -0,0 +1,27 @@ +/** + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ +package 
net.sourceforge.pmd; + +import java.util.Arrays; +import java.util.Collection; + +import net.sourceforge.pmd.lang.LanguageRegistry; +import net.sourceforge.pmd.lang.LanguageVersion; +import net.sourceforge.pmd.lang.python.PythonLanguageModule; + +import org.junit.runners.Parameterized.Parameters; + +public class LanguageVersionTest extends AbstractLanguageVersionTest { + + public LanguageVersionTest(String name, String terseName, String version, LanguageVersion expected) { + super(name, terseName, version, expected); + } + + @Parameters + public static Collection data() { + return Arrays.asList(new Object[][] { + { PythonLanguageModule.NAME, PythonLanguageModule.TERSE_NAME, "", LanguageRegistry.getLanguage(PythonLanguageModule.NAME).getDefaultVersion() } + }); + } +} diff --git a/pmd-python/src/test/java/net/sourceforge/pmd/cpd/PythonTokenizerTest.java b/pmd-python/src/test/java/net/sourceforge/pmd/cpd/PythonTokenizerTest.java new file mode 100644 index 0000000000..269f930e4a --- /dev/null +++ b/pmd-python/src/test/java/net/sourceforge/pmd/cpd/PythonTokenizerTest.java @@ -0,0 +1,40 @@ +/** + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ +package net.sourceforge.pmd.cpd; + +import java.io.IOException; + +import net.sourceforge.pmd.testframework.AbstractTokenizerTest; +import net.sourceforge.pmd.testframework.StreamUtil; + +import org.junit.Before; +import org.junit.Test; + + +public class PythonTokenizerTest extends AbstractTokenizerTest { + + private static final String FILENAME = "sample-python.py"; + + @Before + @Override + public void buildTokenizer() { + this.tokenizer = new PythonTokenizer(); + this.sourceCode = new SourceCode(new SourceCode.StringCodeLoader(this.getSampleCode(), FILENAME)); + } + + @Override + public String getSampleCode() { + return StreamUtil.toString(PythonTokenizer.class.getResourceAsStream(FILENAME)); + } + + @Test + public void tokenizeTest() throws IOException { + this.expectedTokenCount = 1218; + 
super.tokenizeTest(); + } + + public static junit.framework.Test suite() { + return new junit.framework.JUnit4TestAdapter(PythonTokenizerTest.class); + } +} diff --git a/pmd-python/src/test/resources/net/sourceforge/pmd/cpd/sample-python.py b/pmd-python/src/test/resources/net/sourceforge/pmd/cpd/sample-python.py new file mode 100644 index 0000000000..0c7abea4a6 --- /dev/null +++ b/pmd-python/src/test/resources/net/sourceforge/pmd/cpd/sample-python.py @@ -0,0 +1,279 @@ +# Example source code copied from the Django project on GitHub +# https://github.com/django/django/blob/master/django/core/handlers/base.py +from __future__ import unicode_literals + +import logging +import sys +import types + +from django import http +from django.conf import settings +from django.core import urlresolvers +from django.core import signals +from django.core.exceptions import MiddlewareNotUsed, PermissionDenied, SuspiciousOperation +from django.db import connections, transaction +from django.http.multipartparser import MultiPartParserError +from django.utils.encoding import force_text +from django.utils.module_loading import import_string +from django.utils import six +from django.views import debug + +logger = logging.getLogger('django.request') + + +class BaseHandler(object): + # Changes that are always applied to a response (in this order). + response_fixes = [ + http.fix_location_header, + http.conditional_content_removal, + ] + + def __init__(self): + self._request_middleware = None + self._view_middleware = None + self._template_response_middleware = None + self._response_middleware = None + self._exception_middleware = None + + def load_middleware(self): + """ + Populate middleware lists from settings.MIDDLEWARE_CLASSES. + + Must be called after the environment is fixed (see __call__ in subclasses). 
+ """ + self._view_middleware = [] + self._template_response_middleware = [] + self._response_middleware = [] + self._exception_middleware = [] + + request_middleware = [] + for middleware_path in settings.MIDDLEWARE_CLASSES: + mw_class = import_string(middleware_path) + try: + mw_instance = mw_class() + except MiddlewareNotUsed as exc: + if settings.DEBUG: + if six.text_type(exc): + logger.debug('MiddlewareNotUsed(%r): %s', middleware_path, exc) + else: + logger.debug('MiddlewareNotUsed: %r', middleware_path) + continue + + if hasattr(mw_instance, 'process_request'): + request_middleware.append(mw_instance.process_request) + if hasattr(mw_instance, 'process_view'): + self._view_middleware.append(mw_instance.process_view) + if hasattr(mw_instance, 'process_template_response'): + self._template_response_middleware.insert(0, mw_instance.process_template_response) + if hasattr(mw_instance, 'process_response'): + self._response_middleware.insert(0, mw_instance.process_response) + if hasattr(mw_instance, 'process_exception'): + self._exception_middleware.insert(0, mw_instance.process_exception) + + # We only assign to this when initialization is complete as it is used + # as a flag for initialization being complete. 
+ self._request_middleware = request_middleware + + def make_view_atomic(self, view): + non_atomic_requests = getattr(view, '_non_atomic_requests', set()) + for db in connections.all(): + if (db.settings_dict['ATOMIC_REQUESTS'] + and db.alias not in non_atomic_requests): + view = transaction.atomic(using=db.alias)(view) + return view + + def get_exception_response(self, request, resolver, status_code): + try: + callback, param_dict = resolver.resolve_error_handler(status_code) + response = callback(request, **param_dict) + except: + signals.got_request_exception.send(sender=self.__class__, request=request) + response = self.handle_uncaught_exception(request, resolver, sys.exc_info()) + + return response + + def get_response(self, request): + "Returns an HttpResponse object for the given HttpRequest" + + # Setup default url resolver for this thread, this code is outside + # the try/except so we don't get a spurious "unbound local + # variable" exception in the event an exception is raised before + # resolver is set + urlconf = settings.ROOT_URLCONF + urlresolvers.set_urlconf(urlconf) + resolver = urlresolvers.RegexURLResolver(r'^/', urlconf) + try: + response = None + # Apply request middleware + for middleware_method in self._request_middleware: + response = middleware_method(request) + if response: + break + + if response is None: + if hasattr(request, 'urlconf'): + # Reset url resolver with a custom urlconf. 
+ urlconf = request.urlconf + urlresolvers.set_urlconf(urlconf) + resolver = urlresolvers.RegexURLResolver(r'^/', urlconf) + + resolver_match = resolver.resolve(request.path_info) + callback, callback_args, callback_kwargs = resolver_match + request.resolver_match = resolver_match + + # Apply view middleware + for middleware_method in self._view_middleware: + response = middleware_method(request, callback, callback_args, callback_kwargs) + if response: + break + + if response is None: + wrapped_callback = self.make_view_atomic(callback) + try: + response = wrapped_callback(request, *callback_args, **callback_kwargs) + except Exception as e: + # If the view raised an exception, run it through exception + # middleware, and if the exception middleware returns a + # response, use that. Otherwise, reraise the exception. + for middleware_method in self._exception_middleware: + response = middleware_method(request, e) + if response: + break + if response is None: + raise + + # Complain if the view returned None (a common error). + if response is None: + if isinstance(callback, types.FunctionType): # FBV + view_name = callback.__name__ + else: # CBV + view_name = callback.__class__.__name__ + '.__call__' + raise ValueError("The view %s.%s didn't return an HttpResponse object. It returned None instead." + % (callback.__module__, view_name)) + + # If the response supports deferred rendering, apply template + # response middleware and then render the response + if hasattr(response, 'render') and callable(response.render): + for middleware_method in self._template_response_middleware: + response = middleware_method(request, response) + # Complain if the template response middleware returned None (a common error). + if response is None: + raise ValueError( + "%s.process_template_response didn't return an " + "HttpResponse object. It returned None instead." 
+ % (middleware_method.__self__.__class__.__name__)) + response = response.render() + + except http.Http404 as e: + logger.warning('Not Found: %s', request.path, + extra={ + 'status_code': 404, + 'request': request + }) + if settings.DEBUG: + response = debug.technical_404_response(request, e) + else: + response = self.get_exception_response(request, resolver, 404) + + except PermissionDenied: + logger.warning( + 'Forbidden (Permission denied): %s', request.path, + extra={ + 'status_code': 403, + 'request': request + }) + response = self.get_exception_response(request, resolver, 403) + + except MultiPartParserError: + logger.warning( + 'Bad request (Unable to parse request body): %s', request.path, + extra={ + 'status_code': 400, + 'request': request + }) + response = self.get_exception_response(request, resolver, 400) + + except SuspiciousOperation as e: + # The request logger receives events for any problematic request + # The security logger receives events for all SuspiciousOperations + security_logger = logging.getLogger('django.security.%s' % + e.__class__.__name__) + security_logger.error( + force_text(e), + extra={ + 'status_code': 400, + 'request': request + }) + if settings.DEBUG: + return debug.technical_500_response(request, *sys.exc_info(), status_code=400) + + response = self.get_exception_response(request, resolver, 400) + + except SystemExit: + # Allow sys.exit() to actually exit. See tickets #1023 and #4701 + raise + + except: # Handle everything else. + # Get the exception info now, in case another exception is thrown later. + signals.got_request_exception.send(sender=self.__class__, request=request) + response = self.handle_uncaught_exception(request, resolver, sys.exc_info()) + + try: + # Apply response middleware, regardless of the response + for middleware_method in self._response_middleware: + response = middleware_method(request, response) + # Complain if the response middleware returned None (a common error). 
+ if response is None: + raise ValueError( + "%s.process_response didn't return an " + "HttpResponse object. It returned None instead." + % (middleware_method.__self__.__class__.__name__)) + response = self.apply_response_fixes(request, response) + except: # Any exception should be gathered and handled + signals.got_request_exception.send(sender=self.__class__, request=request) + response = self.handle_uncaught_exception(request, resolver, sys.exc_info()) + + response._closable_objects.append(request) + + return response + + def handle_uncaught_exception(self, request, resolver, exc_info): + """ + Processing for any otherwise uncaught exceptions (those that will + generate HTTP 500 responses). Can be overridden by subclasses who want + customised 500 handling. + + Be *very* careful when overriding this because the error could be + caused by anything, so assuming something like the database is always + available would be an error. + """ + if settings.DEBUG_PROPAGATE_EXCEPTIONS: + raise + + logger.error('Internal Server Error: %s', request.path, + exc_info=exc_info, + extra={ + 'status_code': 500, + 'request': request + } + ) + + if settings.DEBUG: + return debug.technical_500_response(request, *exc_info) + + # If Http500 handler is not installed, re-raise last exception + if resolver.urlconf_module is None: + six.reraise(*exc_info) + # Return an HttpResponse that displays a friendly error message. + callback, param_dict = resolver.resolve_error_handler(500) + return callback(request, **param_dict) + + def apply_response_fixes(self, request, response): + """ + Applies each of the functions in self.response_fixes to the request and + response, modifying the response in the process. Returns the new + response. 
+ """ + for func in self.response_fixes: + response = func(request, response) + return response \ No newline at end of file diff --git a/pom.xml b/pom.xml index 3380601831..57eae9323c 100644 --- a/pom.xml +++ b/pom.xml @@ -866,6 +866,7 @@ pmd-jsp pmd-php pmd-plsql + pmd-python pmd-ruby pmd-test pmd-vm