From 636cf15009786032b1384f8ea34861c931b5dd2b Mon Sep 17 00:00:00 2001 From: wener Date: Tue, 21 Feb 2023 19:30:28 +0100 Subject: [PATCH] Added support for Julia code duplication. --- docs/_data/sidebars/pmd_sidebar.yml | 3 + docs/pages/pmd/languages/julia.md | 15 ++ pmd-dist/pom.xml | 5 + .../pmd/it/BinaryDistributionIT.java | 4 +- pmd-julia/pom.xml | 59 +++++++ .../sourceforge/pmd/lang/julia/ast/Julia.g4 | 163 ++++++++++++++++++ .../pmd/lang/julia/ast/package-info.java | 8 + .../pmd/lang/julia/cpd/JuliaLanguage.java | 20 +++ .../pmd/lang/julia/cpd/JuliaTokenizer.java | 30 ++++ .../pmd/lang/julia/cpd/package-info.java | 8 + .../services/net.sourceforge.pmd.cpd.Language | 1 + .../pmd/cpd/JuliaTokenizerTest.java | 34 ++++ .../lang/julia/cpd/testdata/mathExample.jl | 33 ++++ .../lang/julia/cpd/testdata/mathExample.txt | 153 ++++++++++++++++ pom.xml | 1 + 15 files changed, 535 insertions(+), 2 deletions(-) create mode 100644 docs/pages/pmd/languages/julia.md create mode 100644 pmd-julia/pom.xml create mode 100644 pmd-julia/src/main/antlr4/net/sourceforge/pmd/lang/julia/ast/Julia.g4 create mode 100644 pmd-julia/src/main/java/net/sourceforge/pmd/lang/julia/ast/package-info.java create mode 100644 pmd-julia/src/main/java/net/sourceforge/pmd/lang/julia/cpd/JuliaLanguage.java create mode 100644 pmd-julia/src/main/java/net/sourceforge/pmd/lang/julia/cpd/JuliaTokenizer.java create mode 100644 pmd-julia/src/main/java/net/sourceforge/pmd/lang/julia/cpd/package-info.java create mode 100644 pmd-julia/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language create mode 100644 pmd-julia/src/test/java/net/sourceforge/pmd/cpd/JuliaTokenizerTest.java create mode 100644 pmd-julia/src/test/resources/net/sourceforge/pmd/lang/julia/cpd/testdata/mathExample.jl create mode 100644 pmd-julia/src/test/resources/net/sourceforge/pmd/lang/julia/cpd/testdata/mathExample.txt diff --git a/docs/_data/sidebars/pmd_sidebar.yml b/docs/_data/sidebars/pmd_sidebar.yml index 
043d62d9ab..ed41ac0ad8 100644 --- a/docs/_data/sidebars/pmd_sidebar.yml +++ b/docs/_data/sidebars/pmd_sidebar.yml @@ -406,6 +406,9 @@ entries: - title: Gherkin url: /pmd_languages_gherkin.html output: web, pdf + - title: Julia + url: /pmd_languages_julia.html + output: web, pdf - title: Developer Documentation output: web, pdf folderitems: diff --git a/docs/pages/pmd/languages/julia.md b/docs/pages/pmd/languages/julia.md new file mode 100644 index 0000000000..c13e4cdc95 --- /dev/null +++ b/docs/pages/pmd/languages/julia.md @@ -0,0 +1,15 @@ +--- +title: Julia +permalink: pmd_languages_julia.html +--- + +[Julia](https://julialang.org/) is dynamically typed, like a scripting language, +and has good support for interactive use. +Julia was designed from the beginning for high performance. +Julia programs compile to efficient native code for multiple platforms via LLVM. + +## Support in PMD +Starting from version 6.55.0, Julia support was added to CPD. + +### Limitations +- Support for Julia extends only to CPD, which detects code duplication in Julia source files. 
diff --git a/pmd-dist/pom.xml b/pmd-dist/pom.xml index 17850a571b..48600b944e 100644 --- a/pmd-dist/pom.xml +++ b/pmd-dist/pom.xml @@ -179,6 +179,11 @@ pmd-jsp ${project.version} + + net.sourceforge.pmd + pmd-julia + ${project.version} + net.sourceforge.pmd pmd-kotlin diff --git a/pmd-dist/src/test/java/net/sourceforge/pmd/it/BinaryDistributionIT.java b/pmd-dist/src/test/java/net/sourceforge/pmd/it/BinaryDistributionIT.java index 36cf4f1b2e..ac31bdeb9f 100644 --- a/pmd-dist/src/test/java/net/sourceforge/pmd/it/BinaryDistributionIT.java +++ b/pmd-dist/src/test/java/net/sourceforge/pmd/it/BinaryDistributionIT.java @@ -28,10 +28,10 @@ public class BinaryDistributionIT extends AbstractBinaryDistributionTest { static { // note: apex, javascript, visualforce, and scala require java8 if (PMDExecutor.isJava7Test()) { - SUPPORTED_LANGUAGES_CPD = "Supported languages: [cpp, cs, dart, fortran, gherkin, go, groovy, java, jsp, kotlin, lua, matlab, modelica, objectivec, perl, php, plsql, python, ruby, swift, tsql, xml]"; + SUPPORTED_LANGUAGES_CPD = "Supported languages: [cpp, cs, dart, fortran, gherkin, go, groovy, java, jsp, julia, kotlin, lua, matlab, modelica, objectivec, perl, php, plsql, python, ruby, swift, tsql, xml]"; SUPPORTED_LANGUAGES_PMD = "java, jsp, modelica, plsql, pom, vm, wsdl, xml, xsl"; } else { - SUPPORTED_LANGUAGES_CPD = "Supported languages: [apex, cpp, cs, dart, ecmascript, fortran, gherkin, go, groovy, html, java, jsp, kotlin, lua, matlab, modelica, objectivec, perl, php, plsql, python, ruby, scala, swift, tsql, vf, xml]"; + SUPPORTED_LANGUAGES_CPD = "Supported languages: [apex, cpp, cs, dart, ecmascript, fortran, gherkin, go, groovy, html, java, jsp, julia, kotlin, lua, matlab, modelica, objectivec, perl, php, plsql, python, ruby, scala, swift, tsql, vf, xml]"; SUPPORTED_LANGUAGES_PMD = "apex, ecmascript, html, java, jsp, modelica, plsql, pom, scala, vf, vm, wsdl, xml, xsl"; } } diff --git a/pmd-julia/pom.xml b/pmd-julia/pom.xml new file mode 100644 
index 0000000000..f60ba10c04 --- /dev/null +++ b/pmd-julia/pom.xml @@ -0,0 +1,59 @@ + + + 4.0.0 + pmd-julia + PMD Julia + + + net.sourceforge.pmd + pmd + 6.55.0-SNAPSHOT + ../pom.xml + + + + + + org.antlr + antlr4-maven-plugin + + + + maven-resources-plugin + + false + + ${*} + + + + + + + + + net.sourceforge.pmd + pmd-core + + + org.antlr + antlr4-runtime + + + + junit + junit + test + + + net.sourceforge.pmd + pmd-test + test + + + net.sourceforge.pmd + pmd-lang-test + test + + + diff --git a/pmd-julia/src/main/antlr4/net/sourceforge/pmd/lang/julia/ast/Julia.g4 b/pmd-julia/src/main/antlr4/net/sourceforge/pmd/lang/julia/ast/Julia.g4 new file mode 100644 index 0000000000..2bd1e56efe --- /dev/null +++ b/pmd-julia/src/main/antlr4/net/sourceforge/pmd/lang/julia/ast/Julia.g4 @@ -0,0 +1,163 @@ +grammar Julia; + +// Parser + +// TODO + +// Calculate cyclomatic complexity +// Create tokenizer for CPD +// Publish first version 0.1 in Github and remain public for all increments + +main + : functionBody (functionDefinition functionBody)* END? EOF + ; + +functionDefinition + : functionDefinition1 + | functionDefinition2 + ; + +functionDefinition1 + : FUNCTION IDENTIFIER? anyToken*? ('(' anyToken*? ')' whereClause*? functionBody)? END + ; + +functionDefinition2 + : functionIdentifier '(' anyToken*? ')' whereClause*? '=' functionBody + ; + +functionIdentifier + : IDENTIFIER + | '(' anyToken*? ')' // Operator + ; + +whereClause + : WHERE anyToken*? + ; + +functionBody + : anyToken*? (statement anyToken*?)*? + ; + +statement + : beginStatement + | doStatement + | forStatement + | functionDefinition1 + | ifStatement + | letStatement + | macroStatement + | structStatement + | tryCatchStatement + | typeStatement + | whileStatement + ; + +beginStatement + : BEGIN functionBody END + ; + +doStatement + : DO functionBody END + ; + +forStatement + : FOR functionBody END + ; + +ifStatement + : IF functionBody (ELSIF functionBody)* (ELSE functionBody)? 
END + ; + +letStatement + : LET functionBody END + ; + +macroStatement + : MACRO functionBody END + ; + +structStatement + : STRUCT functionBody END + ; + +tryCatchStatement + : TRY functionBody (CATCH functionBody)? (FINALLY functionBody)? END + ; + +typeStatement + : TYPE functionBody END + ; + +whileStatement + : WHILE functionBody END + ; + +anyToken + : ANY + | BEGIN + | CATCH + | CHAR + | DO + | ELSE + | ELSIF + | END + | EXTERNALCOMMAND + | FINALLY + | FOR + | FUNCTION + | IDENTIFIER + | IF + | LET + | MACRO + | STRING + | STRUCT + | TRY + | TYPE + | WHERE + | WHILE + | '(' anyToken*? ')' + | '[' anyToken*? ']' + | '{' anyToken*? '}' + | '=' + | '&&' // short-circuit + | '||' // short-circuit + | '==' // to disambiguate from "=" + ; + +// Lexer + +COMMENTS : '#' (~[=\r\n]~[\r\n]*)? -> skip; // skip #= because otherwise multiline comments are not recognized, see next line +MULTILINECOMMENTS1 : '#=' .*? '=#' -> skip; +MULTILINECOMMENTS2 : '```' .*? '```' -> skip; +MULTILINESTRING : '"""' ('\\"'|.)*? '"""' -> skip; +NL : '\r'? '\n' -> skip ; +WHITESPACE : [ \t]+ -> skip ; + +BEGIN : 'begin' ; +CATCH : 'catch' ; +CHAR : '\'' '\\'? .? '\'' ; +DO : 'do' ; +ELSE : 'else' ; +ELSIF : 'elseif' ; /* Julia spells this keyword "elseif"; the token name ELSIF is kept so parser rules and generated constants are unchanged */ +END : 'end' ; +EXTERNALCOMMAND : '`' .*? '`' ; +FINALLY : 'finally' ; +FOR : 'for' ; +FUNCTION : 'function' ; +IF : 'if' ; +LET : 'let' ; +MACRO : 'macro' ; +STRING : '"' ('\\\\'|'\\"'|'$(' ('$(' .*? ')'|'"' .*? '"'|.)*? ')'|.)*? '"'; +STRUCT : 'struct' ; +TRY : 'try' ; +TYPE : 'type' ; +WHERE : 'where' ; +WHILE : 'while' ; + +IDENTIFIER : [$a-zA-Z_] [a-zA-Z_0-9]* ; + +ANY : . 
; + + + + diff --git a/pmd-julia/src/main/java/net/sourceforge/pmd/lang/julia/ast/package-info.java b/pmd-julia/src/main/java/net/sourceforge/pmd/lang/julia/ast/package-info.java new file mode 100644 index 0000000000..f72bcf2f32 --- /dev/null +++ b/pmd-julia/src/main/java/net/sourceforge/pmd/lang/julia/ast/package-info.java @@ -0,0 +1,8 @@ +/* + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +/** + * Contains the Antlr grammar for Julia. + */ +package net.sourceforge.pmd.lang.julia.ast; diff --git a/pmd-julia/src/main/java/net/sourceforge/pmd/lang/julia/cpd/JuliaLanguage.java b/pmd-julia/src/main/java/net/sourceforge/pmd/lang/julia/cpd/JuliaLanguage.java new file mode 100644 index 0000000000..d6bd001607 --- /dev/null +++ b/pmd-julia/src/main/java/net/sourceforge/pmd/lang/julia/cpd/JuliaLanguage.java @@ -0,0 +1,20 @@ +/* + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.lang.julia.cpd; + +import net.sourceforge.pmd.cpd.AbstractLanguage; + +/** + * CPD language implementation for Julia, pairing a {@link JuliaTokenizer} with the {@code .jl} extension. + */ +public class JuliaLanguage extends AbstractLanguage { + + /** + * Creates the Julia language, passing a new {@link JuliaTokenizer} and the {@code .jl} extension to the base class. 
+ */ + public JuliaLanguage() { + super("Julia", "julia", new JuliaTokenizer(), ".jl"); + } +} diff --git a/pmd-julia/src/main/java/net/sourceforge/pmd/lang/julia/cpd/JuliaTokenizer.java b/pmd-julia/src/main/java/net/sourceforge/pmd/lang/julia/cpd/JuliaTokenizer.java new file mode 100644 index 0000000000..17ca973212 --- /dev/null +++ b/pmd-julia/src/main/java/net/sourceforge/pmd/lang/julia/cpd/JuliaTokenizer.java @@ -0,0 +1,30 @@ +/* + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.lang.julia.cpd; + +import org.antlr.v4.runtime.CharStream; + +import net.sourceforge.pmd.cpd.AntlrTokenizer; +import net.sourceforge.pmd.cpd.SourceCode; +import net.sourceforge.pmd.cpd.token.AntlrTokenFilter; +import net.sourceforge.pmd.lang.antlr.AntlrTokenManager; +import net.sourceforge.pmd.lang.julia.ast.JuliaLexer; + +/** + * CPD tokenizer for Julia; wraps a {@link JuliaLexer} over the source in an {@link AntlrTokenManager} and applies a plain {@link AntlrTokenFilter}. + */ +public class JuliaTokenizer extends AntlrTokenizer { + + @Override + protected AntlrTokenManager getLexerForSource(SourceCode sourceCode) { + CharStream charStream = AntlrTokenizer.getCharStreamFromSourceCode(sourceCode); + return new AntlrTokenManager(new JuliaLexer(charStream), sourceCode.getFileName()); + } + + @Override + protected AntlrTokenFilter getTokenFilter(final AntlrTokenManager tokenManager) { + return new AntlrTokenFilter(tokenManager); + } +} diff --git a/pmd-julia/src/main/java/net/sourceforge/pmd/lang/julia/cpd/package-info.java b/pmd-julia/src/main/java/net/sourceforge/pmd/lang/julia/cpd/package-info.java new file mode 100644 index 0000000000..e052fae87a --- /dev/null +++ b/pmd-julia/src/main/java/net/sourceforge/pmd/lang/julia/cpd/package-info.java @@ -0,0 +1,8 @@ +/* + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +/** + * Contains Julia tokenizer and language classes. 
+ */ +package net.sourceforge.pmd.lang.julia.cpd; diff --git a/pmd-julia/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language b/pmd-julia/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language new file mode 100644 index 0000000000..bd23fbae93 --- /dev/null +++ b/pmd-julia/src/main/resources/META-INF/services/net.sourceforge.pmd.cpd.Language @@ -0,0 +1 @@ +net.sourceforge.pmd.lang.julia.cpd.JuliaLanguage diff --git a/pmd-julia/src/test/java/net/sourceforge/pmd/cpd/JuliaTokenizerTest.java b/pmd-julia/src/test/java/net/sourceforge/pmd/cpd/JuliaTokenizerTest.java new file mode 100644 index 0000000000..7a0fdda562 --- /dev/null +++ b/pmd-julia/src/test/java/net/sourceforge/pmd/cpd/JuliaTokenizerTest.java @@ -0,0 +1,34 @@ +/* + * BSD-style license; for more info see http://pmd.sourceforge.net/license.html + */ + +package net.sourceforge.pmd.cpd; + +import java.util.Properties; + +import org.junit.Test; + +import net.sourceforge.pmd.cpd.test.CpdTextComparisonTest; +import net.sourceforge.pmd.lang.julia.cpd.JuliaTokenizer; + +public class JuliaTokenizerTest extends CpdTextComparisonTest { + public JuliaTokenizerTest() { + super(".jl"); + } + + @Override + protected String getResourcePrefix() { + return "../lang/julia/cpd/testdata"; + } + + @Override + public Tokenizer newTokenizer(Properties properties) { + /* The supplied properties are not used: a plain JuliaTokenizer is returned. */ + return new JuliaTokenizer(); + } + + @Test + public void testMathExample() { + doTest("mathExample"); + } +} diff --git a/pmd-julia/src/test/resources/net/sourceforge/pmd/lang/julia/cpd/testdata/mathExample.jl b/pmd-julia/src/test/resources/net/sourceforge/pmd/lang/julia/cpd/testdata/mathExample.jl new file mode 100644 index 0000000000..8c70fdef3e --- /dev/null +++ b/pmd-julia/src/test/resources/net/sourceforge/pmd/lang/julia/cpd/testdata/mathExample.jl @@ -0,0 +1,33 @@ +# function to calculate the volume of a sphere +function sphere_vol(r) + # julia allows Unicode names (in UTF-8 encoding) + # so either "pi" or 
the symbol π can be used + return 4/3*pi*r^3 +end + +# functions can also be defined more succinctly +quadratic(a, sqr_term, b) = (-b + sqr_term) / 2a + +# calculates x for 0 = a*x^2+b*x+c, arguments types can be defined in function definitions +function quadratic2(a::Float64, b::Float64, c::Float64) + # unlike other languages 2a is equivalent to 2*a + # a^2 is used instead of a**2 or pow(a,2) + sqr_term = sqrt(b^2-4a*c) + r1 = quadratic(a, sqr_term, b) + r2 = quadratic(a, -sqr_term, b) + # multiple values can be returned from a function using tuples + # if the return keyword is omitted, the last term is returned + r1, r2 +end + +vol = sphere_vol(3) +# @printf allows number formatting but does not automatically append the \n to statements, see below +using Printf +@printf "volume = %0.3f\n" vol +#> volume = 113.097 + +quad1, quad2 = quadratic2(2.0, -2.0, -12.0) +println("result 1: ", quad1) +#> result 1: 3.0 +println("result 2: ", quad2) +#> result 2: -2.0 \ No newline at end of file diff --git a/pmd-julia/src/test/resources/net/sourceforge/pmd/lang/julia/cpd/testdata/mathExample.txt b/pmd-julia/src/test/resources/net/sourceforge/pmd/lang/julia/cpd/testdata/mathExample.txt new file mode 100644 index 0000000000..47d6f6a52b --- /dev/null +++ b/pmd-julia/src/test/resources/net/sourceforge/pmd/lang/julia/cpd/testdata/mathExample.txt @@ -0,0 +1,153 @@ + [Image] or [Truncated image[ Bcol Ecol +L2 + [function] 1 8 + [sphere_vol] 10 19 + [(] 20 20 + [r] 21 21 + [)] 22 22 +L5 + [return] 3 8 + [4] 10 10 + [/] 11 11 + [3] 12 12 + [*] 13 13 + [pi] 14 15 + [*] 16 16 + [r] 17 17 + [^] 18 18 + [3] 19 19 +L6 + [end] 1 3 +L9 + [quadratic] 1 9 + [(] 10 10 + [a] 11 11 + [,] 12 12 + [sqr_term] 14 21 + [,] 22 22 + [b] 24 24 + [)] 25 25 + [=] 27 27 + [(] 29 29 + [-] 30 30 + [b] 31 31 + [+] 33 33 + [sqr_term] 35 42 + [)] 43 43 + [/] 45 45 + [2] 47 47 + [a] 48 48 +L12 + [function] 1 8 + [quadratic2] 10 19 + [(] 20 20 + [a] 21 21 + [:] 22 22 + [:] 23 23 + [Float64] 24 30 + [,] 31 31 + [b] 
33 33 + [:] 34 34 + [:] 35 35 + [Float64] 36 42 + [,] 43 43 + [c] 45 45 + [:] 46 46 + [:] 47 47 + [Float64] 48 54 + [)] 55 55 +L15 + [sqr_term] 3 10 + [=] 12 12 + [sqrt] 14 17 + [(] 18 18 + [b] 19 19 + [^] 20 20 + [2] 21 21 + [-] 22 22 + [4] 23 23 + [a] 24 24 + [*] 25 25 + [c] 26 26 + [)] 27 27 +L16 + [r1] 3 4 + [=] 6 6 + [quadratic] 8 16 + [(] 17 17 + [a] 18 18 + [,] 19 19 + [sqr_term] 21 28 + [,] 29 29 + [b] 31 31 + [)] 32 32 +L17 + [r2] 3 4 + [=] 6 6 + [quadratic] 8 16 + [(] 17 17 + [a] 18 18 + [,] 19 19 + [-] 21 21 + [sqr_term] 22 29 + [,] 30 30 + [b] 32 32 + [)] 33 33 +L20 + [r1] 3 4 + [,] 5 5 + [r2] 7 8 +L21 + [end] 1 3 +L23 + [vol] 1 3 + [=] 5 5 + [sphere_vol] 7 16 + [(] 17 17 + [3] 18 18 + [)] 19 19 +L25 + [using] 1 5 + [Printf] 7 12 +L26 + [@] 1 1 + [printf] 2 7 + ["volume = %0.3f\\n"] 9 26 + [vol] 28 30 +L29 + [quad1] 1 5 + [,] 6 6 + [quad2] 8 12 + [=] 14 14 + [quadratic2] 16 25 + [(] 26 26 + [2] 27 27 + [.] 28 28 + [0] 29 29 + [,] 30 30 + [-] 32 32 + [2] 33 33 + [.] 34 34 + [0] 35 35 + [,] 36 36 + [-] 38 38 + [1] 39 39 + [2] 40 40 + [.] 41 41 + [0] 42 42 + [)] 43 43 +L30 + [println] 1 7 + [(] 8 8 + ["result 1: "] 9 20 + [,] 21 21 + [quad1] 23 27 + [)] 28 28 +L32 + [println] 1 7 + [(] 8 8 + ["result 2: "] 9 20 + [,] 21 21 + [quad2] 23 27 + [)] 28 28 +EOF diff --git a/pom.xml b/pom.xml index b2ea18544a..d855504328 100644 --- a/pom.xml +++ b/pom.xml @@ -1085,6 +1085,7 @@ pmd-lua pmd-java pmd-jsp + pmd-julia pmd-kotlin pmd-matlab pmd-modelica