Merge branch 'master' into pmd/7.0.x

2022-09-30 12:17:45 +02:00
parent f6176f37bc be871350c0
commit c0dff0d75a
54 changed files with 2104 additions and 353 deletions
--- a/pmd-lua/src/main/antlr4/net/sourceforge/pmd/lang/lua/ast/Lua.g4
+++ b/pmd-lua/src/main/antlr4/net/sourceforge/pmd/lang/lua/ast/Lua.g4
--- a/pmd-lua/src/main/java/net/sourceforge/pmd/cpd/LuaLanguage.java
+++ b/pmd-lua/src/main/java/net/sourceforge/pmd/cpd/LuaLanguage.java
@ -4,15 +4,28 @@

 package net.sourceforge.pmd.cpd;

+import java.util.Properties;
+
 /**
 * Language implementation for Lua
+ *
+ * <p>Configuration properties are forwarded to the {@link LuaTokenizer}.</p>
 */
 public class LuaLanguage extends AbstractLanguage {

+    public LuaLanguage() {
+        this(System.getProperties()); // no-arg variant: configure from the JVM system properties
+    }
+
    /**
     * Creates a new Lua Language instance.
+     *
+     * @param properties the options handed to {@link #setProperties(Properties)}
     */
-    public LuaLanguage() {
+    public LuaLanguage(Properties properties) {
        super("Lua", "lua", new LuaTokenizer(), ".lua");
+        setProperties(properties); // forward the options to the tokenizer (see setProperties)
+    }
+
+    @Override
+    public final void setProperties(Properties properties) {
+        LuaTokenizer tokenizer = (LuaTokenizer) getTokenizer(); // NOTE(review): cast assumes getTokenizer() returns the LuaTokenizer passed to super(...) — confirm
+        tokenizer.setProperties(properties);
    }
 }
--- a/pmd-lua/src/main/java/net/sourceforge/pmd/cpd/LuaTokenizer.java
+++ b/pmd-lua/src/main/java/net/sourceforge/pmd/cpd/LuaTokenizer.java
@ -4,19 +4,187 @@

 package net.sourceforge.pmd.cpd;

+import java.util.Properties;
+
 import org.antlr.v4.runtime.CharStream;
 import org.antlr.v4.runtime.Lexer;

 import net.sourceforge.pmd.cpd.internal.AntlrTokenizer;
 import net.sourceforge.pmd.lang.lua.ast.LuaLexer;
+import net.sourceforge.pmd.cpd.token.AntlrTokenFilter;

 /**
 * The Lua Tokenizer
+ *
+ * <p>Creates a {@link LuaLexer} for the source and filters its tokens through a
+ * Lua-specific token filter. Literal sequences are only filtered when the
+ * {@code OPTION_IGNORE_LITERAL_SEQUENCES} property has been set to {@code true}
+ * via {@link #setProperties(Properties)}.</p>
 */
 public class LuaTokenizer extends AntlrTokenizer {

+    /** Whether sequences of literals are filtered from the token stream; off by default. */
+    private boolean ignoreLiteralSequences = false;
+
+    /**
+     * Sets the possible options for the Lua tokenizer.
+     *
+     * @param properties the properties
+     * @see #OPTION_IGNORE_LITERAL_SEQUENCES
+     */
+    public void setProperties(Properties properties) {
+        ignoreLiteralSequences = getBooleanProperty(properties, OPTION_IGNORE_LITERAL_SEQUENCES);
+    }
+
+    /**
+     * Reads a boolean option.
+     *
+     * @param properties the properties to look the option up in
+     * @param property   the key of the option
+     * @return the parsed value; {@code false} when the key is absent
+     */
+    private boolean getBooleanProperty(final Properties properties, final String property) {
+        return Boolean.parseBoolean(properties.getProperty(property, Boolean.FALSE.toString()));
+    }
+
    @Override
    protected Lexer getLexerForSource(CharStream charStream) {
        return new LuaLexer(charStream);
+    }
+
+    @Override
+    protected AntlrTokenFilter getTokenFilter(final AntlrTokenManager tokenManager) {
+        return new LuaTokenFilter(tokenManager, ignoreLiteralSequences);
+    }
+
+    /**
+     * The {@link LuaTokenFilter} extends the {@link AntlrTokenFilter} to discard
+     * Lua-specific tokens.
+     * <p>
+     * It always flags newline tokens and {@code require} calls for discarding
+     * (everything from the {@code require} token up to and including the next
+     * closing parenthesis). When constructed with {@code ignoreLiteralSequences}
+     * set, it additionally flags bracketed sequences of literals.
+     * </p>
+     */
+    private static class LuaTokenFilter extends AntlrTokenFilter {
+
+        /** Whether literal sequences should be looked for at all. */
+        private final boolean ignoreLiteralSequences;
+        /** True from a {@code require} token until the next closing parenthesis is seen. */
+        private boolean discardingRequires = false;
+        /** True while the most recently analyzed token is a newline. */
+        private boolean discardingNL = false;
+        /** Closing token of the literal sequence being skipped, or {@code null} if none. */
+        private AntlrToken discardingLiteralsUntil = null;
+        /** Flags the current token itself when it is the one that ends a skipped range. */
+        private boolean discardCurrent = false;
+
+
+        LuaTokenFilter(final AntlrTokenManager tokenManager, boolean ignoreLiteralSequences) {
+            super(tokenManager);
+            this.ignoreLiteralSequences = ignoreLiteralSequences;
+        }
+
+        @Override
+        protected void analyzeToken(final AntlrToken currentToken) {
+            skipNewLines(currentToken);
+        }
+
+        @Override
+        protected void analyzeTokens(final AntlrToken currentToken, final Iterable<AntlrToken> remainingTokens) {
+            // reset the one-shot flag before the per-token checks may set it again
+            discardCurrent = false;
+            skipRequires(currentToken);
+            skipLiteralSequences(currentToken, remainingTokens);
+        }
+
+        /**
+         * Tracks {@code require} calls: starts discarding at the {@code require}
+         * token and stops at the next closing parenthesis, which is itself flagged
+         * through {@code discardCurrent}.
+         */
+        private void skipRequires(final AntlrToken currentToken) {
+            final int type = currentToken.getKind();
+            if (type == LuaLexer.REQUIRE) {
+                discardingRequires = true;
+            } else if (type == LuaLexer.CLOSE_PARENS && discardingRequires) {
+                discardingRequires = false;
+                discardCurrent = true;
+            }
+        }
+
+        /** Records whether the current token is a newline token. */
+        private void skipNewLines(final AntlrToken currentToken) {
+            discardingNL = currentToken.getKind() == LuaLexer.NL;
+        }
+
+        /**
+         * When enabled, looks ahead from an opening brace, bracket or parenthesis for
+         * a pure sequence of literals and remembers its closing token; the skipping
+         * state ends once that exact token instance is analyzed.
+         */
+        private void skipLiteralSequences(final AntlrToken currentToken, final Iterable<AntlrToken> remainingTokens) {
+            if (ignoreLiteralSequences) {
+                final int type = currentToken.getKind();
+                if (isDiscardingLiterals()) {
+                    if (currentToken == discardingLiteralsUntil) { // NOPMD - intentional check for reference equality
+                        discardingLiteralsUntil = null;
+                        discardCurrent = true;
+                    }
+                } else if (type == LuaLexer.OPEN_BRACE
+                    || type == LuaLexer.OPEN_BRACKET
+                    || type == LuaLexer.OPEN_PARENS) {
+                    final AntlrToken finalToken = findEndOfSequenceOfLiterals(remainingTokens);
+                    discardingLiteralsUntil = finalToken;
+                }
+            }
+        }
+
+        /**
+         * Scans the tokens following an opening brace, bracket or parenthesis.
+         *
+         * @param remainingTokens the tokens after the opening token
+         * @return the first unbalanced closing token if only literals, commas,
+         *         newlines, assignments and balanced nested delimiters precede it
+         *         and at least one literal was seen; {@code null} otherwise
+         */
+        private AntlrToken findEndOfSequenceOfLiterals(final Iterable<AntlrToken> remainingTokens) {
+            boolean seenLiteral = false;
+            // the counters start at zero for every delimiter kind, so any closer that
+            // drives its counter negative ends the scan
+            int braceCount = 0;
+            int bracketCount = 0;
+            int parenCount = 0;
+            for (final AntlrToken token : remainingTokens) {
+                switch (token.getKind()) {
+                case LuaLexer.INT:
+                case LuaLexer.NORMAL_STRING:
+                case LuaLexer.INTERPOLATED_STRING:
+                case LuaLexer.LONG_STRING:
+                case LuaLexer.HEX_FLOAT:
+                case LuaLexer.HEX:
+                case LuaLexer.FLOAT:
+                case LuaLexer.NIL:
+                case LuaLexer.BOOLEAN:
+                    seenLiteral = true;
+                    break; // can be skipped; continue to the next token
+                case LuaLexer.COMMA:
+                    break; // can be skipped; continue to the next token
+                case LuaLexer.NL:
+                    // this helps skip large multi-line data table sequences in Lua
+                    break; // can be skipped; continue to the next token
+                case LuaLexer.ASSIGNMENT:
+                    // this helps skip large data table sequences in Lua: { ["bob"] = "uncle", ["alice"] = "enby" }
+                    break; // can be skipped; continue to the next token
+                case LuaLexer.OPEN_BRACE:
+                    braceCount++;
+                    break; // curly braces are allowed, as long as they're balanced
+                case LuaLexer.CLOSE_BRACE:
+                    braceCount--;
+                    if (braceCount < 0) {
+                        // end of the list in the braces; skip all contents
+                        return seenLiteral ? token : null;
+                    } else {
+                        // curly braces are not yet balanced; continue to the next token
+                        break;
+                    }
+                case LuaLexer.OPEN_BRACKET:
+                    bracketCount++;
+                    break; // brackets are allowed, as long as they're balanced
+                case LuaLexer.CLOSE_BRACKET:
+                    bracketCount--;
+                    if (bracketCount < 0) {
+                        // end of the list in the brackets; skip all contents
+                        return seenLiteral ? token : null;
+                    } else {
+                        // brackets are not yet balanced; continue to the next token
+                        break;
+                    }
+                case LuaLexer.OPEN_PARENS:
+                    parenCount++;
+                    break; // parens are allowed, as long as they're balanced
+                case LuaLexer.CLOSE_PARENS:
+                    parenCount--;
+                    if (parenCount < 0) {
+                        // end of the list in the parens; skip all contents
+                        return seenLiteral ? token : null;
+                    } else {
+                        // parens are not yet balanced; continue to the next token
+                        break;
+                    }
+                default:
+                    // some other token than the expected ones; this is not a sequence of literals
+                    return null;
+                }
+            }
+            // ran out of tokens without finding a closing delimiter
+            return null;
+        }
+
+        /** @return {@code true} while a closing token of a literal sequence is pending */
+        public boolean isDiscardingLiterals() {
+            return discardingLiteralsUntil != null;
+        }
+
+        @Override
+        protected boolean isLanguageSpecificDiscarding() {
+            return discardingRequires || discardingNL || isDiscardingLiterals() || discardCurrent;
+        }
    }
 }
--- a/pmd-lua/src/test/java/net/sourceforge/pmd/cpd/LuaTokenizerTest.java
+++ b/pmd-lua/src/test/java/net/sourceforge/pmd/cpd/LuaTokenizerTest.java
@ -39,4 +39,14 @@ public class LuaTokenizerTest extends CpdTextComparisonTest {
    public void testTabWidth() {
        doTest("tabWidth");
    }
+
+    @Test
+    public void testLuauTypes() {
+        doTest("luauTypes"); // NOTE(review): presumably checks testdata/luauTypes.lua against luauTypes.txt — confirm in CpdTextComparisonTest
+    }
+
+    @Test
+    public void testComment() {
+        doTest("comment"); // NOTE(review): presumably checks testdata/comment.lua against comment.txt — confirm in CpdTextComparisonTest
+    }
 }
--- a/pmd-lua/src/test/resources/net/sourceforge/pmd/lang/lua/cpd/testdata/comment.lua
+++ b/pmd-lua/src/test/resources/net/sourceforge/pmd/lang/lua/cpd/testdata/comment.lua
@ -0,0 +1,13 @@
+
+-- inline comment ("long comment")
+print(1 --[[, 2]])
+
+-- line comment ("short comment")
+print(1) -- comment
+
+-- inline comment with multiple lines ("long comment")
+print(1 --[[comment line 1
+comment line 2]])
+
+-- line comment without any content
+print(1) --
--- a/pmd-lua/src/test/resources/net/sourceforge/pmd/lang/lua/cpd/testdata/comment.txt
+++ b/pmd-lua/src/test/resources/net/sourceforge/pmd/lang/lua/cpd/testdata/comment.txt
@ -0,0 +1,23 @@
+    [Image] or [Truncated image[            Bcol      Ecol
+L3
+    [print]                                 1         5
+    [(]                                     6         6
+    [1]                                     7         7
+    [)]                                     18        18
+L6
+    [print]                                 1         5
+    [(]                                     6         6
+    [1]                                     7         7
+    [)]                                     8         8
+L9
+    [print]                                 1         5
+    [(]                                     6         6
+    [1]                                     7         7
+L10
+    [)]                                     17        17
+L13
+    [print]                                 1         5
+    [(]                                     6         6
+    [1]                                     7         7
+    [)]                                     8         8
+EOF
--- a/pmd-lua/src/test/resources/net/sourceforge/pmd/lang/lua/cpd/testdata/luauTypes.lua
+++ b/pmd-lua/src/test/resources/net/sourceforge/pmd/lang/lua/cpd/testdata/luauTypes.lua
@ -0,0 +1,34 @@
+--!strict
+type Array<T = any> = { T }
+local x = 31337
+local _negativeLiteral = -3
+local _negativeVariable = -x
+local _notLiteral = not true
+local _notVariable = not x
+local _length = #{x}
+export type Function<T... = ...any> = (...any) -> T...
+local _PlatformService = nil
+local game = require(script.Parent.game).default :: any
+pcall(function() _PlatformService = game:GetService('PlatformService') end)
+
+return function <T>(req, ...: boolean): ({[string|number]: T}, string, Function<...any>)
+  local body = string.format("%s %s\n", req.method, req.path)
+  local res = {
+    code = 200,
+    { "Content-Type", "text/plain" },
+    { 
+      "Content-Length", 
+      #body,
+      ["Auth.Confirm"] = [[至：%s。]],
+
+    } :: Array<any>,
+  } :: { [any]: number | Array<string | boolean> }
+  if (req :: any).keepAlive then
+    local socketType: "Connection" | "Pingback" | "" = "" :: ""
+    socketType = "Connection" :: "Connection"
+    res[#res + 1] = { socketType :: string, `\`${req.keepAlive}\`` }
+    res[#res - 2] = { ... }
+  end
+
+  return (res :: any) :: { T }, (if req then body else "") :: string, function(...): ...any return ... end
+end
--- a/pmd-lua/src/test/resources/net/sourceforge/pmd/lang/lua/cpd/testdata/luauTypes.txt
+++ b/pmd-lua/src/test/resources/net/sourceforge/pmd/lang/lua/cpd/testdata/luauTypes.txt