# blender/build_files/cmake/clang_array_check.py

# SPDX-FileCopyrightText: 2012 Blender Authors
#
# SPDX-License-Identifier: Apache-2.0

"""
Invocation:

   export CLANG_BIND_DIR="/dsk/src/llvm/tools/clang/bindings/python"
   export CLANG_LIB_DIR="/opt/llvm/lib"

   python clang_array_check.py somefile.c -DSOME_DEFINE -I/some/include

... defines and includes are optional

"""

# Delay extracting the argument sizes of a function declaration until a call
# to that function is checked (the declaration itself is stored until then).
USE_LAZY_INIT = True
# When true, report array arguments whose size differs from the expected size,
# otherwise only report array arguments which are smaller than expected.
USE_EXACT_COMPARE = False

# -----------------------------------------------------------------------------
# predefined function/arg sizes, handy sometimes, but not complete...

# Maps a function name to `{argument_index: array_size}`,
# where `argument_index` is the zero based index of the argument in the call.
# These entries replace any sizes found by parsing declarations of the same name.
defs_precalc = {
    "glColor3bv": {0: 3},
    "glColor4bv": {0: 4},

    "glColor3ubv": {0: 3},
    "glColor4ubv": {0: 4},

    "glColor3usv": {0: 3},
    "glColor4usv": {0: 4},

    "glColor3fv": {0: 3},
    "glColor4fv": {0: 4},

    "glColor3dv": {0: 3},
    "glColor4dv": {0: 4},

    "glVertex2fv": {0: 2},
    "glVertex3fv": {0: 3},
    "glVertex4fv": {0: 4},

    "glEvalCoord1fv": {0: 1},
    "glEvalCoord1dv": {0: 1},
    "glEvalCoord2fv": {0: 2},
    "glEvalCoord2dv": {0: 2},

    "glRasterPos2dv": {0: 2},
    "glRasterPos3dv": {0: 3},
    "glRasterPos4dv": {0: 4},

    "glRasterPos2fv": {0: 2},
    "glRasterPos3fv": {0: 3},
    "glRasterPos4fv": {0: 4},

    "glRasterPos2sv": {0: 2},
    "glRasterPos3sv": {0: 3},
    "glRasterPos4sv": {0: 4},

    "glTexCoord2fv": {0: 2},
    "glTexCoord3fv": {0: 3},
    "glTexCoord4fv": {0: 4},

    "glTexCoord2dv": {0: 2},
    "glTexCoord3dv": {0: 3},
    "glTexCoord4dv": {0: 4},

    "glNormal3fv": {0: 3},
    "glNormal3dv": {0: 3},
    "glNormal3bv": {0: 3},
    "glNormal3iv": {0: 3},
    "glNormal3sv": {0: 3},

    # GPU immediate mode.
    # Argument 0 of the vertex/attribute/uniform functions is an identifier or name,
    # the array is argument 1.
    "immVertex2iv": {1: 2},

    "immVertex2fv": {1: 2},
    "immVertex3fv": {1: 3},

    "immAttr2fv": {1: 2},
    "immAttr3fv": {1: 3},
    "immAttr4fv": {1: 4},

    "immAttr3ubv": {1: 3},
    "immAttr4ubv": {1: 4},

    "immUniform2fv": {1: 2},
    "immUniform3fv": {1: 3},
    "immUniform4fv": {1: 4},

    # The uniform color functions take the color array as argument 0.
    "immUniformColor3fv": {0: 3},
    "immUniformColor4fv": {0: 4},

    # NOTE(review): these used index 1 which never matches an argument of these
    # single argument functions, index 0 is consistent with the `fv` variants.
    # Confirm against the declarations in `GPU_immediate.hh`.
    "immUniformColor3ubv": {0: 3},
    "immUniformColor4ubv": {0: 4},

    "immUniformColor3fvAlpha": {0: 3},
    "immUniformColor4fvAlpha": {0: 4},
}

# -----------------------------------------------------------------------------

import sys

# Change to `if 1:` to use the hard coded paths below instead of the environment.
if 0:
    # Examples with LLVM as the root dir: '/dsk/src/llvm'

    # path containing 'clang/__init__.py'
    CLANG_BIND_DIR = "/dsk/src/llvm/tools/clang/bindings/python"

    # path containing libclang.so
    CLANG_LIB_DIR = "/opt/llvm/lib"
else:
    import os
    # Both are None when the environment variable is not set.
    CLANG_BIND_DIR = os.environ.get("CLANG_BIND_DIR")
    CLANG_LIB_DIR = os.environ.get("CLANG_LIB_DIR")

    # Only a message is printed, execution continues without the path being set.
    if CLANG_BIND_DIR is None:
        print("$CLANG_BIND_DIR python binding dir not set")
    if CLANG_LIB_DIR is None:
        print("$CLANG_LIB_DIR clang lib dir not set")

# Must run before `import clang` so the bindings can be found in this directory.
if CLANG_BIND_DIR:
    sys.path.append(CLANG_BIND_DIR)

import clang
import clang.cindex
from clang.cindex import (CursorKind,
                          TypeKind,
                          TokenKind)

# Set the library directory before the index is created.
if CLANG_LIB_DIR:
    clang.cindex.Config.set_library_path(CLANG_LIB_DIR)

index = clang.cindex.Index.create()

# Everything after the source file is passed on as arguments when parsing
# (defines and includes, see the invocation example at the top of this file).
args = sys.argv[2:]
# print(args)

# The first command line argument is the source file to check.
tu = index.parse(sys.argv[1], args)
# print('Translation unit: %s' % tu.spelling)

# Included in every reported message, see `file_check_arg_sizes`.
filepath = tu.spelling

# -----------------------------------------------------------------------------


def function_parm_wash_tokens(parm):
    """
    Return the tokens of a declaration without a trailing separator,
    type qualifiers or comments.

    ``parm`` must be a cursor of kind ``PARM_DECL``, ``VAR_DECL`` or ``FIELD_DECL``.

    The result is a list of tokens, empty when the cursor has no tokens.
    Tokens of an unexpected kind are reported but kept.
    """
    # print(parm.kind)
    assert parm.kind in (CursorKind.PARM_DECL,
                         CursorKind.VAR_DECL,  # XXX, double check this
                         CursorKind.FIELD_DECL,
                         )

    tokens = list(parm.get_tokens())
    if not tokens:
        return tokens

    # Remove a trailing separator so the declaration's own tokens come last.
    token_last = tokens[-1]
    if token_last.kind == TokenKind.PUNCTUATION and token_last.spelling in {",", ")", ";"}:
        tokens.pop()

    t_new = []
    for t in tokens:
        t_kind = t.kind
        t_spelling = t.spelling
        if t_kind == TokenKind.KEYWORD:
            # Skip qualifiers, including double underscore keywords such as `__restrict`,
            # other keywords are kept.
            if t_spelling in {"const", "restrict", "volatile"} or t_spelling.startswith("__"):
                continue
        elif t_kind == TokenKind.COMMENT:
            continue
        elif t_kind not in (TokenKind.LITERAL,
                            TokenKind.PUNCTUATION,
                            TokenKind.IDENTIFIER):
            # Not an error, the token is still added.
            print("Unknown!", t_kind, t_spelling)

        t_new.append(t)
    return t_new


def parm_size(node_child):
    """
    Return the array size a declaration ends with (``foo[3]`` gives 3),
    or -1 when its tokens don't end with ``[``, a digits-only literal, ``]``.
    """
    washed = function_parm_wash_tokens(node_child)

    # print(" ".join([t.spelling for t in washed]))

    # NOTE: this is a heuristic which only inspects the last three tokens.
    if len(washed) < 3:
        return -1

    bracket_open, length, bracket_close = washed[-3:]

    if bracket_open.kind != TokenKind.PUNCTUATION or bracket_open.spelling != "[":
        return -1
    if length.kind != TokenKind.LITERAL or not length.spelling.isdigit():
        return -1
    if bracket_close.kind != TokenKind.PUNCTUATION or bracket_close.spelling != "]":
        return -1

    return int(length.spelling)


def function_get_arg_sizes(node):
    """
    Return the array sizes declared by the parameters of ``node``.

    The result is a dict: ``{arg_index: arg_array_size, ...}``
    where ``arg_index`` is the zero based index among the ``PARM_DECL`` children of ``node``.
    Only parameters with a constant array type and a size recognized by
    ``parm_size`` are included.
    """
    node_parms = [
        node_child for node_child in node.get_children()
        if node_child.kind == CursorKind.PARM_DECL
    ]

    arg_sizes = {}
    for i, node_child in enumerate(node_parms):
        # print(node_child.kind, node_child.spelling)
        if node_child.type.kind != TypeKind.CONSTANTARRAY:
            continue
        size = parm_size(node_child)
        if size != -1:
            arg_sizes[i] = size

    return arg_sizes


# -----------------------------------------------------------------------------
# Function name -> `{arg_index: array_size}`.
# With `USE_LAZY_INIT` a value may be the function declaration's cursor instead,
# until `lookup_function_size_def` replaces it with the sizes.
_defs = {}


def lookup_function_size_def(func_id):
    """
    Return the ``{arg_index: array_size}`` dict for the function named ``func_id``,
    an empty dict when the name isn't in ``_defs``.
    """
    result = _defs.get(func_id, {})
    # With lazy initialization the entry may still be the declaration's cursor,
    # replace it with its sizes on first use so this only runs once per function.
    if USE_LAZY_INIT and not isinstance(result, dict):
        result = _defs[func_id] = function_get_arg_sizes(result)
    return result

# -----------------------------------------------------------------------------


def file_check_arg_sizes(tu):
    """
    Check every function call found in the translation unit ``tu``,
    printing a message for each array argument with an unexpected size.

    An argument is reported when its array is smaller than the size known for
    the called function, or when it differs at all if ``USE_EXACT_COMPARE`` is set.
    """

    # main checking function
    def validate_arg_size(node):
        """
        Loop over args and validate sizes for args we KNOW the size of.
        """
        assert node.kind == CursorKind.CALL_EXPR

        # print(node.location)

        # The first child is the function being called, the others are its arguments.
        children = list(node.get_children())
        if not children:
            return  # XXX, look into this, happens on C++

        func = children[0]

        # The first token of the called expression is the key used to find the sizes.
        tok = list(func.get_tokens())
        if not tok:
            return

        args_size_definition = lookup_function_size_def(tok[0].spelling)
        if not args_size_definition:
            return

        for i, node_child in enumerate(children[1:]):
            # Skip arguments we don't have a size for.
            size_def = args_size_definition.get(i, -1)
            if size_def == -1:
                continue

            # Only arguments wrapping a single array reference are checked.
            arg_children = list(node_child.get_children())
            if len(arg_children) != 1:
                continue

            arg = arg_children[0]
            if arg.kind not in (CursorKind.DECL_REF_EXPR,
                                CursorKind.UNEXPOSED_EXPR):
                continue
            if arg.type.kind != TypeKind.CONSTANTARRAY:
                continue

            dec = arg.get_definition()
            if not dec:
                continue

            size = parm_size(dec)
            # -1 means the size is unknown, 0 is for 'float *a',
            # arrays with a size of 1 are never reported either.
            if size in (-1, 0, 1):
                continue

            if USE_EXACT_COMPARE:
                is_err = (size != size_def)
            else:
                is_err = (size < size_def)

            if not is_err:
                continue

            location = node.location
            print("%s:%d:%d: argument %d is size %d, should be %d (from %s)" %
                  (location.file,
                   location.line,
                   location.column,
                   i + 1, size, size_def,
                   filepath  # always the same but useful when running threaded
                   ))

    # We don't really care what we are looking at, just scan the entire file for
    # function calls.
    #
    # Depth first (pre-order) traversal with an explicit stack,
    # so deeply nested code can't exceed Python's recursion limit.
    stack = [tu.cursor]
    while stack:
        node = stack.pop()
        if node.kind == CursorKind.CALL_EXPR:
            validate_arg_size(node)
        # Reversed so the first child is popped (and visited) first.
        stack.extend(reversed(list(node.get_children())))


# -- first pass, cache function definitions sizes

# PRINT FUNC DEFINES
def recursive_arg_sizes(node):
    """
    Store every function declaration found in ``node`` and its descendants
    in ``_defs``, using the function's name as the key.

    With ``USE_LAZY_INIT`` the declaration's cursor is stored as-is,
    otherwise its argument sizes are calculated and stored.
    When a name is declared more than once, the declaration visited last is kept.
    """
    # Depth first (pre-order) traversal with an explicit stack,
    # so deeply nested code can't exceed Python's recursion limit.
    stack = [node]
    while stack:
        node_iter = stack.pop()
        # print(node_iter.kind, node_iter.spelling)
        if node_iter.kind == CursorKind.FUNCTION_DECL:
            if USE_LAZY_INIT:
                args_sizes = node_iter
            else:
                args_sizes = function_get_arg_sizes(node_iter)
            _defs[node_iter.spelling] = args_sizes
        # Reversed so the first child is popped (and visited) first.
        stack.extend(reversed(list(node_iter.get_children())))


# Cache function sizes from every function declaration in the translation unit.
recursive_arg_sizes(tu.cursor)
# Predefined sizes are applied last, replacing parsed declarations of the same name.
_defs.update(defs_precalc)

# --- second pass, check every function call against the cached sizes.
file_check_arg_sizes(tu)