forked from bartvdbraak/blender
code checker: add 'make check_struct_comments'
This utility uses Python's Clang module to validate that the comments in struct initializers match the struct member names. `cmake_static_check_clang.py` has been written so that other checkers can be added in the future.
This commit is contained in:
parent
17563e9a91
commit
2c9aa2c8e5
@ -59,6 +59,7 @@ Static Source Code Checking
|
||||
|
||||
* check_cppcheck: Run blender source through cppcheck (C & C++).
|
||||
* check_clang_array: Run blender source through clang array checking script (C & C++).
|
||||
* check_struct_comments: Check struct member comments are correct (C & C++).
|
||||
* check_deprecated: Check if there is any deprecated code to remove.
|
||||
* check_descriptions: Check for duplicate/invalid descriptions.
|
||||
* check_licenses: Check license headers follow the SPDX license specification,
|
||||
@ -468,6 +469,13 @@ check_cppcheck: .FORCE
|
||||
"$(BLENDER_DIR)/check_cppcheck.txt"
|
||||
@echo "written: check_cppcheck.txt"
|
||||
|
||||
check_struct_comments: .FORCE
|
||||
@$(CMAKE_CONFIG)
|
||||
@cd "$(BUILD_DIR)" ; \
|
||||
$(PYTHON) \
|
||||
"$(BLENDER_DIR)/build_files/cmake/cmake_static_check_clang.py" \
|
||||
--checks=struct_comments --match=".*"
|
||||
|
||||
check_clang_array: .FORCE
|
||||
@$(CMAKE_CONFIG)
|
||||
@cd "$(BUILD_DIR)" ; \
|
||||
|
544
build_files/cmake/cmake_static_check_clang.py
Normal file
544
build_files/cmake/cmake_static_check_clang.py
Normal file
@ -0,0 +1,544 @@
|
||||
#!/usr/bin/env python3
|
||||
# SPDX-FileCopyrightText: 2023 Blender Foundation
|
||||
#
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
"""
|
||||
A command line utility to check Blender's source code with CLANG's Python module.
|
||||
|
||||
To call this directly:
|
||||
|
||||
export CLANG_LIB_DIR=/usr/lib64
|
||||
cd {BUILD_DIR}
|
||||
python ../blender/build_files/cmake/cmake_static_check_clang.py --match=".*" --checks=struct_comments
|
||||
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
from typing import (
|
||||
Any,
|
||||
Dict,
|
||||
List,
|
||||
Type,
|
||||
Sequence,
|
||||
Tuple,
|
||||
)
|
||||
|
||||
|
||||
import project_source_info
|
||||
|
||||
# pylint: disable-next=import-outside-toplevel
|
||||
import clang # type: ignore
|
||||
# pylint: disable-next=import-outside-toplevel
|
||||
import clang.cindex # type: ignore
|
||||
from clang.cindex import (
|
||||
CursorKind,
|
||||
)
|
||||
|
||||
# Only for readability.
ClangNode = Any
ClangTranslationUnit = Any
ClangSourceLocation = Any


# Enable verbose printing when the `VERBOSE` environment variable is set (to any value).
USE_VERBOSE = os.environ.get("VERBOSE", None) is not None

# Turn off for debugging.
USE_MULTIPROCESS = True

# Optional: directory containing the `clang` Python bindings (added to `sys.path` below).
CLANG_BIND_DIR = os.environ.get("CLANG_BIND_DIR")
# Optional: directory containing the CLANG shared library.
CLANG_LIB_DIR = os.environ.get("CLANG_LIB_DIR")

# Warn (don't fail) when unset, the system-wide installation may still work.
if CLANG_BIND_DIR is None:
    print("$CLANG_BIND_DIR python binding dir not set")
if CLANG_LIB_DIR is None:
    print("$CLANG_LIB_DIR clang lib dir not set")

if CLANG_LIB_DIR:
    clang.cindex.Config.set_library_path(CLANG_LIB_DIR)
if CLANG_BIND_DIR:
    sys.path.append(CLANG_BIND_DIR)


# Prefixes passed to `project_source_info.build_info` as its `ignore_prefix_list`.
CHECKER_IGNORE_PREFIX = [
    "extern",
]

# Project-relative source paths (converted to native separators) never checked.
# Every entry must match an existing file, see `source_info_filter`.
CHECKER_EXCLUDE_SOURCE_FILES = set(os.path.join(*f.split("/")) for f in (
    # Skip parsing these large (mostly data files).
    "source/blender/editors/space_text/text_format_pov.cc",
    "source/blender/editors/space_text/text_format_pov_ini.cc",
))
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Utility Functions
|
||||
|
||||
def clang_source_location_as_str(source_location: ClangSourceLocation) -> str:
    """Format a CLANG source location as ``file:line:column:`` for report output."""
    loc_file = str(source_location.file)
    loc_line = source_location.line
    loc_column = source_location.column
    return "{:s}:{:d}:{:d}:".format(loc_file, loc_line, loc_column)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Checkers
|
||||
|
||||
class ClangChecker:
    """
    Base class for checkers.

    Sub-classes must override ``check_source``,
    and may override ``setup`` / ``teardown`` for per-run shared state.

    Notes:

    - The function ``check_source`` takes file_data as bytes instead of a string
      because the offsets provided by CLANG are byte offsets.
      While the offsets could be converted into UNICODE offsets,
      there doesn't seem to be an efficient & convenient way to do that.
    """
    __slots__ = ()

    def __new__(cls, *args: Tuple[Any], **kwargs: Dict[str, Any]) -> Any:
        # Checkers are static namespaces, never instances.
        raise RuntimeError("%s should not be instantiated" % cls)

    @staticmethod
    def check_source(
            _filepath: str,
            _file_data: bytes,
            _tu: ClangTranslationUnit,
            _shared_check_data: Any,
    ) -> List[str]:
        """Return a list of report lines for ``_filepath`` (must be overridden)."""
        # NOTE: an unreachable `return []` after this raise has been removed,
        # and the message typo ("it's") corrected.
        raise RuntimeError("This function must be overridden by its subclass!")

    @staticmethod
    def setup() -> Any:
        """Create & return shared data passed into every ``check_source`` call."""
        return None

    @staticmethod
    def teardown(_shared_check_data: Any) -> None:
        """Release any resources created by ``setup``."""
        pass
|
||||
|
||||
|
||||
class clang_checkers:
    # Fake module: a namespace holding one `ClangChecker` subclass per check ID,
    # looked up by name (see `check_class_from_id` & `check_function_get_all`).

    class struct_comments(ClangChecker):
        """
        Ensure comments in struct declarations match the members of the struct, e.g:

        SomeStruct var = {
            /*name*/ "Text",
            /*children*/ nullptr,
            /*flag*/ 0,
        };

        Will generate a warning if any of the names in the prefix comments don't match the struct member names.
        """

        # Map: struct-type name -> {comment text -> {member names}} of known
        # mismatches that must not be reported.
        _struct_comments_ignore = {
            # `PyTypeObject` uses compile time members that vary (see: #PyVarObject_HEAD_INIT macro)
            # While some clever comment syntax could be supported to signify multiple/optional members
            # this is such a specific case that it's simpler to skip this warning.
            "PyTypeObject": {"ob_base": {"ob_size"}},
        }

        @staticmethod
        def _struct_check_comments_recursive(
                # Static (unchanged for each recursion).
                filepath: str,
                file_data: bytes,
                # Different for each recursion.
                node: ClangNode,
                node_parent: ClangNode,
                level: int,
                # Used to build data.
                struct_decl_map: Dict[str, ClangNode],
                struct_type_map: Dict[str, str],
                output: List[str],
        ) -> None:
            # Depth-first AST walk:
            # - Record every struct declaration into `struct_decl_map`
            #   (declarations from headers are needed, so all nodes are visited).
            # - For every initializer list (`INIT_LIST_EXPR`) located in `filepath`,
            #   compare the `/*name*/` prefix comments against the declared member
            #   names & append "OK:"/"NONE:"/"FAIL:" report lines to `output`.
            # `node_parent` is None for the translation-unit root (see `check_source`).

            # Needed to read back the node.
            if USE_VERBOSE:
                print("TRY:", node.kind, node.spelling, len(list(node.get_tokens())), level, node.location)

            # if node.kind == CursorKind.VAR_DECL and node.spelling == "Vector_NumMethods":
            #     import IPython
            #     IPython.embed()

            if node.kind == CursorKind.STRUCT_DECL:
                struct_type = node.spelling.strip()
                if not struct_type:
                    # The parent may be a `typedef [..] TypeID` where `[..]` is `struct { a; b; c; }`.
                    # Inspect the parent.
                    if node_parent is not None and (node_parent.kind == CursorKind.TYPEDEF_DECL):
                        tokens = list(node_parent.get_tokens())
                        if tokens[0].spelling == "typedef":
                            struct_type = tokens[-1].spelling

                struct_decl_map[struct_type] = node

            # Ignore declarations for anything defined outside this file.
            if str(node.location.file) == filepath:
                if node.kind == CursorKind.INIT_LIST_EXPR:
                    if USE_VERBOSE:
                        print(node.spelling, node.location)
                    # Split to avoid `const struct` .. and similar.
                    # NOTE: there may be an array size suffix, e.g. `[4]`.
                    # This could be supported.
                    struct_type = node.type.spelling.split()[-1]
                    struct = struct_decl_map.get(struct_type)
                    if struct is None:
                        if USE_VERBOSE:
                            print("NOT FOUND:", struct_type)
                        # Fall back to an aliased type name (if known).
                        struct_type = struct_type_map.get(struct_type)
                        if struct_type is not None:
                            struct = struct_decl_map.get(struct_type)

                    if USE_VERBOSE:
                        print("INSPECTING STRUCT:", struct_type)
                    if struct is not None:
                        # Declared field names, in declaration order.
                        member_names = [
                            node_child.spelling for node_child in struct.get_children()
                            if node_child.kind == CursorKind.FIELD_DECL
                        ]
                        # if struct_type == "PyMappingMethods":
                        #     import IPython
                        #     IPython.embed()

                        children = list(node.get_children())
                        comment_names = []

                        # Set to true when there is a comment directly before a value,
                        # this is needed because:
                        # - Comments on the previous line are rarely intended to be identifiers of the struct member.
                        # - Comments which _are_ intended to be identifiers can be wrapped onto new-lines
                        #   so they should not be ignored.
                        #
                        # While it's possible every member is wrapped onto a new-line,
                        # this is highly unlikely.
                        comment_names_prefix_any = False

                        for node_child in children:
                            # Extract the content before the child
                            # (typically a C-style comment containing the struct member).
                            end = min(node_child.location.offset, len(file_data))

                            # It's possible this ID has a preceding "name::space::etc"
                            # which should be skipped.
                            while end > 0 and ((ch := bytes((file_data[end - 1],))).isalpha() or ch == b":"):
                                end -= 1

                            # Step back over white-space, noting any newline crossed.
                            has_newline = False
                            while end > 0:
                                ch = bytes((file_data[end - 1],))
                                if ch in {b"\t", b" "}:
                                    end -= 1
                                elif ch == b"\n":
                                    end -= 1
                                    has_newline = True
                                else:
                                    break

                            # Take the text from the beginning of that line up to `end`.
                            beg = end - 1
                            while beg != 0 and bytes((file_data[beg],)) != b"\n":
                                beg -= 1
                            text = file_data[beg:end]
                            if text.lstrip().startswith(b"/*"):
                                if not has_newline:
                                    comment_names_prefix_any = True
                            else:
                                # Not a C-style comment: record an empty entry
                                # so indices stay aligned with `children`.
                                text = b""
                            comment_names.append(text.decode('utf-8'))

                        if USE_VERBOSE:
                            print(member_names)
                            print(comment_names)

                        total = min(len(member_names), len(comment_names))

                        if total != 0 and comment_names_prefix_any:
                            result = [""] * total
                            count_found = 0
                            count_invalid = 0
                            for i in range(total):
                                comment = comment_names[i]
                                if "/*" in comment and "*/" in comment:
                                    # Strip `/* .. */` down to the bare name.
                                    comment = comment.strip().strip("/").strip("*")
                                    if comment == member_names[i]:
                                        count_found += 1
                                    else:
                                        # Check the white-list of known mismatches before warning.
                                        suppress_warning = False
                                        if (
                                                skip_members_table :=
                                                clang_checkers.struct_comments._struct_comments_ignore.get(
                                                    node_parent.type.spelling,
                                                )
                                        ) is not None:
                                            if (skip_members := skip_members_table.get(comment)) is not None:
                                                if member_names[i] in skip_members:
                                                    suppress_warning = True

                                        if not suppress_warning:
                                            result[i] = "Incorrect! found \"{:s}\" expected \"{:s}\"".format(
                                                comment, member_names[i])
                                            count_invalid += 1
                                else:
                                    result[i] = "No comment for \"{:s}\"".format(member_names[i])
                            if count_found == 0 and count_invalid == 0:
                                # No comments used, skip this as not all declaration use this comment style.
                                output.append(
                                    "NONE: {:s} {:s}".format(
                                        clang_source_location_as_str(node.location),
                                        node.type.spelling,
                                    )
                                )
                            elif count_found != total:
                                # At least one member failed: report each problem line.
                                for i in range(total):
                                    if result[i]:
                                        output.append(
                                            "FAIL: {:s} {:s}".format(
                                                clang_source_location_as_str(children[i].location),
                                                result[i],
                                            )
                                        )
                            else:
                                output.append(
                                    "OK: {:s} {:s}".format(
                                        clang_source_location_as_str(node.location),
                                        node.type.spelling,
                                    )
                                )

            # Always recurse (even outside `filepath`) so header declarations are recorded.
            for node_child in node.get_children():
                clang_checkers.struct_comments._struct_check_comments_recursive(
                    filepath, file_data,
                    node_child, node, level + 1,
                    struct_decl_map, struct_type_map, output,
                )

        @staticmethod
        def check_source(
                filepath: str,
                file_data: bytes,
                tu: ClangTranslationUnit,
                _shared_check_data: Any) -> List[str]:
            """Walk the translation unit & return all struct-comment report lines."""
            output: List[str] = []

            struct_decl_map: Dict[str, Any] = {}
            struct_type_map: Dict[str, str] = {}
            clang_checkers.struct_comments._struct_check_comments_recursive(
                filepath, file_data,
                tu.cursor, None, 0,
                struct_decl_map, struct_type_map, output,
            )

            return output
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Checker Class Access
|
||||
|
||||
def check_function_get_all() -> List[str]:
    """Return the sorted names of every checker class in `clang_checkers`."""
    return sorted(
        name for name in dir(clang_checkers)
        if isinstance((value := getattr(clang_checkers, name)), type)
        if issubclass(value, ClangChecker)
    )
|
||||
|
||||
|
||||
def check_class_from_id(name: str) -> Type[ClangChecker]:
    """Look up the checker class named ``name`` in `clang_checkers`."""
    checker_cls = getattr(clang_checkers, name)
    assert issubclass(checker_cls, ClangChecker)
    # MYPY 0.812 doesn't recognize the assert above.
    return checker_cls  # type: ignore
|
||||
|
||||
|
||||
def check_docstring_from_id(name: str) -> str:
    """Return the named checker's doc-string, de-dented, ending with one newline."""
    from textwrap import dedent
    doc = getattr(clang_checkers, name).__doc__
    return dedent(doc or '').strip('\n') + '\n'
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Generic Clang Checker
|
||||
|
||||
def check_source_file(
        filepath: str,
        args: Sequence[str],
        check_ids: Sequence[str],
        shared_check_data_foreach_check: Sequence[Any],
) -> str:
    """
    Parse ``filepath`` with CLANG (using compiler ``args``) and run every
    checker in ``check_ids`` over the translation unit.

    Return all report lines joined with newlines, or an empty string.
    """
    clang_index = clang.cindex.Index.create()
    try:
        translation_unit = clang_index.parse(filepath, args)
    except clang.cindex.TranslationUnitLoadError as ex:
        return "PARSE_ERROR: {:s} {!r}".format(filepath, ex)

    # Read as bytes: checkers operate on CLANG's byte offsets.
    with open(filepath, "rb") as fh:
        file_data = fh.read()

    # We don't really care what we are looking at,
    # each checker scans the entire file itself.
    report_lines: List[str] = []
    for check_id, shared_data in zip(check_ids, shared_check_data_foreach_check):
        checker_cls = check_class_from_id(check_id)
        report_lines.extend(checker_cls.check_source(filepath, file_data, translation_unit, shared_data))

    if not report_lines:
        return ""
    return "\n".join(report_lines)
|
||||
|
||||
|
||||
def check_source_file_for_imap(args: Tuple[str, Sequence[str], Sequence[str], Sequence[Any]]) -> str:
    """Unpack one argument tuple for `multiprocessing.Pool.imap` (which has no ``istarmap``)."""
    filepath, clang_args, check_ids, shared_check_data_foreach_check = args
    return check_source_file(filepath, clang_args, check_ids, shared_check_data_foreach_check)
|
||||
|
||||
|
||||
def source_info_filter(
        source_info: List[Tuple[str, List[str], List[str]]],
        regex_list: Sequence[re.Pattern[str]],
) -> List[Tuple[str, List[str], List[str]]]:
    """
    Filter ``source_info`` (``(filepath, include_dirs, defines)`` tuples) down to
    checkable files: drop `CHECKER_EXCLUDE_SOURCE_FILES` entries, keep only
    `intern/ghost` & `source/` paths, and require a match with ``regex_list``.

    Exits the process with an error when an exclude entry never matched a file.
    """
    source_dir = project_source_info.SOURCE_DIR
    if not source_dir.endswith(os.sep):
        source_dir += os.sep
    source_info_result = []
    for item in source_info:
        filepath_source = item[0]
        if filepath_source.startswith(source_dir):
            filepath_source_relative = filepath_source[len(source_dir):]
            if filepath_source_relative in CHECKER_EXCLUDE_SOURCE_FILES:
                # Consume the entry so stale exclusions can be detected below.
                CHECKER_EXCLUDE_SOURCE_FILES.remove(filepath_source_relative)
                continue
            # Limit checking to `intern/ghost` & `source/` trees.
            if filepath_source_relative.startswith("intern" + os.sep + "ghost"):
                pass
            elif filepath_source_relative.startswith("source" + os.sep):
                pass
            else:
                continue

        # NOTE(review): `filepath_source_relative` is only assigned when the path
        # is under `source_dir`; for any path outside it this reuses the previous
        # iteration's value (or raises `NameError` on the first item).
        # Presumably all paths are under `source_dir` in practice -- confirm.
        has_match = False
        for regex in regex_list:
            if regex.match(filepath_source_relative) is not None:
                has_match = True
        if not has_match:
            continue

        source_info_result.append(item)

    # Any exclude entries left over did not match an existing source file.
    if CHECKER_EXCLUDE_SOURCE_FILES:
        sys.stderr.write(
            "Error: exclude file(s) are missing: {!r}\n".format((list(sorted(CHECKER_EXCLUDE_SOURCE_FILES))))
        )
        sys.exit(1)

    return source_info_result
|
||||
|
||||
|
||||
def run_checks_on_project(
        check_ids: Sequence[str],
        regex_list: Sequence[re.Pattern[str]],
) -> None:
    """
    Run every checker in ``check_ids`` over all project sources whose
    project-relative path matches any pattern in ``regex_list``,
    printing report lines to the standard output.
    """
    import multiprocessing

    source_info = project_source_info.build_info(ignore_prefix_list=CHECKER_IGNORE_PREFIX)
    source_defines = project_source_info.build_defines_as_args()

    # Apply exclusion.
    source_info = source_info_filter(source_info, regex_list)

    # Per-checker shared state, torn down at the end of this function.
    shared_check_data_foreach_check = [
        check_class_from_id(check).setup() for check in check_ids
    ]

    # NOTE: a left-over debug slice (`index = 0` / `source_info[index:]`)
    # has been removed, it always iterated the whole list.
    all_args = []
    for filepath_source, inc_dirs, defs in source_info:
        args = (
            [("-I" + i) for i in inc_dirs] +
            [("-D" + d) for d in defs] +
            source_defines
        )

        all_args.append((filepath_source, args, check_ids, shared_check_data_foreach_check))

    if USE_MULTIPROCESS:
        job_total = multiprocessing.cpu_count() + 1
        with multiprocessing.Pool(processes=job_total) as pool:
            # No `istarmap`, use an intermediate function.
            for result in pool.imap(check_source_file_for_imap, all_args):
                if result:
                    print(result)
    else:
        # Single process (set `USE_MULTIPROCESS = False` when debugging).
        # NOTE: renamed the unpacked variables so they no longer shadow
        # `shared_check_data_foreach_check` used for teardown below.
        for (filepath_source, args, _check_ids, _shared_check_data) in all_args:
            result = check_source_file(filepath_source, args, check_ids, _shared_check_data)
            if result:
                print(result)

    for (check, shared_check_data) in zip(check_ids, shared_check_data_foreach_check):
        check_class_from_id(check).teardown(shared_check_data)
|
||||
|
||||
|
||||
def create_parser(checkers_all: Sequence[str]) -> argparse.ArgumentParser:
    """Build the command line parser, embedding each checker's doc-string into ``--help``."""
    from textwrap import indent

    # Create doc-string for checks.
    checks_all_docs = []
    for checker in checkers_all:
        # `%` -> `%%` is needed for `--help` not to interpret these as formatting arguments.
        checker_doc = indent(check_docstring_from_id(checker).replace("%", "%%"), '    ')
        checks_all_docs.append("  %s\n%s" % (checker, checker_doc))

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument(
        "--match",
        nargs='+',
        required=True,
        metavar="REGEX",
        help="Match file paths against this expression",
    )
    parser.add_argument(
        "--checks",
        dest="checks",
        help=(
            "Specify the check presets to run.\n\n" +
            "\n".join(checks_all_docs) + "\n"
            "Multiple checkers may be passed at once (comma separated, no spaces)."),
        required=True,
    )

    return parser
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Main Function
|
||||
|
||||
def main() -> int:
    """Command line entry point, returns an exit code."""
    checkers_all = check_function_get_all()
    parser = create_parser(checkers_all)
    args = parser.parse_args()

    # Compile every `--match` expression up-front so errors are reported early.
    regex_list = []
    for expr in args.match:
        try:
            pattern = re.compile(expr)
        except Exception as ex:
            print("Error in expression: \"{:s}\"\n {!r}".format(expr, ex))
            return 1
        regex_list.append(pattern)

    run_checks_on_project(args.checks.split(','), regex_list)
    return 0
|
||||
|
||||
|
||||
# Exit with `main`'s return value when run as a script.
if __name__ == "__main__":
    sys.exit(main())
|
Loading…
Reference in New Issue
Block a user