Cleanup: use type checking for credits_git_gen.py & git_log.py

This commit is contained in:
Campbell Barton 2023-08-15 15:18:01 +10:00
parent 31c025c856
commit 8c5c5142d5
3 changed files with 80 additions and 54 deletions

@@ -53,11 +53,9 @@ PATHS_EXCLUDE = set(
"tools/utils/blender_keyconfig_export_permutations.py", "tools/utils/blender_keyconfig_export_permutations.py",
"tools/utils/blender_merge_format_changes.py", "tools/utils/blender_merge_format_changes.py",
"tools/utils/blender_theme_as_c.py", "tools/utils/blender_theme_as_c.py",
"tools/utils/credits_git_gen.py",
"tools/utils/cycles_commits_sync.py", "tools/utils/cycles_commits_sync.py",
"tools/utils/cycles_timeit.py", "tools/utils/cycles_timeit.py",
"tools/utils/gdb_struct_repr_c99.py", "tools/utils/gdb_struct_repr_c99.py",
"tools/utils/git_log.py",
"tools/utils/git_log_review_commits.py", "tools/utils/git_log_review_commits.py",
"tools/utils/git_log_review_commits_advanced.py", "tools/utils/git_log_review_commits_advanced.py",
"tools/utils/gitea_inactive_developers.py", "tools/utils/gitea_inactive_developers.py",

@@ -9,10 +9,22 @@ Example use:
credits_git_gen.py --source=/src/blender --range=SHA1..HEAD credits_git_gen.py --source=/src/blender --range=SHA1..HEAD
""" """
from git_log import GitCommitIter import argparse
import re
import multiprocessing import multiprocessing
import re
import unicodedata
from git_log import (
GitCommitIter,
GitCommit,
)
from typing import (
Dict,
Tuple,
Iterable,
List,
)
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
@@ -100,7 +112,7 @@ author_table = {
# Fully overwrite authors gathered from git commit info. # Fully overwrite authors gathered from git commit info.
# Intended usage: Correction of info stored in git commit itself. # Intended usage: Correction of info stored in git commit itself.
# Note that the names of the authors here are assumed fully valid and usable as-is. # Note that the names of the authors here are assumed fully valid and usable as-is.
commit_authors_overwrite = { commit_authors_overwrite: Dict[bytes, Tuple[str, str]] = {
# Format: {full_git_hash: (tuple, of, authors),}. # Format: {full_git_hash: (tuple, of, authors),}.
# Example: # Example:
# b"a60c1e5bb814078411ce105b7cf347afac6f2afd": ("Blender Foundation", "Suzanne", "Ton"), # b"a60c1e5bb814078411ce105b7cf347afac6f2afd": ("Blender Foundation", "Suzanne", "Ton"),
@@ -110,7 +122,7 @@ commit_authors_overwrite = {
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
# Multi-Processing # Multi-Processing
def process_commits_for_map(commits): def process_commits_for_map(commits: Iterable[GitCommit]) -> "Credits":
result = Credits() result = Credits()
for c in commits: for c in commits:
result.process_commit(c) result.process_commit(c)
@@ -128,8 +140,10 @@ class CreditUser:
"year_max", "year_max",
) )
def __init__(self): def __init__(self) -> None:
self.commit_total = 0 self.commit_total = 0
self.year_min = 0
self.year_max = 0
class Credits: class Credits:
@@ -143,15 +157,14 @@ class Credits:
# `Co-authored-by: Blender Foundation <Suzanne>` # `Co-authored-by: Blender Foundation <Suzanne>`
GIT_COMMIT_COAUTHORS_RE = re.compile(r"^Co-authored-by:[ \t]*(?P<author>[ \w\t]*\w)(?:$|[ \t]*<)", re.MULTILINE) GIT_COMMIT_COAUTHORS_RE = re.compile(r"^Co-authored-by:[ \t]*(?P<author>[ \w\t]*\w)(?:$|[ \t]*<)", re.MULTILINE)
def __init__(self): def __init__(self) -> None:
self.users = {} self.users: Dict[str, CreditUser] = {}
@classmethod @classmethod
def commit_authors_get(cls, c): def commit_authors_get(cls, c: GitCommit) -> List[str]:
authors = commit_authors_overwrite.get(c.sha1, None) if (authors_overwrite := commit_authors_overwrite.get(c.sha1, None)) is not None:
if authors is not None:
# Ignore git commit info for these having an entry in commit_authors_overwrite. # Ignore git commit info for these having an entry in commit_authors_overwrite.
return [author_table.get(author, author) for author in authors] return [author_table.get(author, author) for author in authors_overwrite]
authors = [c.author] + cls.GIT_COMMIT_COAUTHORS_RE.findall(c.body) authors = [c.author] + cls.GIT_COMMIT_COAUTHORS_RE.findall(c.body)
# Normalize author string into canonical form, prevents duplicate credit users # Normalize author string into canonical form, prevents duplicate credit users
@@ -159,7 +172,7 @@ class Credits:
return [author_table.get(author, author) for author in authors] return [author_table.get(author, author) for author in authors]
@classmethod @classmethod
def is_credit_commit_valid(cls, c): def is_credit_commit_valid(cls, c: GitCommit) -> bool:
ignore_dir = ( ignore_dir = (
b"blender/extern/", b"blender/extern/",
b"blender/intern/opennl/", b"blender/intern/opennl/",
@@ -170,7 +183,7 @@ class Credits:
return True return True
def merge(self, other): def merge(self, other: "Credits") -> None:
""" """
Merge other Credits into this, clearing the other. Merge other Credits into this, clearing the other.
""" """
@@ -185,7 +198,7 @@ class Credits:
user.year_max = max(user.year_max, user_other.year_max) user.year_max = max(user.year_max, user_other.year_max)
other.users.clear() other.users.clear()
def process_commit(self, c): def process_commit(self, c: GitCommit) -> None:
if not self.is_credit_commit_valid(c): if not self.is_credit_commit_valid(c):
return return
@@ -202,7 +215,7 @@ class Credits:
cu.year_min = min(cu.year_min, year) cu.year_min = min(cu.year_min, year)
cu.year_max = max(cu.year_max, year) cu.year_max = max(cu.year_max, year)
def _process_multiprocessing(self, commit_iter, *, jobs): def _process_multiprocessing(self, commit_iter: Iterable[GitCommit], *, jobs: int) -> None:
print("Collecting commits...") print("Collecting commits...")
# NOTE(@ideasman42): that the chunk size doesn't have as much impact on # NOTE(@ideasman42): that the chunk size doesn't have as much impact on
# performance as you might expect, values between 16 and 1024 seem reasonable. # performance as you might expect, values between 16 and 1024 seem reasonable.
@@ -226,7 +239,7 @@ class Credits:
print("{:d} of {:d}".format(i, len(chunk_list))) print("{:d} of {:d}".format(i, len(chunk_list)))
self.merge(result) self.merge(result)
def process(self, commit_iter, *, jobs): def process(self, commit_iter: Iterable[GitCommit], *, jobs: int) -> None:
if jobs > 1: if jobs > 1:
self._process_multiprocessing(commit_iter, jobs=jobs) self._process_multiprocessing(commit_iter, jobs=jobs)
return return
@@ -237,10 +250,13 @@ class Credits:
if not (i % 100): if not (i % 100):
print(i) print(i)
def write(self, filepath, def write(
is_main_credits=True, self,
contrib_companies=(), filepath: str,
sort="name"): is_main_credits: bool = True,
contrib_companies: Tuple[str, ...] = (),
sort: str = "name",
) -> None:
# patch_word = "patch", "patches" # patch_word = "patch", "patches"
commit_word = "commit", "commits" commit_word = "commit", "commits"
@@ -280,8 +296,7 @@ class Credits:
)) ))
def argparse_create(): def argparse_create() -> argparse.ArgumentParser:
import argparse
# When --help or no args are given, print this help # When --help or no args are given, print this help
usage_text = "Review revisions." usage_text = "Review revisions."
@@ -325,7 +340,7 @@ def argparse_create():
return parser return parser
def main(): def main() -> None:
# ---------- # ----------
# Parse Args # Parse Args

@@ -6,6 +6,14 @@
import os import os
import subprocess import subprocess
import datetime
from typing import (
List,
Union,
Optional,
Tuple,
)
class GitCommit: class GitCommit:
@@ -24,20 +32,19 @@ class GitCommit:
"_diff", "_diff",
) )
def __init__(self, sha1, git_dir): def __init__(self, sha1: bytes, git_dir: str):
self.sha1 = sha1 self.sha1 = sha1
self._git_dir = git_dir self._git_dir = git_dir
self._author = \ self._author: Optional[str] = None
self._email = \ self._email: Optional[str] = None
self._date = \ self._date: Optional[datetime.datetime] = None
self._body = \ self._body: Optional[str] = None
self._files = \ self._files: Optional[List[bytes]] = None
self._files_status = \ self._files_status: Optional[List[List[bytes]]] = None
self._diff = \ self._diff: Optional[str] = None
None
def cache(self): def cache(self) -> None:
""" """
Cache all properties Cache all properties
(except for diff as it's significantly larger than other members). (except for diff as it's significantly larger than other members).
@@ -49,9 +56,9 @@ class GitCommit:
self.files self.files
self.files_status self.files_status
def _log_format(self, format, args=()): def _log_format(self, format: str, args: Tuple[Union[str, bytes], ...] = ()) -> bytes:
# sha1 = self.sha1.decode('ascii') # sha1 = self.sha1.decode('ascii')
cmd = ( cmd: Tuple[Union[str, bytes], ...] = (
"git", "git",
"--git-dir", "--git-dir",
self._git_dir, self._git_dir,
@@ -59,17 +66,20 @@ class GitCommit:
"-1", # only this rev "-1", # only this rev
self.sha1, self.sha1,
"--format=" + format, "--format=" + format,
) + args *args,
)
# print(" ".join(cmd)) # print(" ".join(cmd))
with subprocess.Popen( with subprocess.Popen(
cmd, cmd,
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
) as p: ) as p:
assert p is not None and p.stdout is not None
return p.stdout.read() return p.stdout.read()
@property @property
def sha1_short(self): def sha1_short(self) -> str:
cmd = ( cmd = (
"git", "git",
"--git-dir", "--git-dir",
@@ -82,10 +92,11 @@ class GitCommit:
cmd, cmd,
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
) as p: ) as p:
assert p is not None and p.stdout is not None
return p.stdout.read().strip().decode('ascii') return p.stdout.read().strip().decode('ascii')
@property @property
def author(self): def author(self) -> str:
ret = self._author ret = self._author
if ret is None: if ret is None:
content = self._log_format("%an")[:-1] content = self._log_format("%an")[:-1]
@@ -94,7 +105,7 @@ class GitCommit:
return ret return ret
@property @property
def email(self): def email(self) -> str:
ret = self._email ret = self._email
if ret is None: if ret is None:
content = self._log_format("%ae")[:-1] content = self._log_format("%ae")[:-1]
@@ -103,7 +114,7 @@ class GitCommit:
return ret return ret
@property @property
def date(self): def date(self) -> datetime.datetime:
ret = self._date ret = self._date
if ret is None: if ret is None:
import datetime import datetime
@@ -112,7 +123,7 @@ class GitCommit:
return ret return ret
@property @property
def body(self): def body(self) -> str:
ret = self._body ret = self._body
if ret is None: if ret is None:
content = self._log_format("%B")[:-1] content = self._log_format("%B")[:-1]
@@ -121,11 +132,11 @@ class GitCommit:
return ret return ret
@property @property
def subject(self): def subject(self) -> str:
return self.body.lstrip().partition("\n")[0] return self.body.lstrip().partition("\n")[0]
@property @property
def files(self): def files(self) -> List[bytes]:
ret = self._files ret = self._files
if ret is None: if ret is None:
ret = [f for f in self._log_format("format:", args=("--name-only",)).split(b"\n") if f] ret = [f for f in self._log_format("format:", args=("--name-only",)).split(b"\n") if f]
@@ -133,7 +144,7 @@ class GitCommit:
return ret return ret
@property @property
def files_status(self): def files_status(self) -> List[List[bytes]]:
ret = self._files_status ret = self._files_status
if ret is None: if ret is None:
ret = [f.split(None, 1) for f in self._log_format("format:", args=("--name-status",)).split(b"\n") if f] ret = [f.split(None, 1) for f in self._log_format("format:", args=("--name-status",)).split(b"\n") if f]
@@ -141,7 +152,7 @@ class GitCommit:
return ret return ret
@property @property
def diff(self): def diff(self) -> str:
ret = self._diff ret = self._diff
if ret is None: if ret is None:
content = self._log_format("", args=("-p",)) content = self._log_format("", args=("-p",))
@@ -158,13 +169,13 @@ class GitCommitIter:
"_process", "_process",
) )
def __init__(self, path, sha1_range): def __init__(self, path: str, sha1_range: str):
self._path = path self._path = path
self._git_dir = os.path.join(path, ".git") self._git_dir = os.path.join(path, ".git")
self._sha1_range = sha1_range self._sha1_range = sha1_range
self._process = None self._process: Optional[subprocess.Popen[bytes]] = None
def __iter__(self): def __iter__(self) -> "GitCommitIter":
cmd = ( cmd = (
"git", "git",
"--git-dir", "--git-dir",
@@ -181,7 +192,8 @@ class GitCommitIter:
) )
return self return self
def __next__(self): def __next__(self) -> GitCommit:
assert self._process is not None and self._process.stdout is not None
sha1 = self._process.stdout.readline()[:-1] sha1 = self._process.stdout.readline()[:-1]
if sha1: if sha1:
return GitCommit(sha1, self._git_dir) return GitCommit(sha1, self._git_dir)
@@ -195,12 +207,12 @@ class GitRepo:
"_git_dir", "_git_dir",
) )
def __init__(self, path): def __init__(self, path: str):
self._path = path self._path = path
self._git_dir = os.path.join(path, ".git") self._git_dir = os.path.join(path, ".git")
@property @property
def branch(self): def branch(self) -> bytes:
cmd = ( cmd = (
"git", "git",
"--git-dir", "--git-dir",
@@ -215,4 +227,5 @@ class GitRepo:
cmd, cmd,
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
) )
assert p is not None and p.stdout is not None
return p.stdout.read() return p.stdout.read()