8974ca8f24
Previously this was double the CPU count because: - Modern CPUs from AMD & Intel support SMT/hyper-threading which presents twice as many cores; doubling again has little to no benefit. - Using 2x or 4x the number of physical cores can use a lot of memory on systems with many cores, which are becoming more common.
412 lines
13 KiB
Python
Executable File
412 lines
13 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# SPDX-FileCopyrightText: 2023 Blender Authors
|
|
#
|
|
# SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
"""
|
|
Example usage to generate all authors:
|
|
|
|
authors_git_gen.py
|
|
|
|
Example usage with a custom range:
|
|
|
|
authors_git_gen.py --source=/src/blender --range=SHA1..HEAD
|
|
"""
|
|
|
|
# NOTE: this shares the basic structure with `credits_git_gen.py`,
|
|
# however details differ enough for them to be separate scripts.
|
|
# Improvements to this script may apply there too.
|
|
|
|
import argparse
|
|
import io
|
|
import multiprocessing
|
|
import os
|
|
import sys
|
|
import unicodedata
|
|
|
|
from typing import (
|
|
Dict,
|
|
Iterable,
|
|
List,
|
|
)
|
|
|
|
from git_log import (
|
|
GitCommitIter,
|
|
GitCommit,
|
|
)
|
|
|
|
import git_data_canonical_authors
|
|
import git_data_sha1_override_authors
|
|
|
|
# True when stdout is a terminal: selects the in-place progress display.
IS_ATTY = sys.stdout.isatty()

# Directory containing this script.
BASE_DIR = os.path.abspath(os.path.dirname(__file__))

# Repository root (two levels up from this script), the default `--source`.
SOURCE_DIR = os.path.normpath(os.path.join(BASE_DIR, "..", ".."))

# Exclude authors with this many (or fewer) lines modified.
AUTHOR_LINES_SKIP = 4

# Stop counting lines per-author after this limit is reached.
AUTHOR_LINES_LIMIT = 100
|
|
|
|
|
|
# -----------------------------------------------------------------------------
# Lookup Table to clean up the authors
#
# This is a combination of unifying git logs as well as
# name change requested by the authors.

# Some projects prefer not to have their developers listed.
# Exact "Name <email>" strings excluded from the generated output.
author_exclude_individuals = {
    "Jason Fielder <jason-fielder@noreply.localhost>",  # `@apple.com` developer.
    "Michael B Johnson <wave@noreply.localhost>",  # `@apple.com` developer.
}

# `fnmatch` style patterns matched against "Name <email>" for exclusion.
author_exclude_glob = (
    "* <*@apple.com>",
)

# Maps author strings to their canonical "Name <email>" form.
author_table = git_data_canonical_authors.canonical_author_map()
# Maps a commit SHA1 to author strings that override the commit's git data.
author_override_table = git_data_sha1_override_authors.sha1_authors_map()
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Multi-Processing
|
|
|
|
|
|
def process_commits_for_map(commits: Iterable[GitCommit]) -> "Credits":
    """
    Fold an iterable of commits into a fresh `Credits` accumulator.

    Used as the map function for worker processes; the parent merges the result.
    """
    credits_accum = Credits()
    for commit in commits:
        credits_accum.process_commit(commit)
    return credits_accum
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Progress Display
|
|
|
|
def value_as_percentage(value_partial: int, value_final: int) -> str:
    """
    Return ``value_partial / value_final`` formatted as a fixed-width
    percentage string (zero total formats as zero percent).
    """
    if value_final == 0:
        fraction = 0.0
    else:
        fraction = value_partial / value_final
    return "{:-6.2f}%".format(fraction * 100)
|
|
|
|
|
|
if IS_ATTY:
    def progress_output(value_partial: int, value_final: int, info: str) -> None:
        # Terminal: rewrite the current line in place (CR + clear-to-end-of-line).
        percent_text = value_as_percentage(value_partial, value_final)
        sys.stdout.write("\r\033[K[{:s}]: {:s}".format(percent_text, info))
else:
    def progress_output(value_partial: int, value_final: int, info: str) -> None:
        # Not a terminal (e.g. output redirected): one full line per update.
        percent_text = value_as_percentage(value_partial, value_final)
        sys.stdout.write("[{:s}]: {:s}\n".format(percent_text, info))
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Class for generating authors
|
|
|
|
class CreditUser:
    """
    Per-author accumulator: the number of commits and changed lines.
    """
    __slots__ = ("commit_total", "lines_change")

    def __init__(self) -> None:
        # `lines_change` of -1 means "not yet counted".
        self.commit_total = 0
        self.lines_change = -1
|
|
|
|
|
|
class Credits:
    """
    Accumulate per-author commit & changed-line counts over a commit range.
    """
    __slots__ = (
        "users",
        # Use for progress, simply the number of times `process_commit` has been called.
        "process_commits_count",
    )

    def __init__(self) -> None:
        # Maps the "Name <email>" author string to its `CreditUser`.
        self.users: Dict[str, CreditUser] = {}
        self.process_commits_count = 0

    @classmethod
    def commit_authors_get(cls, c: GitCommit) -> List[str]:
        """
        Return the canonical author list (author & co-authors) for `c`.
        """
        if (authors_overwrite := author_override_table.get(c.sha1, None)) is not None:
            # Ignore git commit info for these having an entry in `author_override_table`.
            return [author_table.get(author, author) for author in authors_overwrite]

        authors = [c.author] + c.co_authors
        # Normalize author string into canonical form, prevents duplicate credit users.
        authors = [unicodedata.normalize('NFC', author) for author in authors]
        return [author_table.get(author, author) for author in authors]

    @classmethod
    def is_credit_commit_valid(cls, c: GitCommit) -> bool:
        """
        Return True when the commit modifies at least one credited file.
        """
        ignore_dir = (
            b"blender/extern/",
            b"blender/intern/opennl/",
            # Will have own authors file.
            b"intern/cycles/",
            # Will have own authors file.
            b"intern/libmv/libmv/",
        )

        # Credit the commit when any modified file falls outside the ignored directories.
        return any(not f.startswith(ignore_dir) for f in c.files)

    def merge(self, other: "Credits") -> None:
        """
        Merge other Credits into this, clearing the other.
        """
        for user_key, user_other in other.users.items():
            user = self.users.get(user_key)
            if user is None:
                # Consume the user.
                self.users[user_key] = user_other
            else:
                user.commit_total += user_other.commit_total
                user.lines_change += user_other.lines_change
        other.users.clear()

    def process_commit(self, c: GitCommit) -> None:
        """
        Accumulate a single commit into `self.users`.
        """
        self.process_commits_count += 1

        if not self.is_credit_commit_valid(c):
            return

        # Computed lazily, at most once per commit (scanning the diff is
        # relatively expensive); -1 means "not yet computed".
        lines_change = -1

        authors = self.commit_authors_get(c)
        for author in authors:
            cu = self.users.get(author)
            if cu is None:
                cu = self.users[author] = CreditUser()

            cu.commit_total += 1

            lines_change_for_author = cu.lines_change
            # Stop counting lines once an author has reached the limit.
            if lines_change_for_author < AUTHOR_LINES_LIMIT:
                if lines_change_for_author == -1:
                    cu.lines_change = lines_change_for_author = 0

                if lines_change == -1:
                    diff = c.diff
                    # Count added & removed lines from the unified diff.
                    lines_change = diff.count("\n-") + diff.count("\n+")

                cu.lines_change += lines_change

    def _process_multiprocessing(self, commit_iter: Iterable[GitCommit], *, jobs: int) -> None:
        """
        Process commits in chunks using a pool of `jobs` worker processes.
        """
        print("Collecting commits...")
        # NOTE(@ideasman42): that the chunk size doesn't have as much impact on
        # performance as you might expect, values between 16 and 1024 seem reasonable.
        # Although higher values tend to bottleneck as the process finishes.
        chunk_size = 256
        chunk_list = []
        chunk = []
        for c in commit_iter:
            chunk.append(c)
            if len(chunk) >= chunk_size:
                chunk_list.append(chunk)
                chunk = []
        if chunk:
            chunk_list.append(chunk)

        # Count chunk contents directly: the previous calculation of
        # `(len(chunk_list) - 1) * chunk_size + len(chunk)` under-counted
        # (reporting zero) when the number of commits was an exact multiple
        # of `chunk_size`, because `chunk` is reset after each full chunk.
        commit_count_total = sum(len(chunk_item) for chunk_item in chunk_list)

        print("Found {:,d} commits, processing...".format(commit_count_total))
        commit_count_handled = 0
        with multiprocessing.Pool(processes=jobs) as pool:
            for result in pool.imap_unordered(process_commits_for_map, chunk_list):
                commit_count_handled += result.process_commits_count
                progress_output(
                    commit_count_handled,
                    commit_count_total,
                    "{:,d} / {:,d} commits".format(commit_count_handled, commit_count_total),
                )
                self.merge(result)

        if IS_ATTY:
            print("")  # Was printing on one-line, move to next.

    def process(self, commit_iter: Iterable[GitCommit], *, jobs: int) -> None:
        """
        Process all commits from `commit_iter`,
        in parallel when `jobs` is greater than 1.
        """
        if jobs > 1:
            self._process_multiprocessing(commit_iter, jobs=jobs)
            return

        # Simple single process operation.
        for i, c in enumerate(commit_iter):
            self.process_commit(c)
            if not (i % 100):
                print(i)

    def write_object(
            self,
            fh: io.TextIOWrapper,
            *,
            use_metadata: bool = False,
    ) -> None:
        """
        Write the sorted author list to `fh`,
        skipping excluded & below-threshold authors.

        :arg use_metadata: When True, append commit & line counts after each author.
        """
        import fnmatch
        import re

        # Indexed by `commit_total > 1` for singular/plural selection.
        commit_word = "commit", "commits"
        metadata_right_margin = 79

        # Pre-compile the exclusion globs, keeping the original glob for the message.
        author_exclude_regex = tuple(
            (match_glob, re.compile(fnmatch.translate(match_glob)))
            for match_glob in author_exclude_glob
        )

        sorted_authors = dict(sorted(self.users.items()))
        for author_with_email, cu in sorted_authors.items():
            if author_with_email.endswith(" <>"):
                print("Skipping:", author_with_email, "(no email)")
                continue
            if cu.lines_change <= AUTHOR_LINES_SKIP:
                print("Skipping:", author_with_email, cu.lines_change, "line(s) changed.")
                continue
            if author_with_email in author_exclude_individuals:
                print("Skipping:", author_with_email, "explicit exclusion requested.")
                continue
            # A generator (not a materialized list) so matching stops at the first hit.
            if match_glob_found := next((
                    match_glob for match_glob, match_regex in author_exclude_regex
                    if match_regex.match(author_with_email)
            ), None):
                print("Skipping:", author_with_email, "glob exclusion \"{:s}\" requested.".format(match_glob_found))
                continue

            if use_metadata:
                fh.write("{:s} {:s}# lines={:,d} ({:s}), {:,d} {:s}\n".format(
                    author_with_email,
                    (" " * max(1, metadata_right_margin - len(author_with_email))),
                    min(cu.lines_change, AUTHOR_LINES_LIMIT),
                    "" if cu.lines_change >= AUTHOR_LINES_LIMIT else "<SKIP?>",
                    cu.commit_total,
                    commit_word[cu.commit_total > 1],
                ))
            else:
                fh.write("{:s}\n".format(author_with_email))

    def write(
            self,
            filepath: str,
            *,
            use_metadata: bool = False,
    ) -> None:
        """
        Write the author list to `filepath` (see `write_object`).
        """
        with open(filepath, 'w', encoding="utf8", errors='xmlcharrefreplace') as fh:
            self.write_object(fh, use_metadata=use_metadata)
|
|
|
|
|
|
def argparse_create() -> argparse.ArgumentParser:
    """
    Build and return the command line argument parser.

    When --help or no args are given, the description is printed.
    """
    parser = argparse.ArgumentParser(
        description="List authors.",
        epilog="This script is used to generate an AUTHORS file",
    )

    parser.add_argument(
        "--source",
        dest="source_dir",
        metavar='PATH',
        default=SOURCE_DIR,
        required=False,
        help="Path to git repository",
    )
    parser.add_argument(
        "--output",
        dest="output_filepath",
        metavar='PATH',
        default="",
        required=False,
        help="File path to write the output to (when omitted, update the \"AUTHORS\" file)",
    )
    parser.add_argument(
        "--range",
        dest="range_sha1",
        metavar='SHA1_RANGE',
        default="HEAD",
        required=False,
        help="Range to use, eg: 169c95b8..HEAD or HEAD for all history.",
    )
    parser.add_argument(
        "--jobs",
        dest="jobs",
        type=int,
        default=0,
        required=False,
        help=(
            "The number of processes to use. "
            "Defaults to zero which detects the available cores, 1 is single threaded (useful for debugging)."
        ),
    )
    parser.add_argument(
        "--use-metadata",
        dest="use_metadata",
        action='store_true',
        default=False,
        help="Add commits & changed lines as metadata after the author."
    )

    return parser
|
|
|
|
|
|
def main() -> None:
    """
    Command line entry point: generate the author list and either write it
    to `--output` or splice it into the repository's "AUTHORS" file.
    """

    # ----------
    # Parse Args

    args = argparse_create().parse_args()

    credits = Credits()
    # commit_range = "HEAD~10..HEAD"
    # commit_range = "blender-v2.81-release..blender-v2.82-release"
    # commit_range = "blender-v2.82-release"
    commit_range = args.range_sha1
    jobs = args.jobs
    if jobs <= 0:
        # Clamp the value, higher values give errors with too many open files.
        # Allow users to manually pass very high values in as they might want to tweak system limits themselves.
        jobs = min(multiprocessing.cpu_count(), 400)

    credits.process(GitCommitIter(args.source_dir, commit_range), jobs=jobs)

    # Explicit output path: write & exit.
    if args.output_filepath:
        credits.write(args.output_filepath, use_metadata=args.use_metadata)
        print("Written:", args.output_filepath)
        return

    filepath_authors = os.path.join(args.source_dir, "AUTHORS")

    # Render the author list into memory so it can be spliced into the file.
    authors_text_io = io.StringIO()
    credits.write_object(authors_text_io, use_metadata=args.use_metadata)
    authors_text = authors_text_io.getvalue()
    del authors_text_io

    # The generated list replaces everything between these two markers.
    text_beg = "# BEGIN individuals section.\n"
    text_end = "# Please DO NOT APPEND here. See comments at the top of the file.\n"
    with open(filepath_authors, "r", encoding="utf-8") as fh:
        authors_contents = fh.read()

    # Use `find` (not `index`): `index` raises `ValueError` on a missing
    # marker, which made the `== -1` error reports below unreachable.
    text_beg_index = authors_contents.find(text_beg)
    text_end_index = authors_contents.find(text_end)
    if text_beg_index == -1:
        print("Text: {!r} not found in {!r}".format(text_beg, filepath_authors))
        sys.exit(1)
    if text_end_index == -1:
        print("Text: {!r} not found in {!r}".format(text_end, filepath_authors))
        sys.exit(1)

    # Keep the begin marker itself in the output.
    text_beg_index += len(text_beg)

    with open(filepath_authors, "w", encoding="utf-8") as fh:
        fh.write(authors_contents[:text_beg_index])
        fh.write(authors_text)
        fh.write(authors_contents[text_end_index:])

    print("Updated:", filepath_authors)
|
|
|
|
|
|
# Script entry point (not run when imported as a module).
if __name__ == "__main__":
    main()
|