blender/tools/triage/gitea_utils.py
Jacques Lucke 3e814bc702 Tools: gitea utils does not load all pages
The issue is that the default `limit` seems to be 30, and not 50. Due to the
special case for `page == 1` and the `len(result_page) < limit` check, the
`url_json_get_all_pages` function may return before all pages have been loaded.

The fix is to simply remove the `limit` parameter. It wasn't used anyway.
Using it correctly is somewhat tricky if we can't pass in the limit in the
`page == 1` case. This may result in a couple more API requests but it's
probably not a problem in practice. If it becomes a problem, we should
figure out in which cases the `page == 1` special case is actually
required (it was not in the cases that I tested).

Note that the first link returns fewer results than the second link:
* https://projects.blender.org/api/v1/users/jacqueslucke/activities/feeds?only-performed-by=true&date=2024-04-22
* https://projects.blender.org/api/v1/users/jacqueslucke/activities/feeds?only-performed-by=true&date=2024-04-22&limit=50

Pull Request: https://projects.blender.org/blender/blender/pulls/120948
2024-04-28 01:06:51 +02:00

223 lines
7.4 KiB
Python

#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2023 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later
# Simple module for inspecting GITEA users, pulls and issues.
import datetime
import json
import urllib.error
import urllib.parse
import urllib.request
from typing import (
Any,
Dict,
List,
Optional,
Set,
Union,
)
# Root address of the GITEA REST API (v1) on projects.blender.org,
# the end-point URLs used by the functions below are built by appending to it.
BASE_API_URL = "https://projects.blender.org/api/v1"
def url_json_get(url: str) -> Optional[Union[Dict[str, Any], List[Dict[str, Any]]]]:
    """
    Request ``url`` and return the response body decoded as JSON.

    :param url: the address to request.
    :return: the decoded JSON value (a dictionary, a list or None).
       None is also returned when opening the URL fails with a ``urllib.error.URLError``,
       in that case the URL and the error are printed.
    """
    try:
        # Make the HTTP request and store the response in a 'response' object.
        response = urllib.request.urlopen(url)
    except urllib.error.URLError as ex:
        print(url)
        print("Error making HTTP request:", ex)
        return None

    # Read inside a `with` block so the underlying connection is closed as soon as
    # the body has been read, instead of staying open until garbage collection.
    with response:
        content = response.read()

    # Convert the response content to a JSON object.
    result = json.loads(content)
    assert result is None or isinstance(result, (dict, list))
    return result
def url_json_get_all_pages(
    url: str,
    verbose: bool = False,
) -> List[Dict[str, Any]]:
    """
    Request every page of a paginated end-point and concatenate the results.

    :param url: the address of the first page, with or without a query string.
    :param verbose: when true, print the number of the page being requested.
    :return: the items of all pages in the order they were received.
       Loading stops at the first page that is empty or that fails to load.
    """
    # The URL may or may not contain a query string already, pick the matching separator
    # so the `page` argument is never glued onto the path (which makes the request fail
    # and silently truncates the result to the first page).
    page_separator = "&" if "?" in url else "?"

    result: List[Dict[str, Any]] = []
    page = 1
    while True:
        if verbose:
            print(f"Requesting page {page}", end="\r", flush=True)

        if page == 1:
            # XXX: In some cases, a bug prevents using the `page` and `limit` parameters if the page is 1
            result_page = url_json_get(url)
        else:
            result_page = url_json_get(f"{url}{page_separator}page={page}")

        # Both a failed request (None) and an empty page mark the end.
        if not result_page:
            break

        assert isinstance(result_page, list)
        result.extend(result_page)
        page += 1

    return result
def gitea_user_get(username: str) -> Dict[str, Any]:
    """
    Fetch the JSON description of a user from its user name.

    See: https://docs.gitea.com/api/next/#tag/user/operation/userGet

    :param username: name of the user to look up.
    :return: the user data, the response is asserted to be a dictionary.
    """
    user_data = url_json_get(f"{BASE_API_URL}/users/{username}")
    assert isinstance(user_data, dict)
    return user_data
def gitea_json_issue_get(issue_fullname: str) -> Dict[str, Any]:
    """
    Fetch the JSON data of a single issue or pull request.

    :param issue_fullname: string in the format "{owner}/{repo}/issues/{number}"
    :return: the issue data, the response is asserted to be a dictionary.
    """
    issue_data = url_json_get(f"{BASE_API_URL}/repos/{issue_fullname}")
    assert isinstance(issue_data, dict)
    return issue_data
def gitea_json_activities_get(username: str, date: str) -> List[Dict[str, Any]]:
    """
    Collect the activity feed of a user for one date, across all result pages.

    :param username: username of user.
    :param date: the date of the activities to be found, inserted into the URL as given.
    :return: list of activities, only those performed by the user are requested.
    """
    activities = url_json_get_all_pages(
        f"{BASE_API_URL}/users/{username}/activities/feeds?only-performed-by=true&date={date}",
    )
    assert isinstance(activities, list)
    return activities
def gitea_json_issues_search(
    type: Optional[str] = None,
    since: Optional[str] = None,
    before: Optional[str] = None,
    state: str = 'all',
    labels: Optional[str] = None,
    created: bool = False,
    reviewed: bool = False,
    access_token: Optional[str] = None,
    verbose: bool = True,
) -> List[Dict[str, Any]]:
    """
    Search issues and pulls through the `repos/issues/search` end-point, loading all result pages.

    Arguments with a false value (None, empty string, False) are left out of the request.

    :param type: filter by type (issues / pulls) if set.
    :param since: only return items updated after the given time (timestamp in RFC 3339 format).
    :param before: only return items updated before the given time (timestamp in RFC 3339 format).
    :param state: whether issue is open or closed.
    :param labels: comma separated list of labels.
       Fetch only issues that have any of this labels. Non existent labels are discarded.
    :param created: filter (issues / pulls) created by you, default is false.
    :param reviewed: filter pulls reviewed by you, default is false.
    :param access_token: token generated by the GITEA API.
    :param verbose: print the search being performed, the paging progress and the total.
    :return: List of issues or pulls.
    """
    # Listed in the same order as the function arguments, this is also the order used in the URL.
    candidate_params = {
        "type": type,
        "since": since,
        "before": before,
        "state": state,
        "labels": labels,
        "created": created,
        "reviewed": reviewed,
        "access_token": access_token,
    }
    # Skip unset values, booleans that remain can only be True and are spelled as the API expects.
    query_params = {
        key: ("true" if value is True else value)
        for key, value in candidate_params.items()
        if value
    }

    if verbose:
        print("# Searching for {} #".format(query_params.get("type", "issues and pulls")))
        # Never echo the access token.
        printable_params = {
            key: value for key, value in query_params.items()
            if key not in {"type", "access_token"}
        }
        print("Query params:", printable_params)

    issues_url = "{}?{}".format(
        f"{BASE_API_URL}/repos/issues/search",
        urllib.parse.urlencode(query_params),
    )
    issues = url_json_get_all_pages(issues_url, verbose=verbose)

    if verbose:
        print(f"Total: {len(issues)} ", end="\n\n", flush=True)

    return issues
def gitea_json_issue_events_filter(
    issue_fullname: str,
    date_start: Optional[datetime.datetime] = None,
    date_end: Optional[datetime.datetime] = None,
    username: Optional[str] = None,
    labels: Optional[Set[str]] = None,
    event_type: Set[str] = set(),
) -> List[Dict[str, Any]]:
    """
    Load the timeline (comments and events) of an issue and keep the matching entries.

    An entry is kept when it was made by ``username`` and either
    it is a "label" event whose label name is in ``labels``, or its type is in ``event_type``.

    :param issue_fullname: string in the format "{owner}/{repo}/issues/{number}"
    :param date_start: if provided, only comments updated since the specified time are returned.
    :param date_end: if provided, only comments updated before the provided time are returned.
    :param username: entries without a user, or whose user name differs from this value, are skipped.
    :param labels: list of labels. Fetch only events that have any of this labels.
    :param event_type: set of types of events in {"close", "commit_ref"...}.
    :return: List of comments or events.
    """
    # Optional time range, the ISO formatted time gets a "Z" suffix appended.
    time_range = {}
    if date_start:
        time_range["since"] = f"{date_start.isoformat()}Z"
    if date_end:
        time_range["before"] = f"{date_end.isoformat()}Z"

    timeline_url = f"{BASE_API_URL}/repos/{issue_fullname}/timeline"
    if time_range:
        timeline_url = f"{timeline_url}?{urllib.parse.urlencode(time_range)}"

    matches = []
    for entry in url_json_get_all_pages(timeline_url):
        if not entry:
            continue

        author = entry["user"]
        if not author or author["username"] != username:
            continue

        # Label events are matched by label name, anything else by event type.
        if (
                (labels and entry["type"] == "label" and entry["label"]["name"] in labels) or
                (entry["type"] in event_type)
        ):
            matches.append(entry)

    return matches
# WORKAROUND: This function doesn't involve GITEA, and the obtained username may not match the username used in GITEA.
# However, it provides an option to fetch the configured username from the local Git,
# in case the user does not explicitly supply the username.
def git_username_detect() -> Optional[str]:
    """
    Read the `user.username` setting from the local Git configuration.

    The command runs in the directory two levels above the one containing this file.

    :return: the configured value with trailing white-space removed,
       or None when Git can't be run or the command exits with an error
       (the error is printed in that case).
    """
    import os
    import subprocess

    # Get the repository directory.
    repo_dir = os.path.abspath(os.path.normpath(os.path.join(os.path.dirname(__file__), "..", "..")))

    # Attempt to get the configured username from the local Git.
    try:
        result = subprocess.run(
            ["git", "config", "user.username"],
            stdout=subprocess.PIPE,
            cwd=repo_dir,
            # Raise `CalledProcessError` on a non-zero exit code.
            check=True,
        )
    except (OSError, subprocess.CalledProcessError) as ex:
        # `OSError` covers a missing `git` executable or an unusable working directory,
        # detection is best-effort so this must not abort the caller.
        print(f"Error fetching Git username: {ex}")
        return None

    return result.stdout.decode().rstrip()