# utils_gitea.py

import json
import time
import urllib.error
import urllib.parse
import urllib.request
import urllib.robotparser

from concurrent.futures import ThreadPoolExecutor, as_completed

BASE_API_URL = "https://projects.blender.org/api/v1"
CRAWL_DELAY = 2
last_request_time = None

# Conform to Blender's crawl delay request:
# https://projects.blender.org/robots.txt
try:
    projects = urllib.robotparser.RobotFileParser(
        url="https://projects.blender.org/robots.txt")
    projects.read()
    projects_crawl_delay = projects.crawl_delay("*")
    if projects_crawl_delay is not None:
        CRAWL_DELAY = projects_crawl_delay
except Exception:
    pass


def wait_for_crawl_delay() -> None:
    global last_request_time

    # Respect the crawl delay by sleeping until enough time has passed
    # since the previous request.
    if last_request_time is not None:
        time.sleep(max(CRAWL_DELAY - (time.time() - last_request_time), 0))

    last_request_time = time.time()


def url_json_get(url, data=None):
    try:
        if data:
            # POST the payload as JSON.
            data = json.dumps(data).encode('utf-8')
            request = urllib.request.Request(url, data=data, method='POST')
            request.add_header('Content-Type', 'application/json')
        else:
            request = urllib.request.Request(url)

        wait_for_crawl_delay()
        response = urllib.request.urlopen(request)
        response_data = json.loads(response.read())
        return response_data

    except urllib.error.URLError as ex:
        print("Error making HTTP request:", ex)
        return None


def url_json_get_all_pages(url, item_filter=None, limit=50, exclude=set(), verbose=False):
    assert limit <= 50, "50 is the maximum limit of items per page"

    url_for_page = f"{url}&limit={limit}&page="

    # Fetch the first page directly so we can read the `X-Total-Count`
    # response header and compute the total number of pages.
    wait_for_crawl_delay()
    with urllib.request.urlopen(url_for_page + '1') as response:
        headers_first = response.info()
        json_data_first = json.loads(response.read())

    total_count = int(headers_first.get('X-Total-Count'))
    total_pages = (total_count + limit - 1) // limit

    def fetch_page(page):
        if page == 1:
            json_data = json_data_first
        else:
            json_data = url_json_get(url_for_page + str(page))

        if verbose:
            print(f"Fetched page {page}")

        data = []
        for item in json_data:
            if exclude and int(item["number"]) in exclude:
                continue
            data.append({k: item[k] for k in item_filter} if item_filter else item)

        return data

    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(fetch_page, page) for page in range(1, total_pages + 1)]
        all_results = [future.result() for future in as_completed(futures)]

    # Flatten the per-page lists into a single list of items.
    return [item for sublist in all_results for item in sublist]


def gitea_json_issue_get(owner, repo, number):
    """
    Get issue/pull JSON data.
    """
    url = f"{BASE_API_URL}/repos/{owner}/{repo}/issues/{number}"
    return url_json_get(url)


def gitea_fetch_issues(owner, repo, state='all', labels='', issue_attr_filter=None, since=None, exclude=set()):
    query_params = {
        'labels': labels,
        'state': state,
        'type': 'issues'}

    if since:
        query_params['since'] = since

    base_url = f"{BASE_API_URL}/repos/{owner}/{repo}/issues"
    encoded_query_params = urllib.parse.urlencode(query_params)
    issues_url = f"{base_url}?{encoded_query_params}"
    return url_json_get_all_pages(issues_url, item_filter=issue_attr_filter, exclude=exclude, verbose=True)


def gitea_issues_body_updated_at_get(issues, verbose=True):
    def fetch_issue(issue):
        number = issue['number']
        if verbose:
            print(f"Fetching issue #{number}")

        # Changes to the issue description are not reflected in `issue['updated_at']`.
        # To retrieve the actual update datetime for the issue body, we need to use a different API endpoint.
        # Note: Requests to this endpoint may trigger "HTTP Error 403: Forbidden"
        # due to Blender's anti-scraping measures.
        json_data = url_json_get(
            f"https://projects.blender.org/blender/blender/issues/{number}/content-history/list")

        if json_data and json_data['results']:
            return json_data['results'][0]['name'].split('datetime="')[1].split('"')[0]
        else:
            return issue['updated_at']

    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(fetch_issue, issue) for issue in issues]
        # Results are collected in completion order, not necessarily in the
        # order of the input `issues` list.
        all_results = [future.result() for future in as_completed(futures)]

    return all_results


def gitea_wiki_page_get(owner, repo, page_name, verbose=True):
    """
    Get a wiki page.
    """
    encoded_page_name = urllib.parse.quote(page_name, safe='')
    base_url = f"{BASE_API_URL}/repos/{owner}/{repo}/wiki/page/{encoded_page_name}"
    return url_json_get(base_url)


def gitea_wiki_pages_get(owner, repo, verbose=True):
    """
    Get all wiki pages.
    """
    base_url = f"{BASE_API_URL}/repos/{owner}/{repo}/wiki/pages"
    return url_json_get(base_url)
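
# Example usage: an illustrative sketch only, not part of the module API.
# It fetches open issues from the "blender/blender" repository (the same
# repository referenced in the content-history URL above), keeping only a
# few attributes per item, then re-fetches one issue through the
# single-issue endpoint. The chosen state and attribute filter are demo
# assumptions; 'number', 'title', and 'updated_at' are standard Gitea
# issue fields.
if __name__ == "__main__":
    issues = gitea_fetch_issues(
        "blender", "blender",
        state='open',
        issue_attr_filter={'number', 'title', 'updated_at'})
    print(f"Fetched {len(issues)} open issues")

    if issues:
        # The single-issue endpoint returns the full JSON (body, labels,
        # assignees, ...), unlike the filtered listing above.
        issue = gitea_json_issue_get("blender", "blender", issues[0]['number'])
        if issue is not None:
            print(f"#{issue['number']}: {issue['title']}")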