|
import json |
|
import requests |
|
import os |
|
import base64 |
|
import loguru |
|
from dotenv import load_dotenv |
|
|
|
# Run python-dotenv's loader first so that the environment lookup below
# happens after it has had a chance to populate the process environment.
load_dotenv()

# GitHub API token taken from the TOKEN environment variable; None when unset.
TOKEN = os.environ.get('TOKEN')
|
|
|
def get_repos(org_name, token, export_dir):
    """Fetch every repository of a GitHub organization and record their names.

    All result pages of the organization's repository listing are requested
    and concatenated. On success the repository names are written, one per
    line, to ``<export_dir>/repositories.txt``.

    Args:
        org_name: Login name of the GitHub organization.
        token: GitHub API token sent in the ``Authorization`` header.
        export_dir: Directory that receives ``repositories.txt``; it is
            created if it does not exist yet.

    Returns:
        The list of repository objects decoded from the API responses, or an
        empty list if any page request returns a non-200 status (the status
        and response body are logged, and no file is written).

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    headers = {
        'Authorization': f'token {token}',
    }
    url = f'https://api.github.com/orgs/{org_name}/repos'

    # GitHub caps per_page at 100 and numbers pages from 1, so larger
    # organizations must be read page by page.
    per_page = 100
    page = 1
    repos = []
    while True:
        response = requests.get(
            url,
            headers=headers,
            params={'per_page': per_page, 'page': page},
            timeout=30,  # never hang forever on a stalled connection
        )
        if response.status_code != 200:
            loguru.logger.error(f"Error fetching repositories: {response.status_code}")
            loguru.logger.error(response.text)
            return []

        batch = response.json()
        repos.extend(batch)
        # A page that is not full is the last one.
        if len(batch) < per_page:
            break
        page += 1

    loguru.logger.info(f'Fetched {len(repos)} repositories for {org_name}.')

    # An empty export_dir means "current directory"; makedirs('') would raise.
    if export_dir:
        os.makedirs(export_dir, exist_ok=True)
    repositories_path = os.path.join(export_dir, 'repositories.txt')
    with open(repositories_path, 'w', encoding='utf-8') as file:
        file.writelines(repo['name'] + '\n' for repo in repos)
    return repos
|
|
|
def fetch_repo_readme(org_name, repo_name, token, export_dir):
    """Download one repository's README and save it to disk.

    The README is requested from the GitHub API, its base64 ``content`` field
    is decoded as UTF-8 text, and the result is written to
    ``<export_dir>/<repo_name>/README.md``.

    Args:
        org_name: Owner (organization) of the repository.
        repo_name: Name of the repository; also used as the sub-directory name.
        token: GitHub API token sent in the ``Authorization`` header.
        export_dir: Root directory under which the per-repository directory
            is created.

    Returns:
        None. A non-200 response or an undecodable README is logged and
        skipped without writing a file.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    headers = {
        'Authorization': f'token {token}',
    }
    url = f'https://api.github.com/repos/{org_name}/{repo_name}/readme'
    # Bounded wait so one stalled request cannot block the whole export.
    response = requests.get(url, headers=headers, timeout=30)

    if response.status_code != 200:
        loguru.logger.error(f"Error fetching README for {repo_name}: {response.status_code}")
        loguru.logger.error(response.text)
        return

    encoded_content = response.json()['content']
    try:
        # binascii.Error and UnicodeDecodeError are both ValueError subclasses.
        readme_content = base64.b64decode(encoded_content).decode('utf-8')
    except ValueError as exc:
        loguru.logger.error(f"Error decoding README for {repo_name}: {exc}")
        return

    repo_dir = os.path.join(export_dir, repo_name)
    # exist_ok avoids the race between an existence check and the creation.
    os.makedirs(repo_dir, exist_ok=True)
    readme_path = os.path.join(repo_dir, 'README.md')
    with open(readme_path, 'w', encoding='utf-8') as file:
        file.write(readme_content)
|
|
|
if __name__ == '__main__':
    # Export the README of every repository in the organization below.
    org_name = 'datawhalechina'
    export_dir = "database/readme_db"

    # Without a token the Authorization header would literally be
    # "token None"; stop early with a clear message instead.
    if not TOKEN:
        raise SystemExit("TOKEN is not set; add it to the environment or a .env file.")

    # The repository list is written into export_dir, so it has to exist
    # before get_repos runs.
    os.makedirs(export_dir, exist_ok=True)

    # get_repos returns an empty list on failure, so the loop is simply skipped.
    for repo in get_repos(org_name, TOKEN, export_dir):
        fetch_repo_readme(org_name, repo['name'], TOKEN, export_dir)
|
|
|
|
|
|