Spaces:
Sleeping
Sleeping
"""
Populate the CodeReview Bench leaderboard from HuggingFace datasets.
"""
import json | |
import os | |
import pandas as pd | |
import tempfile | |
from typing import Dict, List, Optional | |
from datetime import datetime | |
import numpy as np | |
from huggingface_hub import hf_hub_download, HfApi | |
from datasets import load_dataset | |
from src.display.utils import CODEREVIEW_COLUMN, DISPLAY_COLS, CATEGORIES | |
from src.envs import RESULTS_DATASET_ID, TOKEN, CACHE_PATH | |
from src.leaderboard.processor import leaderboard_to_dataframe | |
def get_latest_leaderboard(version="v0") -> Optional[Dict]:
    """
    Get the latest leaderboard data from the HuggingFace dataset.

    The file ``leaderboards/leaderboard_{version}.json`` is downloaded from
    the ``RESULTS_DATASET_ID`` dataset repo first. If that fails for any
    reason, local JSON files under the parent of this module's directory are
    tried in order. Note that the local fallback paths do not depend on
    ``version``.

    Args:
        version: Leaderboard version tag used to build the Hub filename.

    Returns:
        The parsed JSON content, or ``None`` if neither the Hub nor any local
        candidate file could be loaded.
    """
    # First try to fetch from HuggingFace Hub
    try:
        leaderboard_path = hf_hub_download(
            repo_id=RESULTS_DATASET_ID,
            filename=f"leaderboards/leaderboard_{version}.json",
            repo_type="dataset",
            token=TOKEN,
        )
        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(leaderboard_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception as hf_err:
        # Deliberately broad: any Hub/network/parse failure triggers the fallback.
        print(f"HF download failed or unavailable: {hf_err}. Trying local fallback...")

    # Fallback: attempt to load a local leaderboard file located at the project root
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    local_path_candidates = [
        os.path.join(project_root, "leaderboard_data.json"),  # legacy path in root
        os.path.join(project_root, "data", "leaderboard.json"),  # path defined in envs.py
    ]
    for local_path in local_path_candidates:
        if os.path.exists(local_path):
            try:
                with open(local_path, "r", encoding="utf-8") as f:
                    return json.load(f)
            except Exception as local_err:
                # A broken candidate must not prevent trying the next one.
                print(f"Error loading local leaderboard file {local_path}: {local_err}")

    # If nothing found, return None
    return None
def get_model_entry(model_name: str, mode: str, version="v0") -> Optional[Dict]:
    """
    Get a specific model's entry from the entries folder.

    The entry is identified by ``model_name``, ``mode`` and ``version``; the
    Hub filename is ``entries/entry_{model}_{mode}_{version}.json`` where
    ``/`` and spaces in the model name and mode are replaced by ``_`` and the
    mode is additionally lower-cased.

    Args:
        model_name: Model identifier (may contain ``/`` or spaces).
        mode: Evaluation mode; converted with ``str()`` before sanitising.
        version: Version tag appended to the filename.

    Returns:
        The parsed JSON entry, or ``None`` if it could not be downloaded or
        read (the error is printed).
    """
    try:
        model_name_safe = model_name.replace("/", "_").replace(" ", "_")
        mode_safe = str(mode).replace("/", "_").replace(" ", "_").lower()
        entry_path = hf_hub_download(
            repo_id=RESULTS_DATASET_ID,
            filename=f"entries/entry_{model_name_safe}_{mode_safe}_{version}.json",
            repo_type="dataset",
            token=TOKEN,
        )
        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(entry_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception as e:
        # Best-effort lookup: a missing entry is reported, not raised.
        print(f"Error downloading model entry: {e}")
        return None
def get_all_entries(version="v0") -> List[Dict]:
    """
    Get all entries for ``version`` from the HuggingFace dataset.

    Lists the files of the ``RESULTS_DATASET_ID`` dataset repo, keeps those
    under ``entries/`` whose name ends in ``_{version}.json``, then downloads
    and parses each one.

    Args:
        version: Version tag used to select entry files.

    Returns:
        A list of parsed entry dicts. Files that fail to download or parse
        are skipped (the error is printed); if listing the repo itself fails,
        an empty list is returned.
    """
    try:
        api = HfApi(token=TOKEN)
        files = api.list_repo_files(repo_id=RESULTS_DATASET_ID, repo_type="dataset")
        suffix = f"_{version}.json"
        entry_files = [f for f in files if f.startswith("entries/") and f.endswith(suffix)]

        all_entries = []
        for entry_file in entry_files:
            try:
                entry_path = hf_hub_download(
                    repo_id=RESULTS_DATASET_ID,
                    filename=entry_file,
                    repo_type="dataset",
                    token=TOKEN,
                )
                # JSON is UTF-8 by specification; do not rely on the locale default.
                with open(entry_path, "r", encoding="utf-8") as f:
                    entry_data = json.load(f)
                all_entries.append(entry_data)
            except Exception as e:
                # One bad file must not discard the entries that did load.
                print(f"Error loading entry {entry_file}: {e}")
        return all_entries
    except Exception as e:
        print(f"Error getting all entries: {e}")
        return []
def get_leaderboard_df(version="v0") -> pd.DataFrame:
    """
    Return the leaderboard for ``version`` as a DataFrame.

    Uses the stored leaderboard when one can be loaded; otherwise rebuilds a
    leaderboard structure from the individual entries. When neither source
    yields data, an empty DataFrame with the ``DISPLAY_COLS`` columns is
    returned.
    """
    stored = get_latest_leaderboard(version)
    if stored:
        return leaderboard_to_dataframe(stored)

    # No stored leaderboard: try to assemble one from the raw entries.
    raw_entries = get_all_entries(version)
    if not raw_entries:
        return pd.DataFrame(columns=DISPLAY_COLS)

    rebuilt = {
        "entries": raw_entries,
        "last_updated": datetime.now().isoformat(),
        "version": version,
    }
    return leaderboard_to_dataframe(rebuilt)
def get_category_leaderboard_df(category: str, version="v0") -> pd.DataFrame:
    """
    Get the leaderboard filtered by a programming language category.

    The leaderboard is loaded the same way as for the unfiltered view (stored
    leaderboard first, then rebuilt from individual entries). Entries are
    matched case-insensitively on their ``programming_language`` field. The
    special category ``"other"`` selects entries whose language is not one of
    the named categories.

    Args:
        category: Category name; ``"other"`` (any case) is the catch-all.
        version: Leaderboard version tag.

    Returns:
        A DataFrame built from the matching entries, or an empty DataFrame
        with the ``DISPLAY_COLS`` columns when no leaderboard data exists.
    """
    # Get latest leaderboard data
    leaderboard_data = get_latest_leaderboard(version)
    if not leaderboard_data:
        # If no leaderboard exists, try to build it from entries
        entries = get_all_entries(version)
        if not entries:
            # Return empty DataFrame if no data available
            return pd.DataFrame(columns=DISPLAY_COLS)
        leaderboard_data = {
            "entries": entries,
            "last_updated": datetime.now().isoformat(),
            "version": version,
        }

    wanted = category.lower()
    # Built once instead of per entry. The slice drops the final element of
    # CATEGORIES, which is assumed to be the "Other" catch-all.
    known_languages = {cat.lower() for cat in CATEGORIES[:-1]}

    def _language_of(entry) -> str:
        # A missing key and an explicit null both count as "no language";
        # previously a null value raised AttributeError on .lower().
        return str(entry.get("programming_language") or "").lower()

    all_entries = leaderboard_data.get("entries") or []
    if wanted == "other":
        # Catch-all: anything that is not one of the named languages.
        filtered_entries = [
            entry for entry in all_entries
            if _language_of(entry) not in known_languages
        ]
    else:
        filtered_entries = [
            entry for entry in all_entries
            if _language_of(entry) == wanted
        ]

    # Create a new leaderboard data structure with the filtered entries
    filtered_leaderboard = {
        "entries": filtered_entries,
        "last_updated": leaderboard_data.get("last_updated", datetime.now().isoformat()),
        "version": version,
    }
    return leaderboard_to_dataframe(filtered_leaderboard)
def get_detailed_model_data(model_name: str, mode: str, version="v0") -> Dict:
    """
    Return the detailed record for one model/mode pair.

    The dedicated entry file is preferred. If it is unavailable, the
    leaderboard's entries are scanned for one with the same ``model_name``
    and a case-insensitively equal ``mode``. An empty dict is returned when
    nothing matches.
    """
    dedicated = get_model_entry(model_name, mode, version)
    if dedicated:
        return dedicated

    board = get_latest_leaderboard(version)
    if not board:
        return {}

    wanted_mode = str(mode).lower()
    for candidate in board.get("entries", []):
        if candidate.get("model_name") != model_name:
            continue
        if str(candidate.get("mode")).lower() == wanted_mode:
            return candidate
    return {}