""" Populate the CodeReview Bench leaderboard from HuggingFace datasets. """ import json import os import pandas as pd import tempfile from typing import Dict, List, Optional from datetime import datetime import numpy as np from huggingface_hub import hf_hub_download, HfApi from datasets import load_dataset from src.display.utils import CODEREVIEW_COLUMN, DISPLAY_COLS, CATEGORIES from src.envs import RESULTS_DATASET_ID, TOKEN, CACHE_PATH from src.leaderboard.processor import leaderboard_to_dataframe def get_latest_leaderboard(version="v0") -> Optional[Dict]: """ Get the latest leaderboard data from HuggingFace dataset. Fallback to local JSON file if HF download fails or is unavailable. """ # First try to fetch from HuggingFace Hub try: leaderboard_path = hf_hub_download( repo_id=RESULTS_DATASET_ID, filename=f"leaderboards/leaderboard_{version}.json", repo_type="dataset", token=TOKEN ) with open(leaderboard_path, 'r') as f: return json.load(f) except Exception as hf_err: print(f"HF download failed or unavailable: {hf_err}. Trying local fallback...") # Fallback: attempt to load a local leaderboard_data.json located at the project root project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) local_path_candidates = [ os.path.join(project_root, "leaderboard_data.json"), # legacy path in root os.path.join(project_root, "data", "leaderboard.json"), # path defined in envs.py ] for local_path in local_path_candidates: if os.path.exists(local_path): try: with open(local_path, 'r') as f: return json.load(f) except Exception as local_err: print(f"Error loading local leaderboard file {local_path}: {local_err}") # If nothing found, return None return None def get_model_entry(model_name: str, mode: str, version="v0") -> Optional[Dict]: """ Get a specific model's entry from the entries folder, uniquely identified by model_name, mode, and version. """ try: model_name_safe = model_name.replace("/", "_").replace(" ", "_") mode_safe = str(mode).replace("/", "_").replace(" ", "_").lower() entry_path = hf_hub_download( repo_id=RESULTS_DATASET_ID, filename=f"entries/entry_{model_name_safe}_{mode_safe}_{version}.json", repo_type="dataset", token=TOKEN ) with open(entry_path, 'r') as f: return json.load(f) except Exception as e: print(f"Error downloading model entry: {e}") return None def get_all_entries(version="v0") -> List[Dict]: """ Get all entries from the HuggingFace dataset. """ try: api = HfApi(token=TOKEN) files = api.list_repo_files(repo_id=RESULTS_DATASET_ID, repo_type="dataset") entry_files = [f for f in files if f.startswith("entries/") and f.endswith(f"_{version}.json")] all_entries = [] for entry_file in entry_files: try: entry_path = hf_hub_download( repo_id=RESULTS_DATASET_ID, filename=entry_file, repo_type="dataset", token=TOKEN ) with open(entry_path, 'r') as f: entry_data = json.load(f) all_entries.append(entry_data) except Exception as e: print(f"Error loading entry {entry_file}: {e}") return all_entries except Exception as e: print(f"Error getting all entries: {e}") return [] def get_leaderboard_df(version="v0") -> pd.DataFrame: """ Get the leaderboard data as a DataFrame. """ # Get latest leaderboard data leaderboard_data = get_latest_leaderboard(version) if not leaderboard_data: # If no leaderboard exists, try to build it from entries entries = get_all_entries(version) if entries: leaderboard_data = { "entries": entries, "last_updated": datetime.now().isoformat(), "version": version } else: # Return empty DataFrame if no data available return pd.DataFrame(columns=DISPLAY_COLS) # Convert to DataFrame return leaderboard_to_dataframe(leaderboard_data) def get_category_leaderboard_df(category: str, version="v0") -> pd.DataFrame: """ Get the leaderboard data filtered by a specific programming language category. """ # Get latest leaderboard data leaderboard_data = get_latest_leaderboard(version) if not leaderboard_data: # If no leaderboard exists, try to build it from entries entries = get_all_entries(version) if entries: leaderboard_data = { "entries": entries, "last_updated": datetime.now().isoformat(), "version": version } else: # Return empty DataFrame if no data available return pd.DataFrame(columns=DISPLAY_COLS) # Filter entries to only include those with data for the specified programming language filtered_entries = [] for entry in leaderboard_data.get("entries", []): # Check if entry has data for this programming language programming_language = entry.get("programming_language", "").lower() if programming_language == category.lower() or category.lower() == "other": # For "other" category, include entries that don't match any specific language if category.lower() == "other": if programming_language not in [cat.lower() for cat in CATEGORIES[:-1]]: # Exclude "Other" from check filtered_entries.append(entry) else: filtered_entries.append(entry) # Create a new leaderboard data structure with the filtered entries filtered_leaderboard = { "entries": filtered_entries, "last_updated": leaderboard_data.get("last_updated", datetime.now().isoformat()), "version": version } # Convert to DataFrame return leaderboard_to_dataframe(filtered_leaderboard) def get_detailed_model_data(model_name: str, mode: str, version="v0") -> Dict: """ Get detailed data for a specific model and mode. """ entry = get_model_entry(model_name, mode, version) if entry: return entry leaderboard_data = get_latest_leaderboard(version) if leaderboard_data: for entry in leaderboard_data.get("entries", []): if entry.get("model_name") == model_name and str(entry.get("mode")).lower() == str(mode).lower(): return entry return {}