""" | |
Handle submissions to the CodeReview Bench leaderboard. | |
""" | |
import json | |
import os | |
import tempfile | |
from datetime import datetime | |
from typing import Dict, List, Tuple | |
from huggingface_hub import HfApi | |
from datasets import load_dataset | |
from src.display.formatting import styled_error, styled_message | |
from src.envs import RESULTS_DATASET_ID, TOKEN, REPO_ID | |
from src.leaderboard.processor import process_jsonl_submission, add_entries_to_leaderboard | |


def validate_submission(file_path: str) -> Tuple[bool, str]:
    """
    Validate a submission file.
    """
    try:
        entries, message = process_jsonl_submission(file_path)
        if not entries:
            return False, message
        return True, "Submission is valid"
    except Exception as e:
        return False, f"Error validating submission: {e}"
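

# Usage note: validate_submission returns a (passed, message) pair, e.g.
# (True, "Submission is valid") for a well-formed JSONL file, so callers can
# surface the message directly in the UI.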


def submit_entry_to_hub(entry: Dict, model_name: str, mode: str, version="v0") -> Tuple[bool, str]:
    """
    Submit a model's evaluation entry to the HuggingFace dataset. The entry is
    uniquely identified by model_name, mode, and version.
    """
    temp_path = None
    try:
        # Create filesystem-safe model and mode names for the file path
        model_name_safe = model_name.replace("/", "_").replace(" ", "_")
        mode_safe = str(mode).replace("/", "_").replace(" ", "_").lower()

        # Create entry path in the entries folder
        entry_path = f"entries/entry_{model_name_safe}_{mode_safe}_{version}.json"

        # Save entry to a temporary file; delete=False so the file survives
        # the context manager and can be uploaded afterwards
        with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file:
            json.dump(entry, temp_file, indent=2)
            temp_path = temp_file.name

        # Upload the file to the results dataset
        api = HfApi(token=TOKEN)
        api.upload_file(
            path_or_fileobj=temp_path,
            path_in_repo=entry_path,
            repo_id=RESULTS_DATASET_ID,
            repo_type="dataset",
            commit_message=f"Add evaluation entry for {model_name} (mode {mode}, version {version})"
        )
        return True, f"Successfully uploaded evaluation entry for {model_name} (mode {mode})"
    except Exception as e:
        return False, f"Error submitting entry to dataset: {e}"
    finally:
        # Remove the temporary file even if the upload failed
        if temp_path and os.path.exists(temp_path):
            os.unlink(temp_path)
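

# Example (hypothetical entry and model id; the real entry schema comes from
# process_jsonl_submission):
#   ok, msg = submit_entry_to_hub({"accuracy": 0.87}, "example-org/example-model", "chat")
# would write entries/entry_example-org_example-model_chat_v0.json in the
# results dataset.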


def submit_leaderboard_to_hub(entries: List[Dict], version="v0") -> Tuple[bool, str]:
    """
    Submit the updated leaderboard to the HuggingFace dataset.
    """
    temp_path = None
    try:
        # Create leaderboard payload
        leaderboard_data = {
            "entries": entries,
            "last_updated": datetime.now().isoformat(),
            "version": version
        }

        # Save to a temporary file (delete=False so it survives the context manager)
        with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file:
            json.dump(leaderboard_data, temp_file, indent=2)
            temp_path = temp_file.name

        # Upload the file
        api = HfApi(token=TOKEN)
        api.upload_file(
            path_or_fileobj=temp_path,
            path_in_repo=f"leaderboards/leaderboard_{version}.json",
            repo_id=RESULTS_DATASET_ID,
            repo_type="dataset",
            commit_message=f"Update leaderboard for version {version}"
        )
        return True, "Leaderboard updated successfully"
    except Exception as e:
        return False, f"Error updating leaderboard: {e}"
    finally:
        # Remove the temporary file even if the upload failed
        if temp_path and os.path.exists(temp_path):
            os.unlink(temp_path)
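

# Consumers can fetch the leaderboard JSON back with hf_hub_download, e.g.
# (sketch, assuming read access to RESULTS_DATASET_ID):
#   from huggingface_hub import hf_hub_download
#   path = hf_hub_download(repo_id=RESULTS_DATASET_ID,
#                          filename="leaderboards/leaderboard_v0.json",
#                          repo_type="dataset", token=TOKEN)
#   with open(path) as f:
#       data = json.load(f)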


def process_submission(file_path: str, metadata: Dict, version="v0") -> str:
    """
    Process a submission to the CodeReview Bench leaderboard.
    """
    try:
        # Validate submission
        is_valid, validation_message = validate_submission(file_path)
        if not is_valid:
            return styled_error(validation_message)

        # Parse the submission entries
        entries, message = process_jsonl_submission(file_path)
        if not entries:
            return styled_error(f"Failed to process submission: {message}")

        # Upload the raw submission file for record keeping
        model_name = metadata.get("model_name", "unknown")
        model_name_safe = model_name.replace("/", "_").replace(" ", "_")
        api = HfApi(token=TOKEN)
        submission_path = f"submissions_{version}/{model_name_safe}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.jsonl"
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=submission_path,
            repo_id=RESULTS_DATASET_ID,
            repo_type="dataset",
            commit_message=f"Add raw submission for {model_name}"
        )

        # Attach submission metadata to every entry
        processed_entries = []
        for entry in entries:
            entry.update({
                "model_name": metadata.get("model_name"),
                "model_type": metadata.get("model_type"),
                "review_model_type": str(metadata.get("review_model_type", "custom")).lower(),
                "mode": metadata.get("mode"),
                "base_model": metadata.get("base_model"),
                "revision": metadata.get("revision"),
                "precision": metadata.get("precision"),
                "weight_type": metadata.get("weight_type"),
                "version": version,
                "submission_date": datetime.now().isoformat()
            })
            processed_entries.append(entry)

        # Submit entries to the entries folder
        for entry in processed_entries:
            success, message = submit_entry_to_hub(entry, model_name, metadata.get("mode"), version)
            if not success:
                return styled_error(message)

        # Gather all entries for this version from the dataset and rebuild the leaderboard
        files = api.list_repo_files(repo_id=RESULTS_DATASET_ID, repo_type="dataset")
        entry_files = [f for f in files if f.startswith("entries/") and f.endswith(f"_{version}.json")]
        all_entries = []
        for entry_file in entry_files:
            try:
                entry_path = api.hf_hub_download(
                    repo_id=RESULTS_DATASET_ID,
                    filename=entry_file,
                    repo_type="dataset",
                )
                with open(entry_path, 'r') as f:
                    entry_data = json.load(f)
                all_entries.append(entry_data)
            except Exception as e:
                print(f"Error loading entry {entry_file}: {e}")

        # Update leaderboard with all entries
        success, message = submit_leaderboard_to_hub(all_entries, version)
        if not success:
            return styled_error(message)

        return styled_message("Submission successful! Model evaluated and leaderboard updated.")
    except Exception as e:
        return styled_error(f"Error processing submission: {e}")
    finally:
        # Clean up the local submission file if it still exists
        try:
            if os.path.exists(file_path):
                os.remove(file_path)
        except Exception:
            pass
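

if __name__ == "__main__":
    # Minimal local sketch of a submission round-trip. All values below are
    # placeholders (hypothetical model id and file name), not fixtures from
    # this repo; running it for real needs a valid TOKEN with write access to
    # RESULTS_DATASET_ID. Note that process_submission deletes the input file
    # when it finishes.
    sample_metadata = {
        "model_name": "example-org/example-model",
        "model_type": "open_source",
        "review_model_type": "custom",
        "mode": "chat",
        "base_model": None,
        "revision": "main",
        "precision": "bf16",
        "weight_type": "original",
    }
    print(process_submission("predictions.jsonl", sample_metadata, version="v0"))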