kenkaneki's picture
zalupa3
346c3c5
"""
Handle submissions to the CodeReview Bench leaderboard.
"""
import json
import os
import tempfile
from datetime import datetime
from typing import Dict, List, Tuple
from huggingface_hub import HfApi
from datasets import load_dataset
from src.display.formatting import styled_error, styled_message
from src.envs import RESULTS_DATASET_ID, TOKEN, REPO_ID
from src.leaderboard.processor import process_jsonl_submission, add_entries_to_leaderboard
def validate_submission(file_path: str) -> Tuple[bool, str]:
    """
    Check whether a submission file can be parsed into leaderboard entries.

    Returns a ``(is_valid, message)`` pair. The file is considered valid when
    ``process_jsonl_submission`` yields a non-empty result; otherwise the
    message reported by the processor (or the text of any raised exception)
    is handed back to the caller.
    """
    try:
        parsed, detail = process_jsonl_submission(file_path)
        # An empty result means the processor rejected the file; pass its reason through.
        outcome = (True, "Submission is valid") if parsed else (False, detail)
    except Exception as exc:
        outcome = (False, f"Error validating submission: {exc}")
    return outcome
def submit_entry_to_hub(entry: Dict, model_name: str, mode: str, version="v0") -> Tuple[bool, str]:
    """
    Submit a model's evaluation entry to the HuggingFace dataset.

    The entry is serialized to a temporary JSON file and uploaded to
    ``entries/entry_<model>_<mode>_<version>.json`` in ``RESULTS_DATASET_ID``,
    so it is uniquely identified by model_name, mode, and version.

    Args:
        entry: JSON-serializable evaluation entry.
        model_name: Model name; "/" and " " are replaced by "_" in the file name.
        mode: Evaluation mode; stringified, sanitized like the model name and lowercased.
        version: Benchmark version used as the file-name suffix.

    Returns:
        ``(True, message)`` on success, or ``(False, error_message)`` when
        serialization or the upload fails. Exceptions are not propagated.
    """
    temp_path = None
    try:
        # Create safe name fragments for the file path
        model_name_safe = model_name.replace("/", "_").replace(" ", "_")
        mode_safe = str(mode).replace("/", "_").replace(" ", "_").lower()

        # Create entry path in entries folder
        entry_path = f"entries/entry_{model_name_safe}_{mode_safe}_{version}.json"

        # delete=False: the file has to outlive the `with` so it can be uploaded by path.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file:
            # Remember the path *before* dumping so a serialization failure
            # still gets cleaned up in the `finally` below.
            temp_path = temp_file.name
            json.dump(entry, temp_file, indent=2)

        # Upload file
        api = HfApi(token=TOKEN)
        api.upload_file(
            path_or_fileobj=temp_path,
            path_in_repo=entry_path,
            repo_id=RESULTS_DATASET_ID,
            repo_type="dataset",
            commit_message=f"Add evaluation entry for {model_name} (mode {mode}, version {version})"
        )
        return True, f"Successfully uploaded evaluation entry for {model_name} (mode {mode})"
    except Exception as e:
        return False, f"Error submitting entry to dataset: {e}"
    finally:
        # Always remove the temporary file, whether or not the upload succeeded.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not change the result.
                pass
def submit_leaderboard_to_hub(entries: List[Dict], version="v0") -> Tuple[bool, str]:
    """
    Submit updated leaderboard to the HuggingFace dataset.

    The entries are wrapped together with a ``last_updated`` timestamp and the
    version, written to a temporary JSON file, and uploaded to
    ``leaderboards/leaderboard_<version>.json`` in ``RESULTS_DATASET_ID``.

    Args:
        entries: JSON-serializable leaderboard entries.
        version: Benchmark version; selects the leaderboard file name.

    Returns:
        ``(True, message)`` on success, or ``(False, error_message)`` when
        serialization or the upload fails. Exceptions are not propagated.
    """
    temp_path = None
    try:
        # Create leaderboard data
        leaderboard_data = {
            "entries": entries,
            "last_updated": datetime.now().isoformat(),
            "version": version
        }

        # delete=False: the file has to outlive the `with` so it can be uploaded by path.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file:
            # Remember the path *before* dumping so a serialization failure
            # still gets cleaned up in the `finally` below.
            temp_path = temp_file.name
            json.dump(leaderboard_data, temp_file, indent=2)

        # Upload file
        api = HfApi(token=TOKEN)
        api.upload_file(
            path_or_fileobj=temp_path,
            path_in_repo=f"leaderboards/leaderboard_{version}.json",
            repo_id=RESULTS_DATASET_ID,
            repo_type="dataset",
            commit_message=f"Update leaderboard for version {version}"
        )
        return True, "Leaderboard updated successfully"
    except Exception as e:
        return False, f"Error updating leaderboard: {e}"
    finally:
        # Always remove the temporary file, whether or not the upload succeeded.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not change the result.
                pass
def _load_entries_for_version(api, version: str) -> List[Dict]:
    """Download and parse every ``entries/*_<version>.json`` file in the results dataset."""
    files = api.list_repo_files(repo_id=RESULTS_DATASET_ID, repo_type="dataset")
    entry_files = [f for f in files if f.startswith("entries/") and f.endswith(f"_{version}.json")]

    all_entries = []
    for entry_file in entry_files:
        try:
            entry_path = api.hf_hub_download(
                repo_id=RESULTS_DATASET_ID,
                filename=entry_file,
                repo_type="dataset",
            )
            with open(entry_path, 'r', encoding="utf-8") as f:
                all_entries.append(json.load(f))
        except Exception as e:
            # One unreadable entry must not block the leaderboard rebuild; report and skip it.
            print(f"Error loading entry {entry_file}: {e}")
    return all_entries


def process_submission(file_path: str, metadata: Dict, version="v0") -> str:
    """
    Process a submission to the CodeReview Bench leaderboard.

    Steps: validate the file, parse it into entries, upload the raw file to
    ``submissions_<version>/``, stamp each entry with the submission metadata
    and upload it via ``submit_entry_to_hub``, then rebuild the leaderboard
    for ``version`` from all entry files found in the results dataset.

    Args:
        file_path: Path to the submitted JSONL file. The file is removed
            before returning, whether or not processing succeeded.
        metadata: Submission metadata; the keys read are ``model_name``,
            ``model_type``, ``review_model_type``, ``mode``, ``base_model``,
            ``revision``, ``precision`` and ``weight_type``.
        version: Benchmark version the submission belongs to.

    Returns:
        The result of ``styled_message`` on success or ``styled_error`` on any
        failure. Exceptions raised while processing are reported through
        ``styled_error`` rather than propagated.
    """
    try:
        # Validate submission
        is_valid, validation_message = validate_submission(file_path)
        if not is_valid:
            return styled_error(validation_message)

        # Process the submission entries
        entries, message = process_jsonl_submission(file_path)
        if not entries:
            return styled_error(f"Failed to process submission: {message}")

        # Upload raw submission file
        model_name = metadata.get("model_name", "unknown")
        model_name_safe = model_name.replace("/", "_").replace(" ", "_")
        api = HfApi(token=TOKEN)
        submission_path = f"submissions_{version}/{model_name_safe}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.jsonl"
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=submission_path,
            repo_id=RESULTS_DATASET_ID,
            repo_type="dataset",
            commit_message=f"Add raw submission for {model_name}"
        )

        # Add metadata to each entry and submit it to the entries folder.
        # NOTE(review): every entry of one submission is uploaded under the same
        # model_name/mode/version path, so if the processor returns more than one
        # entry, later uploads presumably replace earlier ones — confirm intended.
        for entry in entries:
            entry.update({
                "model_name": metadata.get("model_name"),
                "model_type": metadata.get("model_type"),
                "review_model_type": str(metadata.get("review_model_type", "custom")).lower(),
                "mode": metadata.get("mode"),
                "base_model": metadata.get("base_model"),
                "revision": metadata.get("revision"),
                "precision": metadata.get("precision"),
                "weight_type": metadata.get("weight_type"),
                "version": version,
                "submission_date": datetime.now().isoformat()
            })
            success, message = submit_entry_to_hub(entry, model_name, metadata.get("mode"), version)
            if not success:
                return styled_error(message)

        # Get all entries from HF dataset and update leaderboard
        all_entries = _load_entries_for_version(api, version)
        success, message = submit_leaderboard_to_hub(all_entries, version)
        if not success:
            return styled_error(message)

        return styled_message("Submission successful! Model evaluated and leaderboard updated.")
    except Exception as e:
        return styled_error(f"Error processing submission: {e}")
    finally:
        # Clean up the submitted file if it exists. This is best-effort and must
        # never mask the result above, but it should not swallow
        # KeyboardInterrupt/SystemExit the way a bare `except:` would.
        try:
            if file_path and os.path.exists(file_path):
                os.remove(file_path)
        except (OSError, TypeError, ValueError):
            pass