Spaces:

kenkaneki
/

CodeReviewBench

Sleeping

File size: 6,836 Bytes

d4d998a
346c3c5
d4d998a
 
 
 
 
 
 
 
 
b1cb07d
d4d998a
b1cb07d
98f5e7c
346c3c5
d4d998a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a7eca29
d4d998a
a7eca29
d4d998a
 
b1cb07d
d4d998a
a7eca29
3c01baa
b1cb07d
a7eca29
3c01baa
b1cb07d
 
 
 
3c01baa
b1cb07d
 
 
 
 
 
 
a7eca29
b1cb07d
3c01baa
b1cb07d
a7eca29
b1cb07d
 
3c01baa
 
b1cb07d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d4d998a
3c01baa
b1cb07d
 
d4d998a
 
b1cb07d
 
d4d998a
b1cb07d
d4d998a
3c01baa
d4d998a
b1cb07d
d4d998a
b1cb07d
d4d998a
 
3c01baa
d4d998a
346c3c5
d4d998a
b1cb07d
 
 
 
 
3c01baa
346c3c5
 
 
 
3c01baa
346c3c5
b1cb07d
 
346c3c5
b1cb07d
 
 
 
 
 
 
 
 
 
346c3c5
 
 
b1cb07d
346c3c5
 
b1cb07d
346c3c5
a17bcda
b1cb07d
 
 
 
 
 
 
346c3c5
b1cb07d
346c3c5
 
 
b1cb07d
 
 
346c3c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a7b55ff
346c3c5
 
 
 
b1cb07d
346c3c5
b1cb07d
 
 
 
346c3c5
b1cb07d

"""
Handle submissions to the CodeReview Bench leaderboard.
"""

import json
import os
import tempfile
from datetime import datetime
from typing import Dict, List, Tuple

from huggingface_hub import HfApi
from datasets import load_dataset

from src.display.formatting import styled_error, styled_message
from src.envs import RESULTS_DATASET_ID, TOKEN, REPO_ID
from src.leaderboard.processor import process_jsonl_submission, add_entries_to_leaderboard


def validate_submission(file_path: str) -> Tuple[bool, str]:
    """
    Check whether a submission file can be processed.

    The file is run through ``process_jsonl_submission``. The result is:

    * ``(True, "Submission is valid")`` when the processor returns a truthy
      entries value,
    * ``(False, message)`` with the processor's own message when it does not,
    * ``(False, "Error validating submission: ...")`` when anything raises.
    """
    try:
        parsed, detail = process_jsonl_submission(file_path)
        # Decide inside the ``try`` so a failing truthiness check is reported
        # the same way as a failing parse.
        outcome = (True, "Submission is valid") if parsed else (False, detail)
    except Exception as exc:
        outcome = (False, f"Error validating submission: {exc}")
    return outcome


def submit_entry_to_hub(entry: Dict, model_name: str, mode: str, version="v0") -> Tuple[bool, str]:
    """
    Submit a model's evaluation entry to the HuggingFace dataset.

    The entry is serialized to JSON and uploaded to
    ``entries/entry_{model_name}_{mode}_{version}.json`` in the results
    dataset. Slashes and spaces in ``model_name`` and ``mode`` are replaced by
    underscores and ``mode`` is lower-cased, so the entry is uniquely
    identified by model_name, mode, and version.

    Args:
        entry: JSON-serializable entry data.
        model_name: Model name; used in the file path and commit message.
        mode: Evaluation mode; used in the file path and commit message.
        version: Benchmark version suffix of the file name.

    Returns:
        ``(True, message)`` on success, ``(False, message)`` if any step
        raised. The function itself does not raise for ordinary errors.
    """
    temp_path = None
    try:
        # Create safe model name for file path
        model_name_safe = model_name.replace("/", "_").replace(" ", "_")
        mode_safe = str(mode).replace("/", "_").replace(" ", "_").lower()

        # Create entry path in entries folder
        entry_path = f"entries/entry_{model_name_safe}_{mode_safe}_{version}.json"

        # Save entry to temporary file
        with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file:
            # Record the path before writing so a failed dump is still cleaned up.
            temp_path = temp_file.name
            json.dump(entry, temp_file, indent=2)

        # Upload file
        api = HfApi(token=TOKEN)
        api.upload_file(
            path_or_fileobj=temp_path,
            path_in_repo=entry_path,
            repo_id=RESULTS_DATASET_ID,
            repo_type="dataset",
            commit_message=f"Add evaluation entry for {model_name} (mode {mode}, version {version})"
        )

        return True, f"Successfully uploaded evaluation entry for {model_name} (mode {mode})"
    except Exception as e:
        return False, f"Error submitting entry to dataset: {e}"
    finally:
        # The temp file is created with delete=False, so remove it on every
        # path (success or failure) to avoid leaking files.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError as cleanup_error:
                print(f"Warning: could not remove temporary file {temp_path}: {cleanup_error}")


def submit_leaderboard_to_hub(entries: List[Dict], version="v0") -> Tuple[bool, str]:
    """
    Submit updated leaderboard to the HuggingFace dataset.

    The leaderboard is written as a JSON object with the keys ``entries``,
    ``last_updated`` (ISO timestamp of ``datetime.now()``) and ``version``, and
    uploaded to ``leaderboards/leaderboard_{version}.json`` in the results
    dataset.

    Args:
        entries: JSON-serializable leaderboard entries.
        version: Benchmark version; selects the target file name.

    Returns:
        ``(True, message)`` on success, ``(False, message)`` if any step
        raised. The function itself does not raise for ordinary errors.
    """
    temp_path = None
    try:
        # Create leaderboard data
        leaderboard_data = {
            "entries": entries,
            "last_updated": datetime.now().isoformat(),
            "version": version,
        }

        # Save to temporary file
        with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file:
            # Record the path before writing so a failed dump is still cleaned up.
            temp_path = temp_file.name
            json.dump(leaderboard_data, temp_file, indent=2)

        # Upload file
        api = HfApi(token=TOKEN)
        api.upload_file(
            path_or_fileobj=temp_path,
            path_in_repo=f"leaderboards/leaderboard_{version}.json",
            repo_id=RESULTS_DATASET_ID,
            repo_type="dataset",
            commit_message=f"Update leaderboard for version {version}"
        )

        return True, "Leaderboard updated successfully"
    except Exception as e:
        return False, f"Error updating leaderboard: {e}"
    finally:
        # The temp file is created with delete=False, so remove it on every
        # path (success or failure) to avoid leaking files.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError as cleanup_error:
                print(f"Warning: could not remove temporary file {temp_path}: {cleanup_error}")


def _load_hub_entries(api, version: str) -> List[Dict]:
    """Download and parse every ``entries/*_{version}.json`` file of the results dataset."""
    repo_files = api.list_repo_files(repo_id=RESULTS_DATASET_ID, repo_type="dataset")
    entry_files = [f for f in repo_files if f.startswith("entries/") and f.endswith(f"_{version}.json")]

    loaded = []
    for entry_file in entry_files:
        try:
            local_path = api.hf_hub_download(
                repo_id=RESULTS_DATASET_ID,
                filename=entry_file,
                repo_type="dataset",
            )
            with open(local_path, 'r', encoding="utf-8") as f:
                loaded.append(json.load(f))
        except Exception as e:
            # A single unreadable entry is logged and skipped so the
            # leaderboard can still be rebuilt from the remaining ones.
            print(f"Error loading entry {entry_file}: {e}")
    return loaded


def process_submission(file_path: str, metadata: Dict, version="v0") -> str:
    """
    Process a submission to the CodeReview Bench leaderboard.

    Steps:
        1. Validate the file with ``validate_submission`` and parse it with
           ``process_jsonl_submission``.
        2. Upload the raw file to ``submissions_{version}/`` in the results
           dataset, named after the model and the current timestamp.
        3. Add the submission metadata to every entry and upload each entry
           with ``submit_entry_to_hub``.
        4. Re-download all entry files for ``version`` and upload the rebuilt
           leaderboard with ``submit_leaderboard_to_hub``.

    Args:
        file_path: Path of the submitted file. The file is removed when this
            function finishes, whether or not processing succeeded.
        metadata: Submission metadata; the keys ``model_name``, ``model_type``,
            ``review_model_type``, ``mode``, ``base_model``, ``revision``,
            ``precision`` and ``weight_type`` are read with ``.get``.
        version: Benchmark version used for repo paths and stored per entry.

    Returns:
        The result of ``styled_message`` on success, or of ``styled_error``
        describing the first failure.
    """
    try:
        # Validate submission
        is_valid, validation_message = validate_submission(file_path)
        if not is_valid:
            return styled_error(validation_message)

        # Process the submission entries
        entries, message = process_jsonl_submission(file_path)
        if not entries:
            return styled_error(f"Failed to process submission: {message}")

        # Upload raw submission file
        model_name = metadata.get("model_name", "unknown")
        model_name_safe = model_name.replace("/", "_").replace(" ", "_")

        api = HfApi(token=TOKEN)
        submission_path = f"submissions_{version}/{model_name_safe}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.jsonl"
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=submission_path,
            repo_id=RESULTS_DATASET_ID,
            repo_type="dataset",
            commit_message=f"Add raw submission for {model_name}"
        )

        # Add metadata to every entry (entries are updated in place)
        for entry in entries:
            entry.update({
                "model_name": metadata.get("model_name"),
                "model_type": metadata.get("model_type"),
                "review_model_type": str(metadata.get("review_model_type", "custom")).lower(),
                "mode": metadata.get("mode"),
                "base_model": metadata.get("base_model"),
                "revision": metadata.get("revision"),
                "precision": metadata.get("precision"),
                "weight_type": metadata.get("weight_type"),
                "version": version,
                "submission_date": datetime.now().isoformat()
            })

        # Submit entries to entries folder.
        # NOTE(review): the target path built by submit_entry_to_hub depends
        # only on model name, mode and version, so all entries of one
        # submission are uploaded to the same path -- presumably later ones
        # replace earlier ones; confirm this is intended.
        for entry in entries:
            success, message = submit_entry_to_hub(entry, model_name, metadata.get("mode"), version)
            if not success:
                return styled_error(message)

        # Get all entries from HF dataset and update leaderboard
        all_entries = _load_hub_entries(api, version)
        success, message = submit_leaderboard_to_hub(all_entries, version)
        if not success:
            return styled_error(message)

        return styled_message("Submission successful! Model evaluated and leaderboard updated.")

    except Exception as e:
        return styled_error(f"Error processing submission: {e}")
    finally:
        # Best-effort removal of the submitted file. Only expected filesystem
        # or bad-path errors are tolerated, so interrupts are not swallowed.
        try:
            if file_path and os.path.exists(file_path):
                os.remove(file_path)
        except (OSError, TypeError, ValueError) as cleanup_error:
            print(f"Warning: could not remove submission file {file_path}: {cleanup_error}")