Spaces:

kenkaneki
/

CodeReviewBench

Sleeping

CodeReviewBench / src /submission /submit.py

Alex

zalupa

15c92e9 about 2 months ago

7.72 kB

	import json
	import os
	from datetime import datetime, timezone

	from src.display.formatting import styled_error, styled_message, styled_warning
	from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO, EVAL_RESULTS_PATH, RESULTS_REPO
	from src.submission.check_validity import (
	already_submitted_models,
	check_model_card,
	get_model_size,
	is_model_on_hub,
	)

	# Module-level caches, filled lazily by add_new_eval() from
	# already_submitted_models(EVAL_REQUESTS_PATH) the first time it runs
	# (and again on any later call while REQUESTED_MODELS is still falsy).
	# REQUESTED_MODELS is consulted for duplicate detection using keys of the
	# form "{model}_{revision}_{precision}".
	REQUESTED_MODELS = None
	USERS_TO_SUBMISSION_DATES = None

	def add_new_eval(
	    model: str,
	    base_model: str,
	    revision: str,
	    precision: str,
	    weight_type: str,
	    model_type: str,
	):
	    """Validate a model submission and push an evaluation request to the queue repo.

	    Checks are performed in order and the first failing one short-circuits
	    with a styled message: model type selected, base model reachable (for
	    "Delta"/"Adapter" weights), model reachable (unless "Adapter"), model
	    info retrievable, license present in the card data, model card valid,
	    and finally that the same model/revision/precision was not already
	    requested.

	    Args:
	        model: Repo id of the model, optionally "namespace/name".
	        base_model: Repo id of the base model; only checked for "Delta" and
	            "Adapter" weight types.
	        revision: Revision to evaluate; an empty string means "main".
	        precision: Precision label; only the text before the first space is kept.
	        weight_type: Weight kind, e.g. "Delta" or "Adapter" (other values
	            are treated as regular weights).
	        model_type: Model type selected by the user; must be non-empty.

	    Returns:
	        Whatever styled_error / styled_warning / styled_message return for
	        the failure, duplicate, or success case respectively.
	    """
	    global REQUESTED_MODELS
	    global USERS_TO_SUBMISSION_DATES
	    if not REQUESTED_MODELS:
	        REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)

	    user_name = ""
	    model_path = model
	    if "/" in model:
	        user_name = model.split("/")[0]
	        model_path = model.split("/")[1]

	    precision = precision.split(" ")[0]
	    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

	    if model_type is None or model_type == "":
	        return styled_error("Please select a model type.")

	    # An empty revision means the default branch.
	    if revision == "":
	        revision = "main"

	    # Is the base model on the hub? (only relevant for delta/adapter weights)
	    if weight_type in ["Delta", "Adapter"]:
	        base_model_on_hub, error, _ = is_model_on_hub(
	            model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True
	        )
	        if not base_model_on_hub:
	            return styled_error(f'Base model "{base_model}" {error}')

	    # Does the model itself actually exist? (skipped for adapters)
	    if weight_type != "Adapter":
	        model_on_hub, error, _ = is_model_on_hub(
	            model_name=model, revision=revision, token=TOKEN, test_tokenizer=True
	        )
	        if not model_on_hub:
	            return styled_error(f'Model "{model}" {error}')

	    # Is the model info correctly filled?
	    try:
	        model_info = API.model_info(repo_id=model, revision=revision)
	    except Exception:
	        return styled_error("Could not get your model information. Please fill it up properly.")

	    model_size = get_model_size(model_info=model_info, precision=precision)

	    # Were the model card and license filled?
	    # Broad catch is deliberate: cardData may be missing entirely or lack the key.
	    try:
	        model_license = model_info.cardData["license"]
	    except Exception:
	        return styled_error("Please select a license for your model")

	    modelcard_OK, error_msg = check_model_card(model)
	    if not modelcard_OK:
	        return styled_error(error_msg)

	    # Seems good, creating the eval
	    print("Adding new eval")

	    eval_entry = {
	        "model": model,
	        "base_model": base_model,
	        "revision": revision,
	        "precision": precision,
	        "weight_type": weight_type,
	        "status": "PENDING",
	        "submitted_time": current_time,
	        "model_type": model_type,
	        "likes": model_info.likes,
	        "params": model_size,
	        "license": model_license,
	        "private": False,
	    }

	    # Check for duplicate submission
	    # NOTE(review): REQUESTED_MODELS is not updated here after a successful
	    # upload, so a repeat submission within the same process is not caught
	    # by this check — confirm whether that is intended.
	    if f"{model}_{revision}_{precision}" in REQUESTED_MODELS:
	        return styled_warning("This model has been already submitted.")

	    print("Creating eval file")
	    OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
	    out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json"

	    # Path inside the queue repo: everything after the local "eval-queue/"
	    # directory. partition() (rather than split()[1]) keeps the full tail
	    # even if the marker text reappears in the model name, and the fallback
	    # avoids an IndexError when the local path does not contain the marker.
	    _, marker, tail = out_path.partition("eval-queue/")
	    if marker:
	        path_in_repo = tail.lstrip("/")
	    else:
	        path_in_repo = os.path.relpath(out_path, EVAL_REQUESTS_PATH).replace(os.sep, "/")

	    try:
	        os.makedirs(OUT_DIR, exist_ok=True)
	        with open(out_path, "w", encoding="utf-8") as f:
	            json.dump(eval_entry, f)

	        print("Uploading eval file")
	        API.upload_file(
	            path_or_fileobj=out_path,
	            path_in_repo=path_in_repo,
	            repo_id=QUEUE_REPO,
	            repo_type="dataset",
	            commit_message=f"Add {model} to eval queue",
	        )
	    except Exception as e:
	        # Report the failure to the user instead of crashing the callback,
	        # mirroring add_manual_results.
	        print(f"Failed to submit eval request for {model}: {e}")
	        return styled_error(f"Failed to submit your request: {e}")
	    finally:
	        # Remove the local file whether or not the upload succeeded.
	        if os.path.exists(out_path):
	            os.remove(out_path)

	    return styled_message(
	        "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
	    )

	# --------------------------------------------------------
	# Manual metrics submission (bypass evaluation queue)
	# --------------------------------------------------------

	ALL_SUBJECTIVE_FIELDS = [
	"readability",
	"relevance",
	"explanation_clarity",
	"problem_identification",
	"actionability",
	"completeness",
	"specificity",
	"contextual_adequacy",
	"consistency",
	"brevity",
	]

	def _compute_multimetric(payload: dict) -> float:
	"""Average of the 10 subjective metrics."""
	total = sum(float(payload[f]) for f in ALL_SUBJECTIVE_FIELDS)
	return total / len(ALL_SUBJECTIVE_FIELDS)

	def add_manual_results(
	    model: str,
	    revision: str,
	    bleu: float,
	    readability: int,
	    relevance: int,
	    explanation_clarity: int,
	    problem_identification: int,
	    actionability: int,
	    completeness: int,
	    specificity: int,
	    contextual_adequacy: int,
	    consistency: int,
	    brevity: int,
	    pass_at_1: float,
	    pass_at_5: float,
	    pass_at_10: float,
	):
	    """Directly submit evaluation metrics for a model and push them to the results dataset.

	    The metrics are validated, combined into a results JSON document
	    (config + per-metric {"score": ...} entries, including a "multimetric"
	    average of the subjective metrics), written to a uniquely named file
	    under EVAL_RESULTS_PATH and uploaded to RESULTS_REPO. The local file is
	    always removed afterwards.

	    Args:
	        model: Model name; must be non-empty.
	        revision: Model revision; empty or None means "main".
	        bleu: BLEU score.
	        readability, relevance, explanation_clarity, problem_identification,
	        actionability, completeness, specificity, contextual_adequacy,
	        consistency, brevity: Subjective metric scores.
	        pass_at_1, pass_at_5, pass_at_10: pass@k scores; must be non-decreasing in k.

	    Returns:
	        The result of styled_error on validation or upload failure, otherwise
	        the result of styled_message.
	    """
	    # Basic validation
	    if model is None or model.strip() == "":
	        return styled_error("Please specify a model name.")

	    if not revision:
	        revision = "main"

	    # Gather every submitted value by name (original comment: same format
	    # used by read_evals.py).
	    payload_dict = {
	        "model": model,
	        "revision": revision,
	        "bleu": bleu,
	        "readability": readability,
	        "relevance": relevance,
	        "explanation_clarity": explanation_clarity,
	        "problem_identification": problem_identification,
	        "actionability": actionability,
	        "completeness": completeness,
	        "specificity": specificity,
	        "contextual_adequacy": contextual_adequacy,
	        "consistency": consistency,
	        "brevity": brevity,
	        "pass_at_1": pass_at_1,
	        "pass_at_5": pass_at_5,
	        "pass_at_10": pass_at_10,
	    }

	    # A field left empty arrives as None; without this guard the comparisons
	    # and the average below would raise TypeError.
	    missing = [name for name, value in payload_dict.items() if value is None]
	    if missing:
	        return styled_error(f"Please provide a value for: {', '.join(missing)}")

	    if pass_at_5 < pass_at_1:
	        return styled_error("pass@5 must be greater or equal to pass@1")
	    if pass_at_10 < pass_at_5:
	        return styled_error("pass@10 must be greater or equal to pass@5")

	    try:
	        multimetric = _compute_multimetric(payload_dict)
	    except (TypeError, ValueError):
	        return styled_error("Subjective metrics must be numeric.")

	    # Compose final results file (original comment: same structure as
	    # api_submit_results). Key order matches the original output:
	    # headline metrics first, then the subjective metrics.
	    results = {
	        "bleu": {"score": bleu},
	        "multimetric": {"score": multimetric},
	        "pass_at_1": {"score": pass_at_1},
	        "pass_at_5": {"score": pass_at_5},
	        "pass_at_10": {"score": pass_at_10},
	    }
	    results.update({field: {"score": payload_dict[field]} for field in ALL_SUBJECTIVE_FIELDS})

	    result_json = {
	        "config": {
	            "model_dtype": "unknown",
	            "model_name": model,
	            "model_sha": revision,
	        },
	        "results": results,
	    }

	    # Local import: uuid is not imported at module level in this file.
	    import uuid

	    # Timestamp plus a short random suffix keeps file names unique per submission.
	    ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
	    unique_id = uuid.uuid4().hex[:8]
	    filename = f"results_{model.replace('/', '_')}_{ts}_{unique_id}.json"
	    local_path = os.path.join(EVAL_RESULTS_PATH, filename)

	    # Write file locally then upload; any failure (including creating the
	    # directory) is reported to the user rather than silently ignored.
	    try:
	        os.makedirs(EVAL_RESULTS_PATH, exist_ok=True)
	        with open(local_path, "w", encoding="utf-8") as fp:
	            json.dump(result_json, fp)

	        API.upload_file(
	            path_or_fileobj=local_path,
	            path_in_repo=filename,
	            repo_id=RESULTS_REPO,
	            repo_type="dataset",
	            commit_message=f"Add manual results for {model}",
	        )
	    except Exception as e:
	        return styled_error(f"Failed to upload results: {e}")
	    finally:
	        if os.path.exists(local_path):
	            os.remove(local_path)

	    return styled_message("Metrics successfully submitted! The leaderboard will refresh shortly.")