import datetime
import os
import subprocess
import tempfile
import time

from dotenv import load_dotenv

from lighteval_task.lighteval_task import create_yourbench_task

# Load environment variables from the local .env file
load_dotenv()

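# lighteval loads custom task definitions from a Python module passed via
# --custom-tasks; that module is expected to expose a TASKS_TABLE list, so we
# generate one in a temporary file for this run.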
with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False) as temp_file:
    temp_file_path = temp_file.name
    temp_file.write("""
from lighteval_task.lighteval_task import create_yourbench_task

# Create yourbench task
yourbench = create_yourbench_task("yourbench/yourbench_fbfe278f-70c8-4579-9447-8275b94250bd", "single_shot_questions")

# Define TASKS_TABLE needed by lighteval
TASKS_TABLE = [yourbench]
""")

# Timestamped output directory for this evaluation run
output_dir = f"data/lighteval_results_strict_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"

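# Build the lighteval CLI call: evaluate Qwen2.5-72B-Instruct served through
# the Hugging Face inference-providers endpoint (provider "novita") on the
# custom yourbench task, capped at 10 samples.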
cmd_args = [
    "lighteval",
    "endpoint",
    "inference-providers",
    "model=Qwen/Qwen2.5-72B-Instruct,provider=novita",
    "custom|yourbench|0|0",
    "--custom-tasks", temp_file_path,
    "--max-samples", "10",
    "--output-dir", output_dir,
    "--save-details",
    "--no-push-to-hub",
]

start_time = time.time()

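# Run the evaluation in a subprocess, passing the current environment so that
# anything loaded from .env (such as API tokens) is visible to lighteval.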
subprocess.run(cmd_args, env=os.environ)

execution_time = time.time() - start_time
print(f"\nExecution time: {execution_time:.2f} seconds")
print(f"Results saved to: {output_dir}")

# Remove the temporary custom task module
os.unlink(temp_file_path)