import datetime
import os
import subprocess
import tempfile
import time

from dotenv import load_dotenv

from lighteval_task.lighteval_task import create_yourbench_task

# Load environment variables from the local .env file
load_dotenv()

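# lighteval loads custom task definitions from a Python module passed via
# --custom-tasks; that module is expected to expose a TASKS_TABLE list, so we
# generate one in a temporary file for this run.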
with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False) as temp_file:
    temp_file_path = temp_file.name
    temp_file.write("""
from lighteval_task.lighteval_task import create_yourbench_task

# Create yourbench task
yourbench = create_yourbench_task("yourbench/yourbench_fbfe278f-70c8-4579-9447-8275b94250bd", "single_shot_questions")

# Define TASKS_TABLE needed by lighteval
TASKS_TABLE = [yourbench]
""")

# Timestamped output directory for this evaluation run
output_dir = f"data/lighteval_results_strict_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"

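# Build the lighteval CLI call: evaluate Qwen2.5-72B-Instruct served through
# the Hugging Face inference-providers endpoint (provider "novita") on the
# custom yourbench task, capped at 10 samples.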
cmd_args = [
    "lighteval",
    "endpoint",
    "inference-providers",
    "model=Qwen/Qwen2.5-72B-Instruct,provider=novita",
    "custom|yourbench|0|0",
    "--custom-tasks", temp_file_path,
    "--max-samples", "10",
    "--output-dir", output_dir,
    "--save-details",
    "--no-push-to-hub",
]

start_time = time.time()

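# Run the evaluation in a subprocess, passing the current environment so that
# anything loaded from .env (such as API tokens) is visible to lighteval.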
subprocess.run(cmd_args, env=os.environ)

execution_time = time.time() - start_time
print(f"\nExecution time: {execution_time:.2f} seconds")
print(f"Results saved to: {output_dir}")

# Remove the temporary custom task module
os.unlink(temp_file_path)