Spaces:
Sleeping
Sleeping
Alex
committed on
Commit
·
b4d9db9
1
Parent(s):
e7ea9f6
leaderboard
Browse files
- src/populate.py +10 -3
src/populate.py
CHANGED
@@ -14,9 +14,16 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
|
|
14 |
all_data_json = [v.to_dict() for v in raw_data]
|
15 |
|
16 |
df = pd.DataFrame.from_records(all_data_json)
|
17 |
-
# Sort primarily by LLM exact-match Pass@1 metric
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
df = df[cols].round(decimals=2)
|
21 |
|
22 |
# filter out if any of the benchmarks have not been produced
|
|
|
14 |
all_data_json = [v.to_dict() for v in raw_data]
|
15 |
|
16 |
df = pd.DataFrame.from_records(all_data_json)
|
17 |
+
# Sort primarily by LLM exact-match Pass@1 metric; if not present, fall back to average
|
18 |
+
preferred_cols = []
|
19 |
+
if hasattr(AutoEvalColumn, "pass_at_1"):
|
20 |
+
preferred_cols.append(AutoEvalColumn.pass_at_1.name)
|
21 |
+
preferred_cols.append(AutoEvalColumn.average.name)
|
22 |
+
|
23 |
+
for col in preferred_cols:
|
24 |
+
if col in df.columns:
|
25 |
+
df = df.sort_values(by=[col], ascending=False)
|
26 |
+
break
|
27 |
df = df[cols].round(decimals=2)
|
28 |
|
29 |
# filter out if any of the benchmarks have not been produced
|