Spaces:
Sleeping
Sleeping
Alex
committed on
Commit
·
ffff7f4
1
Parent(s):
c762a51
error
Browse files
app.py
CHANGED
@@ -70,20 +70,50 @@ def _flatten_entry(entry: Dict) -> Dict:
|
|
70 |
"Pass@1": entry["llm_pass_1"],
|
71 |
"Pass@5": entry["llm_pass_5"],
|
72 |
"Pass@10": entry["llm_pass_10"],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
}
|
74 |
-
for metric_name, score in entry["metrics"].items():
|
75 |
-
flat[metric_name.replace("_", " ").title()] = score
|
76 |
return flat
|
77 |
|
78 |
|
79 |
-
def _table_data() -> List[
|
80 |
data = _load_leaderboard()
|
81 |
if not data:
|
82 |
# Return empty list if no data
|
83 |
return []
|
84 |
# Sort descending by pass@1 as requested
|
85 |
data.sort(key=lambda x: x["llm_pass_1"], reverse=True)
|
86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
|
89 |
# --------------- Gradio callbacks ---------------
|
@@ -127,7 +157,7 @@ def submit_model(
|
|
127 |
},
|
128 |
)
|
129 |
except Exception as e:
|
130 |
-
return
|
131 |
|
132 |
data = _load_leaderboard()
|
133 |
# Replace existing model entry if any
|
@@ -135,7 +165,7 @@ def submit_model(
|
|
135 |
data.append(entry.dict())
|
136 |
_save_leaderboard(data)
|
137 |
|
138 |
-
return
|
139 |
|
140 |
|
141 |
# --------------- Interface ---------------
|
|
|
70 |
"Pass@1": entry["llm_pass_1"],
|
71 |
"Pass@5": entry["llm_pass_5"],
|
72 |
"Pass@10": entry["llm_pass_10"],
|
73 |
+
"Readability": entry["metrics"]["readability"],
|
74 |
+
"Relevance": entry["metrics"]["relevance"],
|
75 |
+
"Explanation Clarity": entry["metrics"]["explanation_clarity"],
|
76 |
+
"Problem Identification": entry["metrics"]["problem_identification"],
|
77 |
+
"Actionability": entry["metrics"]["actionability"],
|
78 |
+
"Completeness": entry["metrics"]["completeness"],
|
79 |
+
"Specificity": entry["metrics"]["specificity"],
|
80 |
+
"Contextual Adequacy": entry["metrics"]["contextual_adequacy"],
|
81 |
+
"Consistency": entry["metrics"]["consistency"],
|
82 |
+
"Brevity": entry["metrics"]["brevity"],
|
83 |
}
|
|
|
|
|
84 |
return flat
|
85 |
|
86 |
|
87 |
+
def _table_data() -> List[List]:
|
88 |
data = _load_leaderboard()
|
89 |
if not data:
|
90 |
# Return empty list if no data
|
91 |
return []
|
92 |
# Sort descending by pass@1 as requested
|
93 |
data.sort(key=lambda x: x["llm_pass_1"], reverse=True)
|
94 |
+
|
95 |
+
# Convert to list of lists for Gradio table
|
96 |
+
table_rows = []
|
97 |
+
for entry in data:
|
98 |
+
row = [
|
99 |
+
entry["model_name"],
|
100 |
+
entry["bleu"],
|
101 |
+
entry["llm_pass_1"],
|
102 |
+
entry["llm_pass_5"],
|
103 |
+
entry["llm_pass_10"],
|
104 |
+
entry["metrics"]["readability"],
|
105 |
+
entry["metrics"]["relevance"],
|
106 |
+
entry["metrics"]["explanation_clarity"],
|
107 |
+
entry["metrics"]["problem_identification"],
|
108 |
+
entry["metrics"]["actionability"],
|
109 |
+
entry["metrics"]["completeness"],
|
110 |
+
entry["metrics"]["specificity"],
|
111 |
+
entry["metrics"]["contextual_adequacy"],
|
112 |
+
entry["metrics"]["consistency"],
|
113 |
+
entry["metrics"]["brevity"],
|
114 |
+
]
|
115 |
+
table_rows.append(row)
|
116 |
+
return table_rows
|
117 |
|
118 |
|
119 |
# --------------- Gradio callbacks ---------------
|
|
|
157 |
},
|
158 |
)
|
159 |
except Exception as e:
|
160 |
+
return _table_data(), f"❌ Submission failed: {e}"
|
161 |
|
162 |
data = _load_leaderboard()
|
163 |
# Replace existing model entry if any
|
|
|
165 |
data.append(entry.dict())
|
166 |
_save_leaderboard(data)
|
167 |
|
168 |
+
return _table_data(), "✅ Submission recorded!"
|
169 |
|
170 |
|
171 |
# --------------- Interface ---------------
|