Spaces:
Sleeping
Sleeping
Alex
committed on
Commit
·
9d40219
1
Parent(s):
d369cff
error
Browse files
app.py
CHANGED
@@ -87,12 +87,9 @@ def _flatten_entry(entry: Dict) -> Dict:
|
|
87 |
def _table_data() -> List[List]:
|
88 |
data = _load_leaderboard()
|
89 |
if not data:
|
90 |
-
# Return empty list if no data
|
91 |
return []
|
92 |
-
# Sort descending by pass@1 as requested
|
93 |
data.sort(key=lambda x: x["llm_pass_1"], reverse=True)
|
94 |
|
95 |
-
# Convert to list of lists for Gradio table
|
96 |
table_rows = []
|
97 |
for entry in data:
|
98 |
row = [
|
@@ -101,6 +98,21 @@ def _table_data() -> List[List]:
|
|
101 |
entry["llm_pass_1"],
|
102 |
entry["llm_pass_5"],
|
103 |
entry["llm_pass_10"],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
entry["metrics"]["readability"],
|
105 |
entry["metrics"]["relevance"],
|
106 |
entry["metrics"]["explanation_clarity"],
|
@@ -157,7 +169,7 @@ def submit_model(
|
|
157 |
},
|
158 |
)
|
159 |
except Exception as e:
|
160 |
-
return _table_data(), f"❌ Submission failed: {e}"
|
161 |
|
162 |
data = _load_leaderboard()
|
163 |
# Replace existing model entry if any
|
@@ -165,7 +177,7 @@ def submit_model(
|
|
165 |
data.append(entry.dict())
|
166 |
_save_leaderboard(data)
|
167 |
|
168 |
-
return _table_data(), "✅ Submission recorded!"
|
169 |
|
170 |
|
171 |
# --------------- Interface ---------------
|
@@ -202,11 +214,19 @@ with gr.Blocks(title="CodeReview Leaderboard") as demo:
|
|
202 |
|
203 |
# Initialize table data
|
204 |
initial_data = _table_data()
|
|
|
205 |
|
206 |
leaderboard_df = gr.Dataframe(
|
207 |
-
headers=["Model", "BLEU", "Pass@1", "Pass@5", "Pass@10"
|
208 |
value=initial_data,
|
209 |
-
label="
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
210 |
interactive=False,
|
211 |
)
|
212 |
|
@@ -255,7 +275,7 @@ with gr.Blocks(title="CodeReview Leaderboard") as demo:
|
|
255 |
consistency_inp,
|
256 |
brevity_inp,
|
257 |
],
|
258 |
-
outputs=[leaderboard_df, status_markdown],
|
259 |
api_name="submit_model",
|
260 |
)
|
261 |
|
|
|
87 |
def _table_data() -> List[List]:
|
88 |
data = _load_leaderboard()
|
89 |
if not data:
|
|
|
90 |
return []
|
|
|
91 |
data.sort(key=lambda x: x["llm_pass_1"], reverse=True)
|
92 |
|
|
|
93 |
table_rows = []
|
94 |
for entry in data:
|
95 |
row = [
|
|
|
98 |
entry["llm_pass_1"],
|
99 |
entry["llm_pass_5"],
|
100 |
entry["llm_pass_10"],
|
101 |
+
]
|
102 |
+
table_rows.append(row)
|
103 |
+
return table_rows
|
104 |
+
|
105 |
+
|
106 |
+
def _multimetric_table_data() -> List[List]:
|
107 |
+
data = _load_leaderboard()
|
108 |
+
if not data:
|
109 |
+
return []
|
110 |
+
data.sort(key=lambda x: x["llm_pass_1"], reverse=True)
|
111 |
+
|
112 |
+
table_rows = []
|
113 |
+
for entry in data:
|
114 |
+
row = [
|
115 |
+
entry["model_name"],
|
116 |
entry["metrics"]["readability"],
|
117 |
entry["metrics"]["relevance"],
|
118 |
entry["metrics"]["explanation_clarity"],
|
|
|
169 |
},
|
170 |
)
|
171 |
except Exception as e:
|
172 |
+
return _table_data(), _multimetric_table_data(), f"❌ Submission failed: {e}"
|
173 |
|
174 |
data = _load_leaderboard()
|
175 |
# Replace existing model entry if any
|
|
|
177 |
data.append(entry.dict())
|
178 |
_save_leaderboard(data)
|
179 |
|
180 |
+
return _table_data(), _multimetric_table_data(), "✅ Submission recorded!"
|
181 |
|
182 |
|
183 |
# --------------- Interface ---------------
|
|
|
214 |
|
215 |
# Initialize table data
|
216 |
initial_data = _table_data()
|
217 |
+
initial_multimetric_data = _multimetric_table_data()
|
218 |
|
219 |
leaderboard_df = gr.Dataframe(
|
220 |
+
headers=["Model", "BLEU", "Pass@1", "Pass@5", "Pass@10"],
|
221 |
value=initial_data,
|
222 |
+
label="Main Metrics Leaderboard",
|
223 |
+
interactive=False,
|
224 |
+
)
|
225 |
+
|
226 |
+
multimetric_df = gr.Dataframe(
|
227 |
+
headers=["Model", "Readability", "Relevance", "Explanation Clarity", "Problem Identification", "Actionability", "Completeness", "Specificity", "Contextual Adequacy", "Consistency", "Brevity"],
|
228 |
+
value=initial_multimetric_data,
|
229 |
+
label="Multi-Metric Scores",
|
230 |
interactive=False,
|
231 |
)
|
232 |
|
|
|
275 |
consistency_inp,
|
276 |
brevity_inp,
|
277 |
],
|
278 |
+
outputs=[leaderboard_df, multimetric_df, status_markdown],
|
279 |
api_name="submit_model",
|
280 |
)
|
281 |
|