Alex committed on
Commit
ffff7f4
·
1 Parent(s): c762a51
Files changed (1) hide show
  1. app.py +36 -6
app.py CHANGED
@@ -70,20 +70,50 @@ def _flatten_entry(entry: Dict) -> Dict:
70
  "Pass@1": entry["llm_pass_1"],
71
  "Pass@5": entry["llm_pass_5"],
72
  "Pass@10": entry["llm_pass_10"],
 
 
 
 
 
 
 
 
 
 
73
  }
74
- for metric_name, score in entry["metrics"].items():
75
- flat[metric_name.replace("_", " ").title()] = score
76
  return flat
77
 
78
 
79
- def _table_data() -> List[Dict]:
80
  data = _load_leaderboard()
81
  if not data:
82
  # Return empty list if no data
83
  return []
84
  # Sort descending by pass@1 as requested
85
  data.sort(key=lambda x: x["llm_pass_1"], reverse=True)
86
- return [_flatten_entry(e) for e in data]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
 
89
  # --------------- Gradio callbacks ---------------
@@ -127,7 +157,7 @@ def submit_model(
127
  },
128
  )
129
  except Exception as e:
130
- return gr.update(value=_table_data()), gr.update(value=f"❌ Submission failed: {e}")
131
 
132
  data = _load_leaderboard()
133
  # Replace existing model entry if any
@@ -135,7 +165,7 @@ def submit_model(
135
  data.append(entry.dict())
136
  _save_leaderboard(data)
137
 
138
- return gr.update(value=_table_data()), gr.update(value="✅ Submission recorded!")
139
 
140
 
141
  # --------------- Interface ---------------
 
70
  "Pass@1": entry["llm_pass_1"],
71
  "Pass@5": entry["llm_pass_5"],
72
  "Pass@10": entry["llm_pass_10"],
73
+ "Readability": entry["metrics"]["readability"],
74
+ "Relevance": entry["metrics"]["relevance"],
75
+ "Explanation Clarity": entry["metrics"]["explanation_clarity"],
76
+ "Problem Identification": entry["metrics"]["problem_identification"],
77
+ "Actionability": entry["metrics"]["actionability"],
78
+ "Completeness": entry["metrics"]["completeness"],
79
+ "Specificity": entry["metrics"]["specificity"],
80
+ "Contextual Adequacy": entry["metrics"]["contextual_adequacy"],
81
+ "Consistency": entry["metrics"]["consistency"],
82
+ "Brevity": entry["metrics"]["brevity"],
83
  }
 
 
84
  return flat
85
 
86
 
87
+ def _table_data() -> List[List]:
88
  data = _load_leaderboard()
89
  if not data:
90
  # Return empty list if no data
91
  return []
92
  # Sort descending by pass@1 as requested
93
  data.sort(key=lambda x: x["llm_pass_1"], reverse=True)
94
+
95
+ # Convert to list of lists for Gradio table
96
+ table_rows = []
97
+ for entry in data:
98
+ row = [
99
+ entry["model_name"],
100
+ entry["bleu"],
101
+ entry["llm_pass_1"],
102
+ entry["llm_pass_5"],
103
+ entry["llm_pass_10"],
104
+ entry["metrics"]["readability"],
105
+ entry["metrics"]["relevance"],
106
+ entry["metrics"]["explanation_clarity"],
107
+ entry["metrics"]["problem_identification"],
108
+ entry["metrics"]["actionability"],
109
+ entry["metrics"]["completeness"],
110
+ entry["metrics"]["specificity"],
111
+ entry["metrics"]["contextual_adequacy"],
112
+ entry["metrics"]["consistency"],
113
+ entry["metrics"]["brevity"],
114
+ ]
115
+ table_rows.append(row)
116
+ return table_rows
117
 
118
 
119
  # --------------- Gradio callbacks ---------------
 
157
  },
158
  )
159
  except Exception as e:
160
+ return _table_data(), f"❌ Submission failed: {e}"
161
 
162
  data = _load_leaderboard()
163
  # Replace existing model entry if any
 
165
  data.append(entry.dict())
166
  _save_leaderboard(data)
167
 
168
+ return _table_data(), "✅ Submission recorded!"
169
 
170
 
171
  # --------------- Interface ---------------