Alex committed on
Commit
ea6e048
·
1 Parent(s): 6ec1619
Files changed (2) hide show
  1. app.py +42 -33
  2. leaderboard_data.json +10 -10
app.py CHANGED
@@ -11,16 +11,22 @@ DEFAULT_MODEL_NAME = "example/model"
11
 
12
  # --------------- Data models ---------------
13
  class Metrics(BaseModel):
14
- readability: float
15
- relevance: float
16
- explanation_clarity: float = Field(alias="explanation_clarity")
17
- problem_identification: float
18
- actionability: float
19
- completeness: float
20
- specificity: float
21
- contextual_adequacy: float
22
- consistency: float
23
- brevity: float
 
 
 
 
 
 
24
 
25
 
26
  class LeaderboardEntry(BaseModel):
@@ -85,16 +91,16 @@ def submit_model(
85
  llm_pass_1: float,
86
  llm_pass_5: float,
87
  llm_pass_10: float,
88
- readability: float,
89
- relevance: float,
90
- explanation_clarity: float,
91
- problem_identification: float,
92
- actionability: float,
93
- completeness: float,
94
- specificity: float,
95
- contextual_adequacy: float,
96
- consistency: float,
97
- brevity: float,
98
  ):
99
  """Validate and append a new model entry to the leaderboard."""
100
  try:
@@ -133,9 +139,12 @@ def submit_model(
133
  with gr.Blocks(title="Custom LLM Leaderboard") as demo:
134
  gr.Markdown("""# 🏆 LLM Leaderboard\nSubmit your model results below. Leaderboard is sorted by **Pass@1**. """)
135
 
 
 
 
136
  leaderboard_df = gr.Dataframe(
137
- headers=list(_table_data()[0].keys()) if _table_data() else [],
138
- value=_table_data(),
139
  label="Current Leaderboard",
140
  interactive=False,
141
  )
@@ -150,18 +159,18 @@ with gr.Blocks(title="Custom LLM Leaderboard") as demo:
150
  pass5_inp = gr.Number(label="Pass@5", value=0.0, minimum=0.0, maximum=1.0)
151
  pass10_inp = gr.Number(label="Pass@10", value=0.0, minimum=0.0, maximum=1.0)
152
 
153
- gr.Markdown("### Multi-metric subjective scores (0.0 – 1.0)")
154
  with gr.Row():
155
- readability_inp = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.05, label="Readability")
156
- relevance_inp = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.05, label="Relevance")
157
- explanation_inp = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.05, label="Explanation Clarity")
158
- problem_inp = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.05, label="Problem Identification")
159
- actionability_inp = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.05, label="Actionability")
160
- completeness_inp = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.05, label="Completeness")
161
- specificity_inp = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.05, label="Specificity")
162
- contextual_inp = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.05, label="Contextual Adequacy")
163
- consistency_inp = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.05, label="Consistency")
164
- brevity_inp = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.05, label="Brevity")
165
 
166
  submit_btn = gr.Button("Submit")
167
  status_markdown = gr.Markdown("")
 
11
 
12
  # --------------- Data models ---------------
13
  class Metrics(BaseModel):
14
+ readability: int
15
+ relevance: int
16
+ explanation_clarity: int = Field(alias="explanation_clarity")
17
+ problem_identification: int
18
+ actionability: int
19
+ completeness: int
20
+ specificity: int
21
+ contextual_adequacy: int
22
+ consistency: int
23
+ brevity: int
24
+
25
+ @field_validator("readability", "relevance", "explanation_clarity", "problem_identification", "actionability", "completeness", "specificity", "contextual_adequacy", "consistency", "brevity")
26
+ def metric_range(cls, v: int):
27
+ if not 0 <= v <= 10:
28
+ raise ValueError("Multi-metrics should be between 0 and 10")
29
+ return v
30
 
31
 
32
  class LeaderboardEntry(BaseModel):
 
91
  llm_pass_1: float,
92
  llm_pass_5: float,
93
  llm_pass_10: float,
94
+ readability: int,
95
+ relevance: int,
96
+ explanation_clarity: int,
97
+ problem_identification: int,
98
+ actionability: int,
99
+ completeness: int,
100
+ specificity: int,
101
+ contextual_adequacy: int,
102
+ consistency: int,
103
+ brevity: int,
104
  ):
105
  """Validate and append a new model entry to the leaderboard."""
106
  try:
 
139
  with gr.Blocks(title="Custom LLM Leaderboard") as demo:
140
  gr.Markdown("""# 🏆 LLM Leaderboard\nSubmit your model results below. Leaderboard is sorted by **Pass@1**. """)
141
 
142
+ # Initialize table data
143
+ initial_data = _table_data()
144
+
145
  leaderboard_df = gr.Dataframe(
146
+ headers=list(initial_data[0].keys()) if initial_data else ["Model", "BLEU", "Pass@1", "Pass@5", "Pass@10", "Readability", "Relevance", "Explanation Clarity", "Problem Identification", "Actionability", "Completeness", "Specificity", "Contextual Adequacy", "Consistency", "Brevity"],
147
+ value=initial_data,
148
  label="Current Leaderboard",
149
  interactive=False,
150
  )
 
159
  pass5_inp = gr.Number(label="Pass@5", value=0.0, minimum=0.0, maximum=1.0)
160
  pass10_inp = gr.Number(label="Pass@10", value=0.0, minimum=0.0, maximum=1.0)
161
 
162
+ gr.Markdown("### Multi-metric subjective scores (0 – 10)")
163
  with gr.Row():
164
+ readability_inp = gr.Slider(minimum=0, maximum=10, value=5, step=1, label="Readability")
165
+ relevance_inp = gr.Slider(minimum=0, maximum=10, value=5, step=1, label="Relevance")
166
+ explanation_inp = gr.Slider(minimum=0, maximum=10, value=5, step=1, label="Explanation Clarity")
167
+ problem_inp = gr.Slider(minimum=0, maximum=10, value=5, step=1, label="Problem Identification")
168
+ actionability_inp = gr.Slider(minimum=0, maximum=10, value=5, step=1, label="Actionability")
169
+ completeness_inp = gr.Slider(minimum=0, maximum=10, value=5, step=1, label="Completeness")
170
+ specificity_inp = gr.Slider(minimum=0, maximum=10, value=5, step=1, label="Specificity")
171
+ contextual_inp = gr.Slider(minimum=0, maximum=10, value=5, step=1, label="Contextual Adequacy")
172
+ consistency_inp = gr.Slider(minimum=0, maximum=10, value=5, step=1, label="Consistency")
173
+ brevity_inp = gr.Slider(minimum=0, maximum=10, value=5, step=1, label="Brevity")
174
 
175
  submit_btn = gr.Button("Submit")
176
  status_markdown = gr.Markdown("")
leaderboard_data.json CHANGED
@@ -7,16 +7,16 @@
7
  "llm_pass_5": 0.5,
8
  "llm_pass_10": 0.5,
9
  "metrics": {
10
- "readability": 0.5,
11
- "relevance": 0.5,
12
- "explanation_clarity": 0.5,
13
- "problem_identification": 0.5,
14
- "actionability": 0.5,
15
- "completeness": 0.5,
16
- "specificity": 0.5,
17
- "contextual_adequacy": 0.5,
18
- "consistency": 0.5,
19
- "brevity": 0.5
20
  }
21
  }
22
  ]
 
7
  "llm_pass_5": 0.5,
8
  "llm_pass_10": 0.5,
9
  "metrics": {
10
+ "readability": 5,
11
+ "relevance": 5,
12
+ "explanation_clarity": 5,
13
+ "problem_identification": 5,
14
+ "actionability": 5,
15
+ "completeness": 5,
16
+ "specificity": 5,
17
+ "contextual_adequacy": 5,
18
+ "consistency": 5,
19
+ "brevity": 5
20
  }
21
  }
22
  ]