Spaces:
Sleeping
Sleeping
Alex
committed on
Commit
·
ea6e048
1
Parent(s):
6ec1619
error
Browse files- app.py +42 -33
- leaderboard_data.json +10 -10
app.py
CHANGED
@@ -11,16 +11,22 @@ DEFAULT_MODEL_NAME = "example/model"
|
|
11 |
|
12 |
# --------------- Data models ---------------
|
13 |
class Metrics(BaseModel):
|
14 |
-
readability:
|
15 |
-
relevance:
|
16 |
-
explanation_clarity:
|
17 |
-
problem_identification:
|
18 |
-
actionability:
|
19 |
-
completeness:
|
20 |
-
specificity:
|
21 |
-
contextual_adequacy:
|
22 |
-
consistency:
|
23 |
-
brevity:
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
|
26 |
class LeaderboardEntry(BaseModel):
|
@@ -85,16 +91,16 @@ def submit_model(
|
|
85 |
llm_pass_1: float,
|
86 |
llm_pass_5: float,
|
87 |
llm_pass_10: float,
|
88 |
-
readability:
|
89 |
-
relevance:
|
90 |
-
explanation_clarity:
|
91 |
-
problem_identification:
|
92 |
-
actionability:
|
93 |
-
completeness:
|
94 |
-
specificity:
|
95 |
-
contextual_adequacy:
|
96 |
-
consistency:
|
97 |
-
brevity:
|
98 |
):
|
99 |
"""Validate and append a new model entry to the leaderboard."""
|
100 |
try:
|
@@ -133,9 +139,12 @@ def submit_model(
|
|
133 |
with gr.Blocks(title="Custom LLM Leaderboard") as demo:
|
134 |
gr.Markdown("""# 🏆 LLM Leaderboard\nSubmit your model results below. Leaderboard is sorted by **Pass@1**. """)
|
135 |
|
|
|
|
|
|
|
136 |
leaderboard_df = gr.Dataframe(
|
137 |
-
headers=list(
|
138 |
-
value=
|
139 |
label="Current Leaderboard",
|
140 |
interactive=False,
|
141 |
)
|
@@ -150,18 +159,18 @@ with gr.Blocks(title="Custom LLM Leaderboard") as demo:
|
|
150 |
pass5_inp = gr.Number(label="Pass@5", value=0.0, minimum=0.0, maximum=1.0)
|
151 |
pass10_inp = gr.Number(label="Pass@10", value=0.0, minimum=0.0, maximum=1.0)
|
152 |
|
153 |
-
gr.Markdown("### Multi-metric subjective scores (0
|
154 |
with gr.Row():
|
155 |
-
readability_inp = gr.Slider(minimum=0
|
156 |
-
relevance_inp = gr.Slider(minimum=0
|
157 |
-
explanation_inp = gr.Slider(minimum=0
|
158 |
-
problem_inp = gr.Slider(minimum=0
|
159 |
-
actionability_inp = gr.Slider(minimum=0
|
160 |
-
completeness_inp = gr.Slider(minimum=0
|
161 |
-
specificity_inp = gr.Slider(minimum=0
|
162 |
-
contextual_inp = gr.Slider(minimum=0
|
163 |
-
consistency_inp = gr.Slider(minimum=0
|
164 |
-
brevity_inp = gr.Slider(minimum=0
|
165 |
|
166 |
submit_btn = gr.Button("Submit")
|
167 |
status_markdown = gr.Markdown("")
|
|
|
11 |
|
12 |
# --------------- Data models ---------------
|
13 |
class Metrics(BaseModel):
|
14 |
+
readability: int
|
15 |
+
relevance: int
|
16 |
+
explanation_clarity: int = Field(alias="explanation_clarity")
|
17 |
+
problem_identification: int
|
18 |
+
actionability: int
|
19 |
+
completeness: int
|
20 |
+
specificity: int
|
21 |
+
contextual_adequacy: int
|
22 |
+
consistency: int
|
23 |
+
brevity: int
|
24 |
+
|
25 |
+
@field_validator("readability", "relevance", "explanation_clarity", "problem_identification", "actionability", "completeness", "specificity", "contextual_adequacy", "consistency", "brevity")
|
26 |
+
def metric_range(cls, v: int):
|
27 |
+
if not 0 <= v <= 10:
|
28 |
+
raise ValueError("Multi-metrics should be between 0 and 10")
|
29 |
+
return v
|
30 |
|
31 |
|
32 |
class LeaderboardEntry(BaseModel):
|
|
|
91 |
llm_pass_1: float,
|
92 |
llm_pass_5: float,
|
93 |
llm_pass_10: float,
|
94 |
+
readability: int,
|
95 |
+
relevance: int,
|
96 |
+
explanation_clarity: int,
|
97 |
+
problem_identification: int,
|
98 |
+
actionability: int,
|
99 |
+
completeness: int,
|
100 |
+
specificity: int,
|
101 |
+
contextual_adequacy: int,
|
102 |
+
consistency: int,
|
103 |
+
brevity: int,
|
104 |
):
|
105 |
"""Validate and append a new model entry to the leaderboard."""
|
106 |
try:
|
|
|
139 |
with gr.Blocks(title="Custom LLM Leaderboard") as demo:
|
140 |
gr.Markdown("""# 🏆 LLM Leaderboard\nSubmit your model results below. Leaderboard is sorted by **Pass@1**. """)
|
141 |
|
142 |
+
# Initialize table data
|
143 |
+
initial_data = _table_data()
|
144 |
+
|
145 |
leaderboard_df = gr.Dataframe(
|
146 |
+
headers=list(initial_data[0].keys()) if initial_data else ["Model", "BLEU", "Pass@1", "Pass@5", "Pass@10", "Readability", "Relevance", "Explanation Clarity", "Problem Identification", "Actionability", "Completeness", "Specificity", "Contextual Adequacy", "Consistency", "Brevity"],
|
147 |
+
value=initial_data,
|
148 |
label="Current Leaderboard",
|
149 |
interactive=False,
|
150 |
)
|
|
|
159 |
pass5_inp = gr.Number(label="Pass@5", value=0.0, minimum=0.0, maximum=1.0)
|
160 |
pass10_inp = gr.Number(label="Pass@10", value=0.0, minimum=0.0, maximum=1.0)
|
161 |
|
162 |
+
gr.Markdown("### Multi-metric subjective scores (0 – 10)")
|
163 |
with gr.Row():
|
164 |
+
readability_inp = gr.Slider(minimum=0, maximum=10, value=5, step=1, label="Readability")
|
165 |
+
relevance_inp = gr.Slider(minimum=0, maximum=10, value=5, step=1, label="Relevance")
|
166 |
+
explanation_inp = gr.Slider(minimum=0, maximum=10, value=5, step=1, label="Explanation Clarity")
|
167 |
+
problem_inp = gr.Slider(minimum=0, maximum=10, value=5, step=1, label="Problem Identification")
|
168 |
+
actionability_inp = gr.Slider(minimum=0, maximum=10, value=5, step=1, label="Actionability")
|
169 |
+
completeness_inp = gr.Slider(minimum=0, maximum=10, value=5, step=1, label="Completeness")
|
170 |
+
specificity_inp = gr.Slider(minimum=0, maximum=10, value=5, step=1, label="Specificity")
|
171 |
+
contextual_inp = gr.Slider(minimum=0, maximum=10, value=5, step=1, label="Contextual Adequacy")
|
172 |
+
consistency_inp = gr.Slider(minimum=0, maximum=10, value=5, step=1, label="Consistency")
|
173 |
+
brevity_inp = gr.Slider(minimum=0, maximum=10, value=5, step=1, label="Brevity")
|
174 |
|
175 |
submit_btn = gr.Button("Submit")
|
176 |
status_markdown = gr.Markdown("")
|
leaderboard_data.json
CHANGED
@@ -7,16 +7,16 @@
|
|
7 |
"llm_pass_5": 0.5,
|
8 |
"llm_pass_10": 0.5,
|
9 |
"metrics": {
|
10 |
-
"readability":
|
11 |
-
"relevance":
|
12 |
-
"explanation_clarity":
|
13 |
-
"problem_identification":
|
14 |
-
"actionability":
|
15 |
-
"completeness":
|
16 |
-
"specificity":
|
17 |
-
"contextual_adequacy":
|
18 |
-
"consistency":
|
19 |
-
"brevity":
|
20 |
}
|
21 |
}
|
22 |
]
|
|
|
7 |
"llm_pass_5": 0.5,
|
8 |
"llm_pass_10": 0.5,
|
9 |
"metrics": {
|
10 |
+
"readability": 5,
|
11 |
+
"relevance": 5,
|
12 |
+
"explanation_clarity": 5,
|
13 |
+
"problem_identification": 5,
|
14 |
+
"actionability": 5,
|
15 |
+
"completeness": 5,
|
16 |
+
"specificity": 5,
|
17 |
+
"contextual_adequacy": 5,
|
18 |
+
"consistency": 5,
|
19 |
+
"brevity": 5
|
20 |
}
|
21 |
}
|
22 |
]
|