Alex committed on
Commit
1125184
·
1 Parent(s): 313559c
Files changed (2) hide show
  1. main.py +166 -0
  2. requirements.txt +2 -1
main.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+
3
+ import os
4
+
5
+ import pandas as pd
6
+ from fastapi import FastAPI, HTTPException
7
+ from pydantic import BaseModel, Field, validator
8
+ import gradio as gr
9
+
10
+ # -----------------------------------------------------------------------------
11
+ # Constants
12
+ # -----------------------------------------------------------------------------
13
# Path of the CSV file backing the leaderboard; the LEADERBOARD_CSV
# environment variable overrides the default file name.
CSV_PATH = os.environ.get("LEADERBOARD_CSV", "leaderboard.csv")

# individual multimetric dimensions
_MULTIMETRIC_DIMENSIONS = (
    "readability",
    "relevance",
    "explanation_clarity",
    "problem_identification",
    "actionability",
    "completeness",
    "specificity",
    "contextual_adequacy",
    "consistency",
    "brevity",
)

# exact-match metrics
_PASS_AT_K_COLUMNS = ("pass_at_1", "pass_at_5", "pass_at_10")

# Column order used when the leaderboard CSV is written.
LEADERBOARD_COLUMNS = [
    "model_name",
    "bleu",
    "multimetric",
    *_MULTIMETRIC_DIMENSIONS,
    *_PASS_AT_K_COLUMNS,
]
34
+
35
+
36
+ # -----------------------------------------------------------------------------
37
+ # Pydantic schema for incoming submissions
38
+ # -----------------------------------------------------------------------------
39
class Submission(BaseModel):
    """Schema of one leaderboard submission as received by the API.

    The payload carries a display name, a BLEU score, ten integer
    dimension scores (each 0-5) and three pass@k rates (each 0-1).
    The aggregate ``multimetric`` value is not part of the payload; it is
    derived with :meth:`compute_multimetric`.
    """

    # min_length=1: an empty name is written to the CSV as an empty cell,
    # which pandas reads back as NaN and which can then never be matched
    # when a later submission tries to replace the entry.
    model_name: str = Field(
        ...,
        min_length=1,
        description="Arbitrary display name for the submission",
    )

    # automatic metric; upper bound enforces the documented 0-100 range
    bleu: float = Field(..., ge=0, le=100, description="BLEU score (0-100)")

    # ten subjective dimensions
    readability: int = Field(..., ge=0, le=5)
    relevance: int = Field(..., ge=0, le=5)
    explanation_clarity: int = Field(..., ge=0, le=5)
    problem_identification: int = Field(..., ge=0, le=5)
    actionability: int = Field(..., ge=0, le=5)
    completeness: int = Field(..., ge=0, le=5)
    specificity: int = Field(..., ge=0, le=5)
    contextual_adequacy: int = Field(..., ge=0, le=5)
    consistency: int = Field(..., ge=0, le=5)
    brevity: int = Field(..., ge=0, le=5)

    # exact-match pass@k
    pass_at_1: float = Field(..., ge=0, le=1)
    pass_at_5: float = Field(..., ge=0, le=1)
    pass_at_10: float = Field(..., ge=0, le=1)

    @validator("pass_at_5")
    def pass5_ge_pass1(cls, v, values):
        """Reject a pass@5 that is lower than the submitted pass@1."""
        # The comparison is skipped when "pass_at_1" is absent from `values`.
        if "pass_at_1" in values and v < values["pass_at_1"]:
            raise ValueError("pass@5 must be >= pass@1")
        return v

    @validator("pass_at_10")
    def pass10_ge_pass5(cls, v, values):
        """Reject a pass@10 that is lower than the submitted pass@5."""
        # The comparison is skipped when "pass_at_5" is absent from `values`.
        if "pass_at_5" in values and v < values["pass_at_5"]:
            raise ValueError("pass@10 must be >= pass@5")
        return v

    # computed property (not part of submission payload)
    def compute_multimetric(self) -> float:
        """Return the arithmetic mean of the ten dimension scores."""
        fields = [
            self.readability,
            self.relevance,
            self.explanation_clarity,
            self.problem_identification,
            self.actionability,
            self.completeness,
            self.specificity,
            self.contextual_adequacy,
            self.consistency,
            self.brevity,
        ]
        return float(sum(fields)) / len(fields)
89
+
90
+
91
+ # -----------------------------------------------------------------------------
92
+ # Helpers
93
+ # -----------------------------------------------------------------------------
94
+
95
def _init_storage(csv_path: str) -> None:
    """Ensure the CSV exists with the correct header.

    A header-only file (columns from ``LEADERBOARD_COLUMNS``) is written
    when ``csv_path`` does not exist or is a zero-byte file. An existing
    non-empty file is left untouched.

    Args:
        csv_path: Location of the leaderboard CSV.
    """
    # A zero-byte file would make the subsequent pd.read_csv raise
    # EmptyDataError, so it is treated the same as a missing file.
    if os.path.exists(csv_path) and os.path.getsize(csv_path) > 0:
        return

    # Allow a custom path that points into a directory that does not exist yet.
    parent_dir = os.path.dirname(csv_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    pd.DataFrame(columns=LEADERBOARD_COLUMNS).to_csv(csv_path, index=False)
100
+
101
+
102
def _load_leaderboard() -> pd.DataFrame:
    """Read the leaderboard CSV and return it sorted by pass@1, best first.

    The storage file is created first if needed. Sorting is skipped for an
    empty table or when the ``pass_at_1`` column is missing.

    Returns:
        The leaderboard rows as a DataFrame.
    """
    _init_storage(CSV_PATH)
    # Force model_name to str: without it pandas infers the type, so a name
    # such as "123" would come back as the integer 123.
    df = pd.read_csv(CSV_PATH, dtype={"model_name": str})
    # sort descending by Pass@1; a stable sort keeps tied rows in file order
    if not df.empty and "pass_at_1" in df.columns:
        df = df.sort_values("pass_at_1", ascending=False, kind="stable")
    return df
109
+
110
+
111
def _save_submission(sub: Submission):
    """Insert or replace the leaderboard row for ``sub.model_name``.

    Any existing row with the same model name is dropped, the new row
    (payload fields plus the computed ``multimetric``) is appended, and the
    whole table is written back to ``CSV_PATH`` in ``LEADERBOARD_COLUMNS``
    order.

    Args:
        sub: Validated submission to persist.
    """
    _init_storage(CSV_PATH)
    # Read model_name as str so that numeric-looking names (e.g. "123") are
    # not inferred as numbers, which would make the comparison below never
    # match and leave duplicate rows behind.
    df = pd.read_csv(CSV_PATH, dtype={"model_name": str})

    # Remove previous entry for the same model (if any)
    df = df[df["model_name"] != sub.model_name]

    # Compose new row
    record = sub.dict()
    record["multimetric"] = sub.compute_multimetric()
    new_row = pd.DataFrame([record])

    # Concatenating with an empty frame is deprecated in pandas, so the new
    # row simply becomes the table when nothing else is stored.
    if df.empty:
        df = new_row
    else:
        df = pd.concat([df, new_row], ignore_index=True)

    # keep ordering of columns; reindex (unlike df[...]) tolerates a stored
    # file that lacks one of the expected columns and fills it with NaN
    df = df.reindex(columns=LEADERBOARD_COLUMNS)
    df.to_csv(CSV_PATH, index=False)
126
+
127
+
128
+ # -----------------------------------------------------------------------------
129
+ # FastAPI backend
130
+ # -----------------------------------------------------------------------------
131
# Application object on which the submission and leaderboard routes are registered.
api = FastAPI(
    title="Leaderboard API",
    version="0.1.0",
)
132
+
133
+
134
@api.post("/submit", tags=["submission"])
async def submit_results(payload: Submission):
    """Receive a new result entry and persist it.

    Args:
        payload: Submission parsed and validated from the request body.

    Returns:
        A small status dictionary on success.

    Raises:
        HTTPException: With status 500 when storing the submission fails.
    """
    # Only the storage call is guarded. The payload has already been
    # validated against the Submission schema when this body runs, so a
    # failure here is a server-side storage problem, not a bad request.
    try:
        _save_submission(payload)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"status": "ok", "detail": "Submission stored."}
142
+
143
+
144
@api.get("/leaderboard", tags=["leaderboard"])
async def get_leaderboard():
    """Return the current leaderboard as JSON (sorted by Pass@1).

    Returns:
        A list with one dictionary per leaderboard row.
    """
    df = _load_leaderboard()
    # NaN is not valid JSON; cast to object and turn missing cells into None
    # so they are emitted as null instead of failing serialization.
    df = df.astype(object).where(df.notna(), None)
    return df.to_dict(orient="records")
148
+
149
+
150
+ # -----------------------------------------------------------------------------
151
+ # Gradio frontend
152
+ # -----------------------------------------------------------------------------
153
+
154
def _load_leaderboard_df():
    """Return the leaderboard table produced by ``_load_leaderboard``."""
    leaderboard = _load_leaderboard()
    return leaderboard
156
+
157
+
158
# UI: a heading, the leaderboard table, and a button that reloads the table.
# The emoji and the em dash below replace mis-encoded byte sequences.
with gr.Blocks(title="📊 Leaderboard") as demo:
    gr.Markdown("# 📊 Leaderboard — sorted by **LLM-based exact-match Pass@1**")
    # The initial table contents are loaded once, when this module is imported.
    df_component = gr.Dataframe(value=_load_leaderboard_df(), interactive=False, wrap=True)
    refresh_btn = gr.Button("🔄 Refresh")
    # Pass the loader directly; wrapping it in a lambda added nothing.
    refresh_btn.click(_load_leaderboard_df, outputs=df_component)


# Mount gradio under "/"
app = gr.mount_gradio_app(api, demo, path="/")
requirements.txt CHANGED
@@ -13,4 +13,5 @@ python-dateutil
13
  tqdm
14
  transformers
15
  tokenizers>=0.15.0
16
- sentencepiece
 
 
13
  tqdm
14
  transformers
15
  tokenizers>=0.15.0
16
+ sentencepiece
17
+ fastapi