Hao Xu
committed on
Commit
·
30c3967
1
Parent(s):
ed84703
submission functionality update
Browse files- app.py +223 -60
- community_results.json +0 -3
app.py
CHANGED
@@ -2,81 +2,236 @@ import gradio as gr
|
|
2 |
import os
|
3 |
import json
|
4 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
def load_data(source):
|
7 |
-
data = []
|
8 |
if source == "core":
|
9 |
-
with open("data.json", "r") as
|
10 |
-
data = json.load(
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
|
14 |
return data
|
15 |
|
|
|
16 |
def build_table(source):
|
17 |
data = load_data(source)
|
18 |
entries = []
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
for entry in data:
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
"
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
"
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
with gr.Blocks() as interface:
|
61 |
gr.Markdown("# π Benchmark Contamination Bulletin")
|
62 |
|
63 |
with gr.Tabs():
|
64 |
-
with gr.Tab(label="
|
65 |
-
|
66 |
source_radio = gr.Radio(
|
67 |
choices=["core", "community"],
|
68 |
label="Select Benchmark Source",
|
69 |
value="core"
|
70 |
)
|
71 |
|
72 |
-
|
73 |
-
leaderboard_table = gr.Dataframe(
|
74 |
-
value=build_table("core"),
|
75 |
-
headers=table_columns,
|
76 |
-
interactive=False,
|
77 |
-
wrap=True,
|
78 |
-
label="Dirty Rates"
|
79 |
-
)
|
80 |
|
81 |
def update_table(source):
|
82 |
return build_table(source)
|
@@ -84,14 +239,22 @@ with gr.Blocks() as interface:
|
|
84 |
source_radio.change(
|
85 |
fn=update_table,
|
86 |
inputs=source_radio,
|
87 |
-
outputs=
|
88 |
)
|
89 |
|
90 |
-
|
91 |
-
|
92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
|
94 |
-
|
|
|
|
|
95 |
|
96 |
with gr.Row():
|
97 |
jsonl_input = gr.File(label="Upload .jsonl File", file_types=[".jsonl"])
|
@@ -105,7 +268,7 @@ with gr.Blocks() as interface:
|
|
105 |
|
106 |
submit_button.click(
|
107 |
fn=record_submission,
|
108 |
-
inputs=[benchmark_name_input, jsonl_input, hf_path_input, hf_split_input, field_name_input],
|
109 |
outputs=result_output
|
110 |
)
|
111 |
|
|
|
2 |
import os
|
3 |
import json
|
4 |
import pandas as pd
|
5 |
+
from huggingface_hub import HfApi, hf_hub_download
|
6 |
+
from datasets import load_dataset
|
7 |
+
import requests
|
8 |
+
|
9 |
+
TOKEN = os.environ.get("HF_TOKEN")
|
10 |
+
OWNER = os.environ.get("OWNER")
|
11 |
+
RESULTS_COMMUNITY = f"{OWNER}/benchmark_results"
|
12 |
+
api = HfApi()
|
13 |
+
|
14 |
+
URL = os.environ.get("URL")
|
15 |
+
|
16 |
|
17 |
def load_data(source):
    """Load the benchmark result records for one leaderboard source.

    Args:
        source: ``"core"`` reads the local ``data.json`` file; any other
            value loads the ``train`` split of the ``RESULTS_COMMUNITY``
            dataset.

    Returns:
        For ``"core"``, whatever ``data.json`` contains once parsed.
        Otherwise a list holding every item yielded by the dataset.
    """
    if source == "core":
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open("data.json", "r", encoding="utf-8") as f:
            return json.load(f)

    # Materialise the dataset rows so callers get a plain list.
    return list(load_dataset(RESULTS_COMMUNITY, split='train'))
|
27 |
|
28 |
+
|
29 |
# Inline style shared by every header and body cell of the results table.
_TABLE_CELL_STYLE = "border: 1px solid #ddd; padding: 8px; text-align: right;"

# Client-side sorting for the results table. The direction is toggled first
# and the rows are then sorted in that same direction, so the triangle
# indicator always matches the order actually shown.
# NOTE(review): confirm that the Gradio version in use executes <script> tags
# embedded in gr.HTML content.
_SORT_SCRIPT = """
<script>
let sortDirection = {};

function sortTable(header) {
    var table = document.getElementById("benchmarkTable");
    var rows = Array.from(table.rows).slice(1);
    var columnIndex = Array.from(header.parentNode.children).indexOf(header);
    var isAscending = sortDirection[columnIndex] !== 'ascending';

    sortDirection[columnIndex] = isAscending ? 'ascending' : 'descending';

    var allHeaders = header.parentNode.children;
    Array.from(allHeaders).forEach(th => {
        th.querySelector('.triangle').classList.remove('ascending', 'descending');
    });

    header.querySelector('.triangle').classList.add(sortDirection[columnIndex]);

    rows.sort(function(rowA, rowB) {
        var cellA = rowA.cells[columnIndex].innerText;
        var cellB = rowB.cells[columnIndex].innerText;

        if (isNaN(cellA)) {
            return isAscending ? cellA.localeCompare(cellB) : cellB.localeCompare(cellA);
        }
        return isAscending ? parseFloat(cellA) - parseFloat(cellB) : parseFloat(cellB) - parseFloat(cellA);
    });

    for (var i = 0; i < rows.length; i++) {
        table.appendChild(rows[i]);
    }
}
</script>
"""

# Styling for the sort-direction triangle shown next to each column header.
_TABLE_STYLE = """
<style>
.triangle {
    display: inline-block;
    width: 0;
    height: 0;
    border-left: 5px solid transparent;
    border-right: 5px solid transparent;
    margin-left: 5px;
    transition: transform 0.2s;
}
.ascending {
    border-bottom: 5px solid #000;
}
.descending {
    border-top: 5px solid #000;
}
</style>
"""


def _format_rate(value):
    """Return the display text for one dirty-rate cell.

    Non-negative numbers are shown with one decimal place. Negative numbers
    (the ``-1`` "missing" marker) and ``None`` are shown as ``"N/A"``. Any
    other value is shown as HTML-escaped text.
    """
    from html import escape

    if value is None:
        return "N/A"
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return escape(str(value))
    # NaN compares False against 0 and therefore also ends up as "N/A".
    return f"{value:5.1f}" if value >= 0 else "N/A"


def build_table(source):
    """Render the results for *source* as a sortable HTML table.

    Args:
        source: ``"core"`` produces a "Category" column; any other value
            produces a "Contributor" column. The value is passed unchanged
            to ``load_data``.

    Returns:
        A string containing the ``<table>`` markup followed by the sorting
        ``<script>`` and the ``<style>`` for the sort indicator.
    """
    from html import escape

    label_column = "Category" if source == "core" else "Contributor"
    # (column header, key in the result record) for each dirty-rate column.
    rate_columns = (
        ("Pile-train Dirty (%)", "Pile Dirty"),
        ("DCLM-baseline Dirty (%)", "DCLM Dirty"),
        ("CC-2025-05 Dirty (%)", "CC202505 Dirty"),
    )
    headers = ["Benchmark", label_column] + [title for title, _ in rate_columns]

    parts = [
        '\n<table id="benchmarkTable" style="border-collapse: collapse; width: 100%;">\n'
        '<thead><tr>\n'
    ]
    parts.extend(
        f'<th style="{_TABLE_CELL_STYLE}" onclick="sortTable(this)">'
        f'{col} <span class="triangle"></span></th>'
        for col in headers
    )
    parts.append('</tr></thead>\n<tbody>\n')

    for entry in load_data(source):
        # Record values may originate from user submissions; escape them
        # before they are placed into markup or attributes.
        name = escape(str(entry.get("Benchmark") or ""))
        url = escape(str(entry.get("URL") or "#"), quote=True)
        cells = [
            f'<a href="{url}" target="_blank" rel="noopener noreferrer">{name}</a>',
            escape(str(entry.get(label_column) or "")),
        ]
        # -1 is the "no result yet" marker and is rendered as "N/A".
        cells.extend(_format_rate(entry.get(key, -1)) for _, key in rate_columns)

        parts.append("<tr>")
        parts.extend(f'<td style="{_TABLE_CELL_STYLE}">{cell}</td>' for cell in cells)
        parts.append("</tr>\n")

    parts.append("</tbody></table>")
    parts.append(_SORT_SCRIPT)
    parts.append(_TABLE_STYLE)
    return "".join(parts)
|
132 |
+
|
133 |
+
|
134 |
+
# NOTE(review): the leading symbols in the status messages below are kept
# exactly as they appear in this file; they look like mis-encoded emoji and
# should be confirmed against the original source.


def _check_jsonl_field(path, field_name, max_records=10):
    """Check that the first records of a JSONL file contain *field_name*.

    Blank lines are skipped. At most *max_records* records are inspected.

    Returns:
        An error message string, or ``None`` when every inspected record is
        a JSON object containing *field_name*.
    """
    checked = 0
    try:
        with open(path, 'r', encoding='utf-8') as f:
            for line_number, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError as e:
                    return f"β Invalid JSON format in line {line_number}: {str(e)}"
                if not isinstance(entry, dict):
                    return f"β Line {line_number} is not a JSON object."
                if field_name not in entry:
                    available_fields = list(entry.keys())
                    return f"β Field '{field_name}' not found in JSONL file. Available fields: {', '.join(available_fields)}"
                checked += 1
                if checked >= max_records:
                    break
    except (OSError, UnicodeDecodeError) as e:
        return f"β Error reading file: {str(e)}"

    if checked == 0:
        return "β The uploaded file is empty."
    return None


def _check_hf_field(hf_path, hf_split, field_name):
    """Check that the first item of a HuggingFace dataset split has *field_name*.

    Returns:
        An error message string, or ``None`` when the field is present.
    """
    if not hf_split:
        return "β Please provide a dataset split for the HuggingFace dataset."

    try:
        # NOTE(security): trust_remote_code=True is passed for a dataset path
        # typed in by the user. Review whether loading datasets that need
        # remote code should be allowed for untrusted paths.
        dataset_info = load_dataset(hf_path, split=hf_split, streaming=True, trust_remote_code=True)
        first_item = next(iter(dataset_info), None)
        if first_item is None:
            return "β Could not access HuggingFace dataset: the split contains no items."
        if field_name not in first_item:
            available_fields = list(first_item.keys())
            return f"β Field '{field_name}' not found in dataset. Available fields: {', '.join(available_fields)}"
    except Exception as e:
        # Boundary with a third-party loader: report any failure to the user.
        return f"β Could not access HuggingFace dataset: {str(e)}"
    return None


def record_submission(benchmark_name, contributor, jsonl_file, hf_path, hf_split, field_name):
    """Validate a benchmark submission and post it to the ``/submit`` endpoint.

    Args:
        benchmark_name: Required name of the benchmark.
        contributor: Name of the submitter; may be empty or ``None``.
        jsonl_file: Uploaded file, either an object with a ``name``
            attribute holding its path or the path itself, or ``None``.
        hf_path: HuggingFace dataset path, used when no file is uploaded.
        hf_split: Dataset split; required when *hf_path* is validated.
        field_name: Required name of the field each record must contain.

    Returns:
        A status message string for display. Errors are reported through
        the returned message rather than raised.
    """
    benchmark_name = (benchmark_name or "").strip()
    contributor = (contributor or "").strip()
    hf_path = (hf_path or "").strip()
    hf_split = (hf_split or "").strip()
    field_name = (field_name or "").strip()

    if not benchmark_name:
        return "β Please provide a benchmark name."

    if not field_name:
        return "β Please provide a field name."

    has_jsonl = jsonl_file is not None
    has_hf = bool(hf_path)

    if not has_jsonl and not has_hf:
        return "β Please provide either a .jsonl file or a HuggingFace dataset path."

    # The upload may be a wrapper object exposing ``.name`` or a plain path.
    jsonl_path = getattr(jsonl_file, "name", jsonl_file) if has_jsonl else None

    # An uploaded file takes precedence over a dataset path.
    if has_jsonl:
        error = _check_jsonl_field(jsonl_path, field_name)
    else:
        error = _check_hf_field(hf_path, hf_split, field_name)
    if error:
        return error

    if not URL:
        return "β Error submitting benchmark: submission server URL is not configured."

    data = {
        'name': benchmark_name,
        'contributor': contributor,
        'type': 'jsonl' if has_jsonl else 'hf',
        'split': hf_split if has_hf else '',
        'field_name': field_name,
        'hf_path': hf_path if has_hf else ''
    }

    try:
        upload = open(jsonl_path, 'rb') if has_jsonl else None
        try:
            files = {'file': (benchmark_name + '.jsonl', upload, 'application/json')} if upload else None
            # (connect, read) timeouts so a stalled server cannot hang the UI.
            response = requests.post(URL + "/submit", data=data, files=files, timeout=(10, 300))
        finally:
            # Close the upload handle even when the request raises.
            if upload is not None:
                upload.close()

        if response.status_code != 200:
            return f"β Server error: {response.status_code} - {response.text}"

        result = response.json()
        status = result.get("status")
        if status == "success":
            message = result.get('message', 'Submission successful!')

            full_message = f"{message}\n\n" \
                           f"π Your submission has been saved and will be processed automatically.\n" \
                           f"π‘ Results will appear in the main leaderboard when ready.\n" \
                           f"π You can refresh the leaderboard to check for updates."

            return full_message
        if status == "info":
            return f"βΉοΈ {result.get('message', 'Submission already exists')}"
        return f"β {result.get('message', 'Unknown error occurred')}"

    except Exception as e:
        # UI boundary: any network or parsing failure becomes a message.
        return f"β Error submitting benchmark: {str(e)}"
|
221 |
+
|
222 |
|
223 |
with gr.Blocks() as interface:
|
224 |
gr.Markdown("# π Benchmark Contamination Bulletin")
|
225 |
|
226 |
with gr.Tabs():
|
227 |
+
with gr.Tab(label="Bulletin"):
|
|
|
228 |
source_radio = gr.Radio(
|
229 |
choices=["core", "community"],
|
230 |
label="Select Benchmark Source",
|
231 |
value="core"
|
232 |
)
|
233 |
|
234 |
+
leaderboard_html = gr.HTML(build_table("core"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
|
236 |
def update_table(source):
|
237 |
return build_table(source)
|
|
|
239 |
source_radio.change(
|
240 |
fn=update_table,
|
241 |
inputs=source_radio,
|
242 |
+
outputs=leaderboard_html
|
243 |
)
|
244 |
|
245 |
+
refresh_button = gr.Button("Refresh")
|
246 |
+
refresh_button.click(
|
247 |
+
fn=update_table,
|
248 |
+
inputs=source_radio,
|
249 |
+
outputs=leaderboard_html
|
250 |
+
)
|
251 |
+
|
252 |
+
with gr.Tab(label="Add New Benchmarks"):
|
253 |
+
gr.Markdown("## Add Your Own Benchmarks for Contamination Checking")
|
254 |
|
255 |
+
with gr.Row():
|
256 |
+
benchmark_name_input = gr.Textbox(label="Benchmark Name")
|
257 |
+
contributor_input = gr.Textbox(label="Contributor")
|
258 |
|
259 |
with gr.Row():
|
260 |
jsonl_input = gr.File(label="Upload .jsonl File", file_types=[".jsonl"])
|
|
|
268 |
|
269 |
submit_button.click(
|
270 |
fn=record_submission,
|
271 |
+
inputs=[benchmark_name_input, contributor_input, jsonl_input, hf_path_input, hf_split_input, field_name_input],
|
272 |
outputs=result_output
|
273 |
)
|
274 |
|
community_results.json
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
[
|
2 |
-
|
3 |
-
]
|
|
|
|
|
|
|
|