Hao Xu committed on
Commit
30c3967
·
1 Parent(s): ed84703

submission functionality update

Browse files
Files changed (2) hide show
  1. app.py +223 -60
  2. community_results.json +0 -3
app.py CHANGED
@@ -2,81 +2,236 @@ import gradio as gr
2
  import os
3
  import json
4
  import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
5
 
def load_data(source):
    """Return the parsed JSON records for the requested benchmark source.

    ``"core"`` reads ``data.json`` and ``"community"`` reads
    ``community_results.json``; any other value yields an empty list.
    """
    files_by_source = {
        "core": "data.json",
        "community": "community_results.json",
    }
    path = files_by_source.get(source)
    if path is None:
        # Unknown sources fall back to "no rows" instead of raising.
        return []
    with open(path, "r") as handle:
        return json.load(handle)
15
 
 
def build_table(source):
    """Build the leaderboard table for ``source`` as a pandas DataFrame.

    Rows come from ``load_data(source)`` and are sorted by
    ``"Pile Dirty (%)"`` in descending order.

    The column list is passed to ``DataFrame`` explicitly so that an empty
    source (for example an empty community results file) still produces a
    frame that has the sort column; without it ``sort_values`` raises
    ``KeyError``. Missing rates are stored as ``None`` rather than ``""`` so
    the numeric columns stay sortable instead of mixing strings with floats.
    """
    columns = [
        "Benchmark",
        "Category",
        "Pile Dirty (%)",
        "DCLM Dirty (%)",
        "CC Dirty (%)",
    ]
    rows = [
        {
            "Benchmark": entry.get("Benchmark", ""),
            "Category": entry.get("Category", ""),
            "Pile Dirty (%)": entry.get("Pile Dirty"),
            "DCLM Dirty (%)": entry.get("DCLM Dirty"),
            "CC Dirty (%)": entry.get("CC202505 Dirty"),
        }
        for entry in load_data(source)
    ]
    table = pd.DataFrame(rows, columns=columns)
    # Missing rates sort last (pandas' default na_position).
    return table.sort_values(by="Pile Dirty (%)", ascending=False)
30
-
def record_submission(benchmark_name, jsonl_file, hf_path, hf_split, field_name):
    """Validate a submission and append it to ``pending_submissions.json``.

    Args:
        benchmark_name: Name stored under ``"name"`` in the queue entry.
        jsonl_file: Uploaded file object exposing ``.name``, or ``None``.
        hf_path: HuggingFace dataset path, used when no file is uploaded.
        hf_split: Dataset split; required whenever ``hf_path`` is given.
        field_name: Name of the field to check; required.

    Returns:
        A human-readable status string: a validation error, or the success
        message once the entry has been written to the queue file.
    """
    if jsonl_file is None and not hf_path:
        return "Please provide either a .jsonl file or a HuggingFace dataset path."
    if hf_path and not hf_split:
        return "Please provide a dataset split for the HuggingFace dataset."
    if not field_name:
        return "Please provide a field name."

    entry = {
        "name": benchmark_name,
        "type": "jsonl" if jsonl_file else "hf",
        "path": jsonl_file.name if jsonl_file else hf_path,
        "split": hf_split if hf_path else None,
        "field_name": field_name,
    }

    queue_file = "pending_submissions.json"
    # EAFP: open directly instead of checking os.path.exists first, which
    # leaves a window where the file can disappear between check and open.
    try:
        with open(queue_file, "r", encoding="utf-8") as f:
            existing = json.load(f)
    except FileNotFoundError:
        existing = []

    existing.append(entry)
    with open(queue_file, "w", encoding="utf-8") as f:
        json.dump(existing, f, indent=2)

    return "✅ Submission received! You'll be notified when processing is complete."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  with gr.Blocks() as interface:
61
  gr.Markdown("# πŸ“– Benchmark Contamination Bulletin")
62
 
63
  with gr.Tabs():
64
- with gr.Tab(label="Leaderboard"):
65
-
66
  source_radio = gr.Radio(
67
  choices=["core", "community"],
68
  label="Select Benchmark Source",
69
  value="core"
70
  )
71
 
72
- table_columns = ["Benchmark", "Category", "Pile Dirty (%)", "DCLM Dirty (%)", "CC202505 Dirty (%)"]
73
- leaderboard_table = gr.Dataframe(
74
- value=build_table("core"),
75
- headers=table_columns,
76
- interactive=False,
77
- wrap=True,
78
- label="Dirty Rates"
79
- )
80
 
81
  def update_table(source):
82
  return build_table(source)
@@ -84,14 +239,22 @@ with gr.Blocks() as interface:
84
  source_radio.change(
85
  fn=update_table,
86
  inputs=source_radio,
87
- outputs=leaderboard_table
88
  )
89
 
90
- # Submission Tab
91
- with gr.Tab(label="Submission"):
92
- gr.Markdown("## Submit Your Dataset for Contamination Checking")
 
 
 
 
 
 
93
 
94
- benchmark_name_input = gr.Textbox(label="Benchmark Name")
 
 
95
 
96
  with gr.Row():
97
  jsonl_input = gr.File(label="Upload .jsonl File", file_types=[".jsonl"])
@@ -105,7 +268,7 @@ with gr.Blocks() as interface:
105
 
106
  submit_button.click(
107
  fn=record_submission,
108
- inputs=[benchmark_name_input, jsonl_input, hf_path_input, hf_split_input, field_name_input],
109
  outputs=result_output
110
  )
111
 
 
2
  import os
3
  import json
4
  import pandas as pd
5
+ from huggingface_hub import HfApi, hf_hub_download
6
+ from datasets import load_dataset
7
+ import requests
8
+
# Deployment configuration read from environment variables; each value is
# None when the variable is unset.
TOKEN = os.getenv("HF_TOKEN")  # presumably a Hugging Face access token - confirm
OWNER = os.getenv("OWNER")
# Dataset repository id that load_data() reads community results from.
RESULTS_COMMUNITY = f"{OWNER}/benchmark_results"
api = HfApi()

# Base address of the submission service; record_submission() posts to
# URL + "/submit".
URL = os.getenv("URL")
15
+
16
 
def load_data(source):
    """Return the result records for the requested benchmark source.

    ``"core"`` reads the local ``data.json`` file. Any other value loads the
    ``train`` split of the ``RESULTS_COMMUNITY`` dataset and returns its rows
    as a list.
    """
    if source == "core":
        # Explicit encoding: do not depend on the platform's locale default.
        with open("data.json", "r", encoding="utf-8") as f:
            return json.load(f)

    ds = load_dataset(RESULTS_COMMUNITY, split="train")
    return list(ds)
27
 
28
+
# Client-side click-to-sort handler appended after the table markup.
# NOTE(review): `isAscending` is computed before the stored direction is
# toggled but used by the comparator after it, so on the first click of a
# column the indicator gets the 'ascending' class while the rows are sorted
# with the descending comparator - confirm whether that is intended.
_SORT_SCRIPT = """
<script>
let sortDirection = {};

function sortTable(header) {
    var table = document.getElementById("benchmarkTable");
    var rows = Array.from(table.rows).slice(1);
    var columnIndex = Array.from(header.parentNode.children).indexOf(header);
    var isAscending = sortDirection[columnIndex] === 'ascending';

    sortDirection[columnIndex] = isAscending ? 'descending' : 'ascending';

    var allHeaders = header.parentNode.children;
    Array.from(allHeaders).forEach(th => {
        th.querySelector('.triangle').classList.remove('ascending', 'descending');
    });

    header.querySelector('.triangle').classList.add(sortDirection[columnIndex]);

    rows.sort(function(rowA, rowB) {
        var cellA = rowA.cells[columnIndex].innerText;
        var cellB = rowB.cells[columnIndex].innerText;

        if (isNaN(cellA)) {
            return isAscending ? cellA.localeCompare(cellB) : cellB.localeCompare(cellA);
        }
        return isAscending ? parseFloat(cellA) - parseFloat(cellB) : parseFloat(cellB) - parseFloat(cellA);
    });

    for (var i = 0; i < rows.length; i++) {
        table.appendChild(rows[i]);
    }
}
</script>
"""

# Styling for the sort-direction indicator inside each header cell.
_TRIANGLE_STYLE = """
<style>
.triangle {
    display: inline-block;
    width: 0;
    height: 0;
    border-left: 5px solid transparent;
    border-right: 5px solid transparent;
    margin-left: 5px;
    transition: transform 0.2s;
}
.ascending {
    border-bottom: 5px solid #000;
}
.descending {
    border-top: 5px solid #000;
}
</style>
"""

_CELL_STYLE = "border: 1px solid #ddd; padding: 8px; text-align: right;"


def _format_dirty_rate(value):
    """Render one dirty-rate value as table-cell text.

    Non-negative numbers are formatted with one decimal place. ``None`` and
    negative numbers (``-1`` is the default used for a missing rate) become
    ``"N/A"``. Anything else is converted with ``str``.
    """
    if value is None:
        return "N/A"
    # bool is a subclass of int; do not format True/False as 1.0/0.0.
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        # NaN fails the comparison and therefore also renders as "N/A".
        return f"{value:5.1f}" if value >= 0 else "N/A"
    return str(value)


def build_table(source):
    """Render the results for ``source`` as a sortable HTML table.

    Args:
        source: ``"core"`` shows a "Category" column; any other value shows a
            "Contributor" column instead.

    Returns:
        A single HTML string containing the table, followed by the sorting
        ``<script>`` and the indicator ``<style>``.

    All record-derived text (benchmark name, URL, category/contributor, and
    rate values) is HTML-escaped before interpolation, because the records may
    carry user-submitted text.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    data = load_data(source)

    meta_column = "Category" if source == "core" else "Contributor"
    # (header shown in the table, key read from each record)
    rate_columns = (
        ("Pile-train Dirty (%)", "Pile Dirty"),
        ("DCLM-baseline Dirty (%)", "DCLM Dirty"),
        ("CC-2025-05 Dirty (%)", "CC202505 Dirty"),
    )
    headers = ["Benchmark", meta_column] + [title for title, _ in rate_columns]

    parts = [
        """
<table id="benchmarkTable" style="border-collapse: collapse; width: 100%;">
<thead><tr>
"""
    ]
    parts.extend(
        f'<th style="{_CELL_STYLE}" onclick="sortTable(this)">{col} <span class="triangle"></span></th>'
        for col in headers
    )
    parts.append('</tr></thead>\n<tbody>\n')

    for entry in data:
        name = escape(str(entry.get("Benchmark") or ""))
        url = escape(str(entry.get("URL") or "#"), quote=True)
        cells = [
            f'<a href="{url}" target="_blank">{name}</a>',
            escape(str(entry.get(meta_column) or "")),
        ]
        cells.extend(
            escape(_format_dirty_rate(entry.get(key, -1))) for _, key in rate_columns
        )

        parts.append("<tr>")
        parts.extend(f'<td style="{_CELL_STYLE}">{cell}</td>' for cell in cells)
        parts.append("</tr>\n")

    parts.append("</tbody></table>")
    parts.append(_SORT_SCRIPT)
    parts.append(_TRIANGLE_STYLE)

    return "".join(parts)
132
+
133
+
# Number of non-blank JSONL records inspected before a file is accepted.
_JSONL_SAMPLE_RECORDS = 10
# Upper bound (seconds) for the submission request so a stalled server cannot
# block the UI callback indefinitely.
_SUBMIT_TIMEOUT_SECONDS = 120


def _check_jsonl_field(path, field_name):
    """Return an error message if the JSONL file at ``path`` is unusable, else ``None``.

    Only the first ``_JSONL_SAMPLE_RECORDS`` non-blank lines are inspected.
    Each must be a JSON object containing ``field_name``.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            checked = 0
            for line_number, raw_line in enumerate(f, start=1):
                text = raw_line.strip()
                if not text:
                    # Blank lines (e.g. a trailing newline) are not records.
                    continue
                try:
                    record = json.loads(text)
                except json.JSONDecodeError as e:
                    return f"❌ Invalid JSON format in line {line_number}: {str(e)}"
                if not isinstance(record, dict):
                    return f"❌ Line {line_number} of the JSONL file is not a JSON object."
                if field_name not in record:
                    available_fields = [str(key) for key in record]
                    return (
                        f"❌ Field '{field_name}' not found in JSONL file. "
                        f"Available fields: {', '.join(available_fields)}"
                    )
                checked += 1
                if checked >= _JSONL_SAMPLE_RECORDS:
                    break
    except (OSError, UnicodeDecodeError) as e:
        return f"❌ Error reading file: {str(e)}"

    if checked == 0:
        return "❌ The uploaded file is empty."
    return None


def _check_hf_field(hf_path, hf_split, field_name):
    """Return an error message if the HuggingFace dataset lacks ``field_name``, else ``None``.

    Streams the split and inspects only its first item.
    """
    try:
        # SECURITY NOTE(review): `hf_path` is user input, and
        # trust_remote_code=True is understood to allow a dataset repository's
        # loading script to run in this process - confirm against the
        # `datasets` documentation and consider disabling it.
        stream = load_dataset(hf_path, split=hf_split, streaming=True, trust_remote_code=True)
        first_item = next(iter(stream), None)
    except Exception as e:
        return f"❌ Could not access HuggingFace dataset: {str(e)}"

    if first_item is None:
        return "❌ Could not access HuggingFace dataset: the split contains no items."
    if field_name not in first_item:
        available_fields = [str(key) for key in first_item.keys()]
        return f"❌ Field '{field_name}' not found in dataset. Available fields: {', '.join(available_fields)}"
    return None


def record_submission(benchmark_name, contributor, jsonl_file, hf_path, hf_split, field_name):
    """Validate a benchmark submission and forward it to the submission server.

    Args:
        benchmark_name: Required benchmark name.
        contributor: Contributor name; may be empty or ``None``.
        jsonl_file: Uploaded file (an object exposing ``.name`` or a plain
            path string), or ``None``.
        hf_path: HuggingFace dataset path, used when no file is uploaded.
        hf_split: Dataset split; required for the HuggingFace path.
        field_name: Required name of the field that every record must have.

    Returns:
        A status string for display: a validation error, the server's
        message, or a description of the failure. Exceptions raised while
        submitting are reported in the returned string rather than raised.
    """
    benchmark_name = (benchmark_name or "").strip()
    contributor = (contributor or "").strip()
    field_name = (field_name or "").strip()
    hf_path = (hf_path or "").strip()
    hf_split = (hf_split or "").strip()

    if not benchmark_name:
        return "❌ Please provide a benchmark name."
    if not field_name:
        return "❌ Please provide a field name."

    has_jsonl = jsonl_file is not None
    has_hf = bool(hf_path)
    if not has_jsonl and not has_hf:
        return "❌ Please provide either a .jsonl file or a HuggingFace dataset path."

    jsonl_path = None
    if has_jsonl:
        # Accept both an upload object with `.name` and a bare file path.
        jsonl_path = getattr(jsonl_file, "name", jsonl_file)
        error = _check_jsonl_field(jsonl_path, field_name)
    else:
        if not hf_split:
            return "❌ Please provide a dataset split for the HuggingFace dataset."
        error = _check_hf_field(hf_path, hf_split, field_name)
    if error:
        return error

    data = {
        'name': benchmark_name,
        'contributor': contributor,
        'type': 'jsonl' if has_jsonl else 'hf',
        'split': hf_split if has_hf else '',
        'field_name': field_name,
        'hf_path': hf_path if has_hf else '',
    }

    try:
        if not URL:
            return "❌ Error submitting benchmark: submission server URL is not configured."
        endpoint = URL.rstrip("/") + "/submit"

        if has_jsonl:
            # The context manager closes the upload even if the request raises.
            with open(jsonl_path, 'rb') as upload:
                files = {'file': (benchmark_name + '.jsonl', upload, 'application/json')}
                response = requests.post(
                    endpoint, data=data, files=files, timeout=_SUBMIT_TIMEOUT_SECONDS
                )
        else:
            response = requests.post(endpoint, data=data, timeout=_SUBMIT_TIMEOUT_SECONDS)

        if response.status_code != 200:
            return f"❌ Server error: {response.status_code} - {response.text}"

        result = response.json()
        status = result.get("status")
        if status == "success":
            message = result.get('message', 'Submission successful!')
            return (
                f"{message}\n\n"
                "📋 Your submission has been saved and will be processed automatically.\n"
                "💡 Results will appear in the main leaderboard when ready.\n"
                "🔄 You can refresh the leaderboard to check for updates."
            )
        if status == "info":
            return f"ℹ️ {result.get('message', 'Submission already exists')}"
        return f"❌ {result.get('message', 'Unknown error occurred')}"

    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing the callback.
        return f"❌ Error submitting benchmark: {str(e)}"
221
+
222
 
223
  with gr.Blocks() as interface:
224
  gr.Markdown("# πŸ“– Benchmark Contamination Bulletin")
225
 
226
  with gr.Tabs():
227
+ with gr.Tab(label="Bulletin"):
 
228
  source_radio = gr.Radio(
229
  choices=["core", "community"],
230
  label="Select Benchmark Source",
231
  value="core"
232
  )
233
 
234
+ leaderboard_html = gr.HTML(build_table("core"))
 
 
 
 
 
 
 
235
 
236
  def update_table(source):
237
  return build_table(source)
 
239
  source_radio.change(
240
  fn=update_table,
241
  inputs=source_radio,
242
+ outputs=leaderboard_html
243
  )
244
 
245
+ refresh_button = gr.Button("Refresh")
246
+ refresh_button.click(
247
+ fn=update_table,
248
+ inputs=source_radio,
249
+ outputs=leaderboard_html
250
+ )
251
+
252
+ with gr.Tab(label="Add New Benchmarks"):
253
+ gr.Markdown("## Add Your Own Benchmarks for Contamination Checking")
254
 
255
+ with gr.Row():
256
+ benchmark_name_input = gr.Textbox(label="Benchmark Name")
257
+ contributor_input = gr.Textbox(label="Contributor")
258
 
259
  with gr.Row():
260
  jsonl_input = gr.File(label="Upload .jsonl File", file_types=[".jsonl"])
 
268
 
269
  submit_button.click(
270
  fn=record_submission,
271
+ inputs=[benchmark_name_input, contributor_input, jsonl_input, hf_path_input, hf_split_input, field_name_input],
272
  outputs=result_output
273
  )
274
 
community_results.json DELETED
@@ -1,3 +0,0 @@
1
- [
2
-
3
- ]