Hao Xu
committed on
Commit
·
04cf55a
1
Parent(s):
73b542b
handling datasets with config
Browse files
app.py
CHANGED
@@ -137,7 +137,7 @@ def build_table(source, refresh=False):
|
|
137 |
return html
|
138 |
|
139 |
|
140 |
-
def record_submission(benchmark_name, contributor, jsonl_file, hf_path, hf_split, field_name):
|
141 |
if not benchmark_name or not benchmark_name.strip():
|
142 |
return "β Please provide a benchmark name."
|
143 |
|
@@ -177,7 +177,10 @@ def record_submission(benchmark_name, contributor, jsonl_file, hf_path, hf_split
|
|
177 |
return "β Please provide a dataset split for the HuggingFace dataset."
|
178 |
|
179 |
try:
|
180 |
-
|
|
|
|
|
|
|
181 |
first_item = next(iter(dataset_info))
|
182 |
if field_name.strip() not in first_item:
|
183 |
available_fields = list(first_item.keys())
|
@@ -303,6 +306,7 @@ with gr.Blocks() as interface:
|
|
303 |
with gr.Column():
|
304 |
hf_path_input = gr.Textbox(label="HuggingFace Dataset Path", placeholder="e.g., author/benchmark-name")
|
305 |
hf_split_input = gr.Textbox(label="Dataset split (only if providing HuggingFace Dataset)", placeholder="e.g., validation, test")
|
|
|
306 |
field_name_input = gr.Textbox(label="Context or Question Field Name", placeholder="e.g., context, question, ...")
|
307 |
|
308 |
submit_button = gr.Button("Submit for Contamination Check")
|
@@ -310,7 +314,7 @@ with gr.Blocks() as interface:
|
|
310 |
|
311 |
submit_button.click(
|
312 |
fn=record_submission,
|
313 |
-
inputs=[benchmark_name_input, contributor_input, jsonl_input, hf_path_input, hf_split_input, field_name_input],
|
314 |
outputs=result_output
|
315 |
)
|
316 |
|
|
|
137 |
return html
|
138 |
|
139 |
|
140 |
+
def record_submission(benchmark_name, contributor, jsonl_file, hf_path, hf_split, field_name, hf_config):
|
141 |
if not benchmark_name or not benchmark_name.strip():
|
142 |
return "β Please provide a benchmark name."
|
143 |
|
|
|
177 |
return "β Please provide a dataset split for the HuggingFace dataset."
|
178 |
|
179 |
try:
|
180 |
+
if hf_config and hf_config.strip():
|
181 |
+
dataset_info = load_dataset(hf_path.strip(), hf_config.strip(), split=hf_split.strip(), streaming=True, trust_remote_code=True)
|
182 |
+
else:
|
183 |
+
dataset_info = load_dataset(hf_path.strip(), split=hf_split.strip(), streaming=True, trust_remote_code=True)
|
184 |
first_item = next(iter(dataset_info))
|
185 |
if field_name.strip() not in first_item:
|
186 |
available_fields = list(first_item.keys())
|
|
|
306 |
with gr.Column():
|
307 |
hf_path_input = gr.Textbox(label="HuggingFace Dataset Path", placeholder="e.g., author/benchmark-name")
|
308 |
hf_split_input = gr.Textbox(label="Dataset split (only if providing HuggingFace Dataset)", placeholder="e.g., validation, test")
|
309 |
+
hf_config_input = gr.Textbox(label="Dataset Config (optional)", placeholder="name of dataset config")
|
310 |
field_name_input = gr.Textbox(label="Context or Question Field Name", placeholder="e.g., context, question, ...")
|
311 |
|
312 |
submit_button = gr.Button("Submit for Contamination Check")
|
|
|
314 |
|
315 |
submit_button.click(
|
316 |
fn=record_submission,
|
317 |
+
inputs=[benchmark_name_input, contributor_input, jsonl_input, hf_path_input, hf_split_input, field_name_input, hf_config_input],
|
318 |
outputs=result_output
|
319 |
)
|
320 |
|