Hao Xu committed on
Commit
04cf55a
·
1 Parent(s): 73b542b

handling datasets with config

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -137,7 +137,7 @@ def build_table(source, refresh=False):
137
  return html
138
 
139
 
140
- def record_submission(benchmark_name, contributor, jsonl_file, hf_path, hf_split, field_name):
141
  if not benchmark_name or not benchmark_name.strip():
142
  return "❌ Please provide a benchmark name."
143
 
@@ -177,7 +177,10 @@ def record_submission(benchmark_name, contributor, jsonl_file, hf_path, hf_split
177
  return "❌ Please provide a dataset split for the HuggingFace dataset."
178
 
179
  try:
180
- dataset_info = load_dataset(hf_path.strip(), split=hf_split.strip(), streaming=True, trust_remote_code=True)
 
 
 
181
  first_item = next(iter(dataset_info))
182
  if field_name.strip() not in first_item:
183
  available_fields = list(first_item.keys())
@@ -303,6 +306,7 @@ with gr.Blocks() as interface:
303
  with gr.Column():
304
  hf_path_input = gr.Textbox(label="HuggingFace Dataset Path", placeholder="e.g., author/benchmark-name")
305
  hf_split_input = gr.Textbox(label="Dataset split (only if providing HuggingFace Dataset)", placeholder="e.g., validation, test")
 
306
  field_name_input = gr.Textbox(label="Context or Question Field Name", placeholder="e.g., context, question, ...")
307
 
308
  submit_button = gr.Button("Submit for Contamination Check")
@@ -310,7 +314,7 @@ with gr.Blocks() as interface:
310
 
311
  submit_button.click(
312
  fn=record_submission,
313
- inputs=[benchmark_name_input, contributor_input, jsonl_input, hf_path_input, hf_split_input, field_name_input],
314
  outputs=result_output
315
  )
316
 
 
137
  return html
138
 
139
 
140
+ def record_submission(benchmark_name, contributor, jsonl_file, hf_path, hf_split, field_name, hf_config):
141
  if not benchmark_name or not benchmark_name.strip():
142
  return "❌ Please provide a benchmark name."
143
 
 
177
  return "❌ Please provide a dataset split for the HuggingFace dataset."
178
 
179
  try:
180
+ if hf_config and hf_config.strip():
181
+ dataset_info = load_dataset(hf_path.strip(), hf_config.strip(), split=hf_split.strip(), streaming=True, trust_remote_code=True)
182
+ else:
183
+ dataset_info = load_dataset(hf_path.strip(), split=hf_split.strip(), streaming=True, trust_remote_code=True)
184
  first_item = next(iter(dataset_info))
185
  if field_name.strip() not in first_item:
186
  available_fields = list(first_item.keys())
 
306
  with gr.Column():
307
  hf_path_input = gr.Textbox(label="HuggingFace Dataset Path", placeholder="e.g., author/benchmark-name")
308
  hf_split_input = gr.Textbox(label="Dataset split (only if providing HuggingFace Dataset)", placeholder="e.g., validation, test")
309
+ hf_config_input = gr.Textbox(label="Dataset Config (optional)", placeholder="name of dataset config")
310
  field_name_input = gr.Textbox(label="Context or Question Field Name", placeholder="e.g., context, question, ...")
311
 
312
  submit_button = gr.Button("Submit for Contamination Check")
 
314
 
315
  submit_button.click(
316
  fn=record_submission,
317
+ inputs=[benchmark_name_input, contributor_input, jsonl_input, hf_path_input, hf_split_input, field_name_input, hf_config_input],
318
  outputs=result_output
319
  )
320