Hao Xu committed
Commit 73b542b · 1 Parent(s): 6e404e3

add documentation

Files changed (1): app.py (+41 -2)
app.py CHANGED
@@ -224,10 +224,29 @@ def record_submission(benchmark_name, contributor, jsonl_file, hf_path, hf_split
 
 
 with gr.Blocks() as interface:
-    gr.Markdown("# 📖 Benchmark Contamination Bulletin")
+    gr.HTML(
+        '''<h1 style="text-align: center;">📖 Benchmark Contamination Monitoring System</h1>
+
+        <p style='font-size: 16px;'>This system monitors potential contamination in benchmark datasets used for evaluating language models across various open-source corpora.</p>
+        <p style='font-size: 16px;'>The system is released along with our paper Infini-gram mini: Exact n-gram Search at the Internet Scale with FM-Index, which documents the methodology and findings in detail.</p>
+        <p style='font-size: 16px;'>We invite the community to contribute by submitting new benchmarks for contamination analysis using the form available in the <b>"Add New Benchmarks"</b> tab.</p>
+        '''
+    )
 
     with gr.Tabs():
         with gr.Tab(label="Bulletin"):
+            gr.Markdown("## Benchmark Contamination Bulletin")
+            with gr.Accordion(label='Click to view instructions', open=False):
+                gr.Markdown('''
+                The **Benchmark Contamination Bulletin** presents contamination statistics for evaluation benchmarks across different data sources.
+
+                - Benchmarks analyzed in our accompanying paper are listed under the **core** source.
+                - User-submitted benchmarks appear under the **community** source.
+                - The contamination rate represents the percentage of benchmark entries identified as *dirty* based on our detection criteria.
+                - The bulletin will be updated regularly to include contamination checks on newly released Common Crawl dumps.
+                - You can sort the results by clicking on the column headers.
+                ''')
+
             source_radio = gr.Radio(
                 choices=["core", "community"],
                 label="Select Benchmark Source",
@@ -253,7 +272,27 @@ with gr.Blocks() as interface:
             )
 
         with gr.Tab(label="Add New Benchmarks"):
-            gr.Markdown("## Add Your Own Benchmarks for Contamination Checking")
+            gr.Markdown('''
+            ## Add Your Own Benchmarks for Contamination Checking
+
+            You can use this form to submit a benchmark for contamination checking. Submissions may include either a direct upload or a reference to a publicly available dataset on Hugging Face.
+
+            ### Submission Guidelines:
+            - **Benchmark Name**: Provide a name for your benchmark.
+            - **Contributor**: Enter your name or affiliation.
+            - **Data Source**:
+                - Upload a `.jsonl` file containing your benchmark entries, or
+                - Specify a Hugging Face dataset path (`author/benchmark-name`) along with the appropriate split (e.g., `test`, `validation`).
+            - **Field Name**: Indicate the field to analyze for contamination:
+                - For question-answering datasets: use the question field.
+                - For language understanding tasks: use the context or passage field.
+
+            ### What Happens Next:
+            Once submitted, your benchmark will be queued for analysis. Results will be published in the **community** section of the bulletin.
+
+            Processing time may vary depending on the dataset format and size. You can check the results by navigating to the **Bulletin** tab and selecting the **community** source, then clicking **Refresh**.
+            ''')
+
 
             with gr.Row():
                 benchmark_name_input = gr.Textbox(label="Benchmark Name")
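
For reference, the two data-source options described in the new "Add New Benchmarks" documentation could be consumed as in the minimal Python sketch below. The file name, dataset path, split, and field name are hypothetical placeholders, and the use of the `datasets` library for the Hugging Face path is an assumption; the app's actual ingestion code is not part of this diff.

import json

from datasets import load_dataset  # assumption: HF-path submissions are read with the `datasets` library

# Option 1: a direct .jsonl upload, one JSON object per line,
# e.g. {"question": "...", "answer": "..."} (file name is hypothetical).
with open("my_benchmark.jsonl") as f:
    entries = [json.loads(line) for line in f]

# Option 2 (alternative): a publicly available Hugging Face dataset path plus a split.
entries = load_dataset("author/benchmark-name", split="test")

# The submitted "Field Name" selects the text that is checked for contamination,
# e.g. the question field of a question-answering benchmark.
texts = [entry["question"] for entry in entries]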