Spaces:

infini-gram-mini
/

Benchmark-Contamination-Monitoring-System

Running

App Files Files Community

Hao Xu committed on May 27

Commit

30c3967

1 Parent(s): ed84703

submission functionality update

Browse files

Files changed (2) hide show

app.py +223 -60
community_results.json +0 -3

app.py CHANGED Viewed

@@ -2,81 +2,236 @@ import gradio as gr
 import os
 import json
 import pandas as pd
 def load_data(source):
-    data = []
     if source == "core":
-        with open("data.json", "r") as file:
-            data = json.load(file)
-    elif source == "community":
-        with open("community_results.json", "r") as file:
-            data = json.load(file)
     return data
 def build_table(source):
     data = load_data(source)
     entries = []
     for entry in data:
-        entries.append({
-            "Benchmark": entry.get("Benchmark", ""),
-            "Category": entry.get("Category", ""),
-            "Pile Dirty (%)": entry.get("Pile Dirty", ""),
-            "DCLM Dirty (%)": entry.get("DCLM Dirty", ""),
-            "CC Dirty (%)": entry.get("CC202505 Dirty", ""),
-        })
-    return pd.DataFrame(entries).sort_values(by="Pile Dirty (%)", ascending=False)
-def record_submission(benchmark_name, jsonl_file, hf_path, hf_split, field_name):
-    if jsonl_file is None and not hf_path:
-        return "Please provide either a .jsonl file or a HuggingFace dataset path."
-    if hf_path and not hf_split:
-        return "Please provide a dataset split for the HuggingFace dataset."
-    if not field_name:
-        return "Please provide a field name."
-    entry = {
-        "name": benchmark_name,
-        "type": "jsonl" if jsonl_file else "hf",
-        "path": jsonl_file.name if jsonl_file else hf_path,
-        "split": hf_split if hf_path else None,
-        "field_name": field_name,
-    }
-    queue_file = "pending_submissions.json"
-    existing = []
-    if os.path.exists(queue_file):
-        with open(queue_file, "r") as f:
-            existing = json.load(f)
-    existing.append(entry)
-    with open(queue_file, "w") as f:
-        json.dump(existing, f, indent=2)
-    return "✅ Submission received! You'll be notified when processing is complete."
 with gr.Blocks() as interface:
     gr.Markdown("# 📖 Benchmark Contamination Bulletin")
     with gr.Tabs():
-        with gr.Tab(label="Leaderboard"):
             source_radio = gr.Radio(
                 choices=["core", "community"],
                 label="Select Benchmark Source",
                 value="core"
             )
-            table_columns = ["Benchmark", "Category", "Pile Dirty (%)", "DCLM Dirty (%)", "CC202505 Dirty (%)"]
-            leaderboard_table = gr.Dataframe(
-                value=build_table("core"),
-                headers=table_columns,
-                interactive=False,
-                wrap=True,
-                label="Dirty Rates"
-            )
             def update_table(source):
                 return build_table(source)
@@ -84,14 +239,22 @@ with gr.Blocks() as interface:
             source_radio.change(
                 fn=update_table,
                 inputs=source_radio,
-                outputs=leaderboard_table
             )
-        # Submission Tab
-        with gr.Tab(label="Submission"):
-            gr.Markdown("## Submit Your Dataset for Contamination Checking")
-            benchmark_name_input = gr.Textbox(label="Benchmark Name")
             with gr.Row():
                 jsonl_input = gr.File(label="Upload .jsonl File", file_types=[".jsonl"])
@@ -105,7 +268,7 @@ with gr.Blocks() as interface:
             submit_button.click(
                 fn=record_submission,
-                inputs=[benchmark_name_input, jsonl_input, hf_path_input, hf_split_input, field_name_input],
                 outputs=result_output
             )

 import os
 import json
 import pandas as pd
+from huggingface_hub import HfApi, hf_hub_download
+from datasets import load_dataset
+import requests
# Deployment settings, all read from the environment (None when unset).
# TOKEN is not referenced by the code visible in this listing.
TOKEN = os.getenv("HF_TOKEN")
OWNER = os.getenv("OWNER")
# Base address of the submission server; record_submission posts to URL + "/submit".
URL = os.getenv("URL")
# Dataset id that load_data passes to load_dataset for the community results.
RESULTS_COMMUNITY = "{}/benchmark_results".format(OWNER)
# Hub client instance; not referenced by the code visible in this listing.
api = HfApi()
 def load_data(source):
     """Load the benchmark result rows for one bulletin source.

     Args:
         source: ``"core"`` reads the bundled ``data.json``; any other value
             loads the ``train`` split of the ``RESULTS_COMMUNITY`` dataset.

     Returns:
         The parsed JSON content for ``"core"``, otherwise a list with one
         item per dataset row.
     """
     if source == "core":
         # Decode explicitly as UTF-8 so the result does not depend on the
         # platform's default locale encoding.
         with open("data.json", "r", encoding="utf-8") as f:
             return json.load(f)
     return list(load_dataset(RESULTS_COMMUNITY, split='train'))
 def _format_dirty_rate(value):
     """Return the display text for one dirty-rate cell.

     Non-negative numbers are shown with one decimal place. ``None`` and
     negative (or NaN) numbers mean "no measurement" and become "N/A".
     Any other value is shown as HTML-escaped text.
     """
     from html import escape

     if value is None:
         return "N/A"
     # bool is a subclass of int; treat it as text rather than as a rate.
     if isinstance(value, (int, float)) and not isinstance(value, bool):
         # The -1 "missing" default is an int, so ints must be handled here too.
         return f"{value:5.1f}" if value >= 0 else "N/A"
     return escape(str(value))


 def build_table(source):
     """Build the bulletin for ``source`` as a click-to-sort HTML table.

     Args:
         source: ``"core"`` shows a "Category" column; any other value shows
             a "Contributor" column instead. The rows come from
             ``load_data(source)``.

     Returns:
         One HTML string containing the table, the ``sortTable`` script used
         by the header ``onclick`` handlers, and the CSS for the
         sort-direction triangles.
     """
     from html import escape

     second_col = "Category" if source == "core" else "Contributor"
     headers = ["Benchmark", second_col, "Pile-train Dirty (%)", "DCLM-baseline Dirty (%)", "CC-2025-05 Dirty (%)"]
     cell_style = "border: 1px solid #ddd; padding: 8px; text-align: right;"

     parts = [
         '<table id="benchmarkTable" style="border-collapse: collapse; width: 100%;">',
         "<thead><tr>",
     ]
     parts.extend(
         f'<th style="{cell_style}" onclick="sortTable(this)">{col} <span class="triangle"></span></th>'
         for col in headers
     )
     parts.append("</tr></thead>")
     parts.append("<tbody>")

     for entry in load_data(source):
         # Row text can come from community submissions, so escape everything
         # that is interpolated into markup or attribute values.
         name = escape(str(entry.get("Benchmark") or ""))
         url = escape(str(entry.get("URL") or "#"), quote=True)
         cells = [
             f'<a href="{url}" target="_blank" rel="noopener noreferrer">{name}</a>',
             escape(str(entry.get(second_col) or "")),
             _format_dirty_rate(entry.get("Pile Dirty", -1)),
             _format_dirty_rate(entry.get("DCLM Dirty", -1)),
             _format_dirty_rate(entry.get("CC202505 Dirty", -1)),
         ]
         row_html = "".join(f'<td style="{cell_style}">{cell}</td>' for cell in cells)
         parts.append(f"<tr>{row_html}</tr>")

     parts.append("</tbody></table>")
     parts.append("""
     <script>
         let sortDirection = {};
         function sortTable(header) {
             var table = document.getElementById("benchmarkTable");
             var rows = Array.from(table.rows).slice(1);
             var columnIndex = Array.from(header.parentNode.children).indexOf(header);
             var isAscending = sortDirection[columnIndex] === 'ascending';
             sortDirection[columnIndex] = isAscending ? 'descending' : 'ascending';
             var allHeaders = header.parentNode.children;
             Array.from(allHeaders).forEach(th => {
                 th.querySelector('.triangle').classList.remove('ascending', 'descending');
             });
             header.querySelector('.triangle').classList.add(sortDirection[columnIndex]);
             rows.sort(function(rowA, rowB) {
                 var cellA = rowA.cells[columnIndex].innerText;
                 var cellB = rowB.cells[columnIndex].innerText;
                 if (isNaN(cellA)) {
                     return isAscending ? cellA.localeCompare(cellB) : cellB.localeCompare(cellA);
                 }
                 return isAscending ? parseFloat(cellA) - parseFloat(cellB) : parseFloat(cellB) - parseFloat(cellA);
             });
             for (var i = 0; i < rows.length; i++) {
                 table.appendChild(rows[i]);
             }
         }
     </script>
     """)
     parts.append("""
     <style>
         .triangle {
             display: inline-block;
             width: 0;
             height: 0;
             border-left: 5px solid transparent;
             border-right: 5px solid transparent;
             margin-left: 5px;
             transition: transform 0.2s;
         }
         .ascending {
             border-bottom: 5px solid #000;
         }
         .descending {
             border-top: 5px solid #000;
         }
     </style>
     """)
     return "\n".join(parts)
# (connect, read) timeout in seconds for the submission request, so a stalled
# server cannot block the UI callback forever.
_SUBMIT_TIMEOUT = (10, 120)


def _check_jsonl_sample(path, field, max_records=10):
    """Validate the first ``max_records`` records of a JSONL file.

    Returns an error message for the user if a sampled line is not valid
    JSON, is not a JSON object, or lacks ``field``; returns ``None`` when the
    sample looks fine. Blank lines are skipped. May raise ``OSError`` or
    ``UnicodeError`` if the file cannot be read as UTF-8 text.
    """
    checked = 0
    with open(path, 'r', encoding='utf-8') as f:
        for line_no, raw in enumerate(f, start=1):
            text = raw.strip()
            if not text:
                # A trailing newline or spacer line is not a malformed record.
                continue
            try:
                record = json.loads(text)
            except json.JSONDecodeError as e:
                return f"❌ Invalid JSON format in line {line_no}: {str(e)}"
            if not isinstance(record, dict):
                return f"❌ Line {line_no} of the JSONL file is not a JSON object."
            if field not in record:
                return f"❌ Field '{field}' not found in JSONL file. Available fields: {', '.join(record)}"
            checked += 1
            if checked >= max_records:
                break
    if checked == 0:
        return "❌ The uploaded file is empty."
    return None


def _check_hf_dataset(hf_path, hf_split, field):
    """Return an error message if the dataset's first row lacks ``field``, else ``None``."""
    try:
        # NOTE(security): trust_remote_code=True lets the dataset repository
        # run its own loading script in this process, and hf_path comes
        # straight from the submission form. Kept to preserve behavior;
        # review whether script-based datasets really must be supported.
        rows = load_dataset(hf_path, split=hf_split, streaming=True, trust_remote_code=True)
        first_item = next(iter(rows), None)
    except Exception as e:
        return f"❌ Could not access HuggingFace dataset: {str(e)}"
    if first_item is None:
        return "❌ The HuggingFace dataset split is empty."
    if field not in first_item:
        return f"❌ Field '{field}' not found in dataset. Available fields: {', '.join(first_item)}"
    return None


def record_submission(benchmark_name, contributor, jsonl_file, hf_path, hf_split, field_name):
    """Validate a benchmark submission and forward it to the submission server.

    Args:
        benchmark_name: Display name of the benchmark (required).
        contributor: Name of the submitter; may be empty.
        jsonl_file: Uploaded file, either an object with a ``name`` attribute
            holding its path or the path itself, or ``None``. Takes
            precedence over ``hf_path`` when both are given.
        hf_path: HuggingFace dataset path, used when no file is uploaded.
        hf_split: Dataset split; required when ``hf_path`` is used.
        field_name: Name of the field that must exist in the records (required).

    Returns:
        A message for the user. Every failure is reported in the returned
        text (prefixed with "❌"); this function does not raise.
    """
    name = (benchmark_name or "").strip()
    if not name:
        return "❌ Please provide a benchmark name."
    field = (field_name or "").strip()
    if not field:
        return "❌ Please provide a field name."

    hf_path = (hf_path or "").strip()
    hf_split = (hf_split or "").strip()
    has_jsonl = jsonl_file is not None
    has_hf = bool(hf_path)
    if not has_jsonl and not has_hf:
        return "❌ Please provide either a .jsonl file or a HuggingFace dataset path."

    upload_path = None
    if has_jsonl:
        # Accept both a file wrapper exposing .name and a plain path string.
        upload_path = getattr(jsonl_file, "name", jsonl_file)
        try:
            problem = _check_jsonl_sample(upload_path, field)
        except (OSError, UnicodeError) as e:
            return f"❌ Error reading file: {str(e)}"
    else:
        if not hf_split:
            return "❌ Please provide a dataset split for the HuggingFace dataset."
        problem = _check_hf_dataset(hf_path, hf_split, field)
    if problem:
        return problem

    if not URL:
        return "❌ Error submitting benchmark: the submission server URL is not configured."

    data = {
        'name': name,
        'contributor': (contributor or "").strip(),
        'type': 'jsonl' if has_jsonl else 'hf',
        'split': hf_split if has_hf else '',
        'field_name': field,
        'hf_path': hf_path if has_hf else ''
    }
    endpoint = URL.rstrip("/") + "/submit"
    # This is the UI callback boundary: report any failure as text instead of
    # letting it propagate into the framework.
    try:
        if has_jsonl:
            # The with-block closes the upload even when the request raises.
            with open(upload_path, 'rb') as upload:
                files = {'file': (name + '.jsonl', upload, 'application/json')}
                response = requests.post(endpoint, data=data, files=files, timeout=_SUBMIT_TIMEOUT)
        else:
            response = requests.post(endpoint, data=data, timeout=_SUBMIT_TIMEOUT)

        if response.status_code != 200:
            return f"❌ Server error: {response.status_code} - {response.text}"

        result = response.json()
        status = result.get("status")
        if status == "success":
            message = result.get('message', 'Submission successful!')
            return (
                f"{message}\n\n"
                f"📋 Your submission has been saved and will be processed automatically.\n"
                f"💡 Results will appear in the main leaderboard when ready.\n"
                f"🔄 You can refresh the leaderboard to check for updates."
            )
        if status == "info":
            return f"ℹ️ {result.get('message', 'Submission already exists')}"
        return f"❌ {result.get('message', 'Unknown error occurred')}"
    except Exception as e:
        return f"❌ Error submitting benchmark: {str(e)}"
 # Gradio UI: a "Bulletin" tab that shows the results table and an
 # "Add New Benchmarks" tab that holds the submission form.
 with gr.Blocks() as interface:
     gr.Markdown("# 📖 Benchmark Contamination Bulletin")
     with gr.Tabs():
+        with gr.Tab(label="Bulletin"):
             # Selects which result set build_table renders; starts on "core".
             source_radio = gr.Radio(
                 choices=["core", "community"],
                 label="Select Benchmark Source",
                 value="core"
             )
             # Initial table: rendered once for "core" while the UI is built.
+            leaderboard_html = gr.HTML(build_table("core"))
             # Event handler shared by the radio and the refresh button.
             def update_table(source):
                 return build_table(source)
             # Re-render the table whenever the selected source changes.
             source_radio.change(
                 fn=update_table,
                 inputs=source_radio,
+                outputs=leaderboard_html
             )
             # Rebuilds the table for the currently selected source on demand.
+            refresh_button = gr.Button("Refresh")
+            refresh_button.click(
+                fn=update_table,
+                inputs=source_radio,
+                outputs=leaderboard_html
+            )
+        with gr.Tab(label="Add New Benchmarks"):
+            gr.Markdown("## Add Your Own Benchmarks for Contamination Checking")
+            with gr.Row():
+                benchmark_name_input = gr.Textbox(label="Benchmark Name")
+                contributor_input = gr.Textbox(label="Contributor")
             with gr.Row():
                 jsonl_input = gr.File(label="Upload .jsonl File", file_types=[".jsonl"])
             # NOTE(review): hf_path_input, hf_split_input, field_name_input,
             # submit_button and result_output are used below but not created in
             # the lines shown here; presumably they are defined in a part of the
             # file this view omits. Confirm against the full file.
             # The input order must match record_submission's parameter order.
             submit_button.click(
                 fn=record_submission,
+                inputs=[benchmark_name_input, contributor_input, jsonl_input, hf_path_input, hf_split_input, field_name_input],
                 outputs=result_output
             )

community_results.json DELETED Viewed

@@ -1,3 +0,0 @@
-[
-]