Spaces:

infini-gram-mini
/

Benchmark-Contamination-Monitoring-System

Running

Benchmark-Contamination-Monitoring-System / app.py

Hao Xu

file upload fix

feaf833 about 1 month ago

9.94 kB

	import gradio as gr
	import os
	import json
	import pandas as pd
	from huggingface_hub import HfApi, hf_hub_download
	from datasets import load_dataset
	import requests

	# Deployment configuration, read from environment variables. Each value is
	# None when its variable is unset.
	TOKEN = os.getenv("HF_TOKEN")  # not referenced elsewhere in the visible code
	OWNER = os.getenv("OWNER")
	URL = os.getenv("URL")  # base URL that record_submission posts to

	# Dataset repo id passed to load_dataset() for community results.
	RESULTS_COMMUNITY = f"{OWNER}/benchmark_results"

	# Hub API client; not referenced elsewhere in the visible code.
	api = HfApi()


	def load_data(source):
	    """Return the benchmark result entries for *source* as a list.

	    Args:
	        source: ``"core"`` reads the local ``data.json`` file; any other
	            value loads the ``train`` split of the ``RESULTS_COMMUNITY``
	            dataset.

	    Returns:
	        The parsed JSON content for ``"core"``, otherwise a list holding
	        every entry of the community dataset split.
	    """
	    if source == "core":
	        # JSON is UTF-8 by specification; do not depend on the locale default.
	        with open("data.json", "r", encoding="utf-8") as f:
	            return json.load(f)

	    ds = load_dataset(RESULTS_COMMUNITY, split='train')
	    return list(ds)


	def _safe_href(url):
	    """Return *url* as an escaped href value, or "#" if it is not http(s)."""
	    from html import escape

	    text = str(url).strip() if url else "#"
	    # Only allow web links so entries cannot inject javascript:/data: URLs.
	    if text != "#" and not text.lower().startswith(("http://", "https://")):
	        return "#"
	    return escape(text, quote=True)


	def _format_dirty(value):
	    """Format a contamination percentage cell; negative or missing -> "N/A"."""
	    from html import escape

	    if value is None:
	        return "N/A"
	    if isinstance(value, (int, float)) and not isinstance(value, bool):
	        # NaN fails the comparison and therefore also renders as "N/A".
	        return f"{value:5.1f}" if value >= 0 else "N/A"
	    # Non-numeric values are passed through, but escaped.
	    return escape(str(value))


	def build_table(source):
	    """Render the contamination bulletin for *source* as an HTML fragment.

	    The fragment contains a table (one row per entry returned by
	    ``load_data(source)``), followed by a ``<script>`` defining the
	    ``sortTable`` click handler and a ``<style>`` block for the sort markers.

	    Args:
	        source: ``"core"`` shows a "Category" column; any other value shows a
	            "Contributor" column, which is filled only for ``"community"``.

	    Returns:
	        The HTML string. All text taken from the entries is HTML-escaped and
	        links are restricted to http(s) URLs.
	    """
	    from html import escape

	    data = load_data(source)

	    if source == "core":
	        headers = ["Benchmark", "Category", "Pile-train Dirty (%)", "DCLM-baseline Dirty (%)", "CC-2025-05 Dirty (%)"]
	    else:
	        headers = ["Benchmark", "Contributor", "Pile-train Dirty (%)", "DCLM-baseline Dirty (%)", "CC-2025-05 Dirty (%)"]

	    # Displayed column title -> key holding the value in each entry.
	    dirty_columns = {
	        "Pile-train Dirty (%)": "Pile Dirty",
	        "DCLM-baseline Dirty (%)": "DCLM Dirty",
	        "CC-2025-05 Dirty (%)": "CC202505 Dirty",
	    }

	    table_open = """
	<table id="benchmarkTable" style="border-collapse: collapse; width: 100%;">
	<thead><tr>
	"""
	    parts = [table_open]
	    for col in headers:
	        parts.append(f'<th style="border: 1px solid #ddd; padding: 8px; text-align: right;" onclick="sortTable(this)">{col} <span class="triangle"></span></th>')
	    parts.append('</tr></thead>\n<tbody>\n')

	    for entry in data:
	        name = escape(str(entry.get("Benchmark") or ""))
	        href = _safe_href(entry.get("URL"))

	        row = {"Benchmark": f'<a href="{href}" target="_blank">{name}</a>'}
	        for col, key in dirty_columns.items():
	            # -1 is the "no result" marker and is rendered as "N/A".
	            row[col] = _format_dirty(entry.get(key, -1))

	        if source == "core":
	            row["Category"] = escape(str(entry.get("Category") or ""))
	        elif source == "community":
	            row["Contributor"] = escape(str(entry.get("Contributor") or ""))

	        cells = "".join(
	            f'<td style="border: 1px solid #ddd; padding: 8px; text-align: right;">{row.get(col, "")}</td>'
	            for col in headers
	        )
	        parts.append(f"<tr>{cells}</tr>\n")

	    parts.append("</tbody></table>")

	    # NOTE(review): sortTable stores the previous direction in isAscending
	    # before toggling and the comparator uses that pre-toggle value, so the
	    # first click adds the 'ascending' marker but orders rows descending.
	    # Left unchanged here - confirm whether that is intended.
	    parts.append("""
	<script>
	let sortDirection = {};

	function sortTable(header) {
	var table = document.getElementById("benchmarkTable");
	var rows = Array.from(table.rows).slice(1);
	var columnIndex = Array.from(header.parentNode.children).indexOf(header);
	var isAscending = sortDirection[columnIndex] === 'ascending';

	sortDirection[columnIndex] = isAscending ? 'descending' : 'ascending';

	var allHeaders = header.parentNode.children;
	Array.from(allHeaders).forEach(th => {
	th.querySelector('.triangle').classList.remove('ascending', 'descending');
	});

	header.querySelector('.triangle').classList.add(sortDirection[columnIndex]);

	rows.sort(function(rowA, rowB) {
	var cellA = rowA.cells[columnIndex].innerText;
	var cellB = rowB.cells[columnIndex].innerText;

	if (isNaN(cellA)) {
	return isAscending ? cellA.localeCompare(cellB) : cellB.localeCompare(cellA);
	}
	return isAscending ? parseFloat(cellA) - parseFloat(cellB) : parseFloat(cellB) - parseFloat(cellA);
	});

	for (var i = 0; i < rows.length; i++) {
	table.appendChild(rows[i]);
	}
	}
	</script>
	""")

	    parts.append("""
	<style>
	.triangle {
	display: inline-block;
	width: 0;
	height: 0;
	border-left: 5px solid transparent;
	border-right: 5px solid transparent;
	margin-left: 5px;
	transition: transform 0.2s;
	}
	.ascending {
	border-bottom: 5px solid #000;
	}
	.descending {
	border-top: 5px solid #000;
	}
	</style>
	""")

	    return "".join(parts)


	def _check_jsonl_field(path, field, max_lines=5):
	    """Check that the first *max_lines* lines of a JSONL file contain *field*.

	    Returns an error message for the user, or None when the sample is valid.
	    """
	    try:
	        with open(path, 'r', encoding='utf-8') as f:
	            line_count = 0
	            for line_count, line in enumerate(f, start=1):
	                if line_count > max_lines:
	                    break

	                try:
	                    entry = json.loads(line.strip())
	                except json.JSONDecodeError as e:
	                    return f"❌ Invalid JSON format in line {line_count}: {str(e)}"

	                # Valid JSON that is not an object (list, number, ...) has no fields.
	                if not isinstance(entry, dict):
	                    return f"❌ Line {line_count} is not a JSON object."

	                if field not in entry:
	                    available_fields = list(entry.keys())
	                    return f"❌ Field '{field}' not found in JSONL file. Available fields: {', '.join(available_fields)}"

	            if line_count == 0:
	                return "❌ The uploaded file is empty."
	    except Exception as e:
	        # UI boundary: report any read/decode failure instead of raising.
	        return f"❌ Error reading file: {str(e)}"
	    return None


	def _check_hf_field(path, split, field):
	    """Check that the first item of a streamed HuggingFace dataset has *field*.

	    Returns an error message for the user, or None when the check passes.
	    """
	    try:
	        # SECURITY NOTE(review): trust_remote_code=True is applied to a
	        # user-supplied dataset path, which lets that repository's loading
	        # script run in this process. Kept as in the original; confirm this
	        # is acceptable for the deployment.
	        stream = load_dataset(path, split=split, streaming=True, trust_remote_code=True)
	        first_item = next(iter(stream), None)
	    except Exception as e:
	        return f"❌ Could not access HuggingFace dataset: {str(e)}"

	    if first_item is None:
	        return "❌ The HuggingFace dataset split is empty."

	    if field not in first_item:
	        available_fields = list(first_item.keys())
	        return f"❌ Field '{field}' not found in dataset. Available fields: {', '.join(available_fields)}"
	    return None


	def record_submission(benchmark_name, contributor, jsonl_file, hf_path, hf_split, field_name):
	    """Validate a benchmark submission and post it to the service at ``URL``.

	    Args:
	        benchmark_name: Required benchmark name.
	        contributor: Contributor name; may be empty or None.
	        jsonl_file: Uploaded file (an object with a ``name`` path attribute,
	            or a path string), or None when no file was uploaded.
	        hf_path: HuggingFace dataset path, used when no file is uploaded.
	        hf_split: Dataset split; required when the HuggingFace path is used.
	        field_name: Required name of the field that must exist in the data.

	    Returns:
	        A status message string. Validation and submission failures are
	        reported as messages starting with "❌" rather than raised.
	    """
	    name = (benchmark_name or "").strip()
	    if not name:
	        return "❌ Please provide a benchmark name."

	    field = (field_name or "").strip()
	    if not field:
	        return "❌ Please provide a field name."

	    dataset_path = (hf_path or "").strip()
	    split = (hf_split or "").strip()
	    has_jsonl = jsonl_file is not None
	    has_hf = bool(dataset_path)

	    if not has_jsonl and not has_hf:
	        return "❌ Please provide either a .jsonl file or a HuggingFace dataset path."

	    jsonl_path = None
	    if has_jsonl:
	        # Accept both an upload object exposing .name and a plain path string.
	        jsonl_path = getattr(jsonl_file, "name", jsonl_file)
	        error = _check_jsonl_field(jsonl_path, field)
	    else:
	        if not split:
	            return "❌ Please provide a dataset split for the HuggingFace dataset."
	        error = _check_hf_field(dataset_path, split, field)

	    if error:
	        return error

	    try:
	        data = {
	            'name': name,
	            'contributor': (contributor or "").strip(),
	            'type': 'jsonl' if has_jsonl else 'hf',
	            'split': split if has_hf else '',
	            'field_name': field,
	            'hf_path': dataset_path if has_hf else ''
	        }
	        print(json.dumps(data))
	        form = {"payload": json.dumps(data)}

	        if has_jsonl:
	            # The with-block closes the upload handle even if the request fails.
	            with open(jsonl_path, 'rb') as upload:
	                files = {'file': (name + '.jsonl', upload, 'application/json')}
	                response = requests.post(f"{URL}/", data=form, files=files, timeout=30)
	        else:
	            response = requests.post(f"{URL}/", data=form, files={}, timeout=30)

	        if response.status_code != 200:
	            return f"❌ Server error: {response.status_code} - {response.text}"

	        result = response.json()
	        status = result.get("status")
	        if status == "success":
	            return f"{result.get('message', 'Submission successful!')}"
	        if status == "info":
	            return f"❌ {result.get('message', 'Submission already exists')}"
	        return f"❌ {result.get('message', 'Unknown error occurred')}"

	    except Exception as e:
	        return f"❌ Error submitting benchmark: {str(e)}"


	# Gradio UI: a "Bulletin" tab showing the results table and an
	# "Add New Benchmarks" tab with the submission form.
	# NOTE(review): the indentation of this listing is flattened, so the exact
	# nesting of components inside Row/Column/Tab containers is not visible here.
	with gr.Blocks() as interface:
	gr.Markdown("# 📖 Benchmark Contamination Bulletin")

	with gr.Tabs():
	with gr.Tab(label="Bulletin"):
	# Selects which result set build_table renders.
	source_radio = gr.Radio(
	choices=["core", "community"],
	label="Select Benchmark Source",
	value="core"
	)

	# Initial table is rendered once, at UI construction time, for "core".
	leaderboard_html = gr.HTML(build_table("core"))

	# Event callback: rebuild the table HTML for the selected source.
	def update_table(source):
	return build_table(source)

	# Re-render when the selected source changes.
	source_radio.change(
	fn=update_table,
	inputs=source_radio,
	outputs=leaderboard_html
	)

	# Manual refresh re-renders the table for the currently selected source.
	refresh_button = gr.Button("Refresh")
	refresh_button.click(
	fn=update_table,
	inputs=source_radio,
	outputs=leaderboard_html
	)

	with gr.Tab(label="Add New Benchmarks"):
	gr.Markdown("## Add Your Own Benchmarks for Contamination Checking")

	with gr.Row():
	benchmark_name_input = gr.Textbox(label="Benchmark Name")
	contributor_input = gr.Textbox(label="Contributor")

	# Data source: either an uploaded .jsonl file or a HuggingFace dataset path.
	with gr.Row():
	jsonl_input = gr.File(label="Upload .jsonl File", file_types=[".jsonl"])
	with gr.Column():
	hf_path_input = gr.Textbox(label="HuggingFace Dataset Path")
	hf_split_input = gr.Textbox(label="Dataset split (only if providing HuggingFace Dataset)", placeholder="e.g., validation, test")
	field_name_input = gr.Textbox(label="Context or Question Field Name", placeholder="e.g., context, question, ...")

	submit_button = gr.Button("Submit for Contamination Check")
	result_output = gr.Textbox(label="Submission Status", interactive=False)

	# The inputs list order must match record_submission's parameter order.
	submit_button.click(
	fn=record_submission,
	inputs=[benchmark_name_input, contributor_input, jsonl_input, hf_path_input, hf_split_input, field_name_input],
	outputs=result_output
	)

	# Starts the app when this module is executed.
	interface.launch()