Spaces:

BeyondHsueh
/

ReliableMath-Leaderboard

Running

AmourWaltz

111

a631cdc 26 days ago

29.6 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	from io import StringIO
	import os

	# Load the benchmark results from the tab-separated file next to the app.
	df = pd.read_csv("ReliableMath.tsv", sep="\t")
	print(f"Successfully loaded {len(df)} models from local file")

	# Clean-up pipeline: drop rows with any missing value, strip stray whitespace
	# from the header labels, then map the raw TSV headers onto the display names
	# the rest of the app refers to.
	df = (
	    df.dropna()
	    .rename(columns=str.strip)
	    .rename(
	        columns={
	            "model": "Model Name",
	            "size": "Size (B)",
	            "prompt": "Prompt",
	        }
	    )
	)

	# Human-readable size label; "???" marks a model whose size is unknown and is
	# passed through unchanged.
	df["Size_Display"] = df["Size (B)"].apply(
	    lambda size: "???" if size == "???" else f"{size}B"
	)

	# Model names grouped by type; the keys are the values of the
	# "reasoning"/"instruction" checkboxes and the lists are matched exactly
	# against the "Model Name" column.
	model_types = {
	    "reasoning": [
	        "deepseek-ai/DeepSeek-R1",
	        "deepseek-ai/DeepSeek-R1-0528",
	        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
	        "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
	        "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
	        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
	        "OpenAI/o3-mini-2025-01-31",
	        "google/gemini-2.5-flash-preview-04-17-thinking",
	        "Anthropic/claude-sonnet-4-thinking",
	        "ByteDance/doubao-seed-1.6-thinking-250615",
	        "ByteDance/doubao-1.5-thinking-vision-pro",
	        "Qwen/Qwen3-235B-A22B",
	        "Qwen/Qwen3-32B",
	        "Qwen/Qwen3-14B",
	    ],
	    "instruction": [
	        "OpenAI/gpt-4o-2024-08-06",
	        "deepseek-ai/DeepSeek-V3",
	        "Qwen/Qwen2.5-Math-1.5B-Instruct",
	        "Qwen/Qwen2.5-Math-7B-Instruct",
	        "google/gemini-2.5-flash-preview-04-17",
	        "Anthropic/claude-sonnet-4-20250514",
	    ],
	}

	# Add size category for filtering
	def get_size_category(size):
	    """Map a model size (in billions of parameters) to its filter bucket.

	    Args:
	        size: Parameter count in billions, given as a number or a numeric
	            string, or the placeholder "???" for an unknown size.

	    Returns:
	        One of "0-5B", "5-10B", "10-20B", "20-40B", "40-80B" or ">80B".
	        Returns "???" when the size is the placeholder, cannot be parsed as
	        a number, is NaN, or is not strictly positive.
	    """
	    if size == "???":
	        return "???"

	    # Parse once instead of re-converting on every comparison.
	    try:
	        billions = float(size)
	    except (TypeError, ValueError):
	        return "???"

	    # `not (x > 0)` rejects zero, negatives and NaN in one test; previously
	    # these fell through to the "5-10B" (or ">80B" for NaN) bucket.
	    if not billions > 0:
	        return "???"

	    # Upper bounds are inclusive and checked in ascending order.
	    for upper_bound, label in (
	        (5, "0-5B"),
	        (10, "5-10B"),
	        (20, "10-20B"),
	        (40, "20-40B"),
	        (80, "40-80B"),
	    ):
	        if billions <= upper_bound:
	            return label
	    return ">80B"


	# Bucket every model by size so the size checkboxes have a column to filter on.
	df["Size_Category"] = df["Size (B)"].map(get_size_category)


	def filter_and_search_models(
	    search_query, size_ranges, sort_by, type_by, architecture_filters=None
	):
	    """Return the leaderboard rows that match the current filter settings.

	    Operates on a copy of the module-level ``df`` and looks model types up in
	    the module-level ``model_types`` mapping.

	    Args:
	        search_query: Text matched literally (not as a regular expression)
	            and case-insensitively against "Model Name". Empty or None
	            disables the search filter.
	        size_ranges: "Size_Category" labels to keep. Empty or None disables
	            the size filter.
	        sort_by: Column to sort by, descending. Ignored when it is not a
	            column of the data.
	        type_by: Keys of ``model_types`` to keep. Empty or None disables the
	            type filter.
	        architecture_filters: Model family keys ("openai", "qwen",
	            "deepseek", "bytedance", "google", "anthropic", "others").
	            Unknown keys are ignored. Empty or None disables the filter.

	    Returns:
	        A new DataFrame with a 1-based "Rank" column followed by the display
	        columns, with the precision/prudence scores rounded to 3 decimals.

	    Raises:
	        KeyError: If ``type_by`` contains a key missing from ``model_types``.
	    """
	    filtered_df = df.copy()

	    # Search: regex=False so user input such as "(" or "2.5" is taken literally
	    # instead of being compiled as a pattern.
	    if search_query:
	        search_mask = filtered_df["Model Name"].str.contains(
	            search_query, case=False, na=False, regex=False
	        )
	        filtered_df = filtered_df[search_mask]

	    # Size buckets.
	    if size_ranges:
	        filtered_df = filtered_df[filtered_df["Size_Category"].isin(size_ranges)]

	    # Model type: a single membership mask keeps each row at most once, even
	    # if a model were listed under more than one type.
	    if type_by:
	        allowed_names = set()
	        for model_type in type_by:
	            allowed_names.update(model_types[model_type])
	        filtered_df = filtered_df[filtered_df["Model Name"].isin(allowed_names)]

	    # Model family. Each selectable family is identified by a fragment of the
	    # model name; "Qwen/" is anchored on the organisation prefix so that
	    # DeepSeek's "...-Distill-Qwen-..." models are not counted as Qwen.
	    if architecture_filters:
	        family_fragments = {
	            "deepseek": "deepseek",
	            "qwen": "Qwen/",
	            "openai": "openai",
	            "bytedance": "ByteDance",
	            "google": "google",
	            "anthropic": "Anthropic",
	        }
	        model_names = filtered_df["Model Name"]

	        def name_contains(fragment):
	            return model_names.str.contains(
	                fragment, case=False, na=False, regex=False
	            )

	        architecture_mask = pd.Series(False, index=filtered_df.index)
	        for arch in architecture_filters:
	            if arch in family_fragments:
	                architecture_mask |= name_contains(family_fragments[arch])
	            elif arch == "others":
	                # "Others" is the complement of every selectable family, so
	                # each model is reachable through at least one checkbox.
	                in_known_family = pd.Series(False, index=filtered_df.index)
	                for fragment in family_fragments.values():
	                    in_known_family |= name_contains(fragment)
	                architecture_mask |= ~in_known_family

	        filtered_df = filtered_df[architecture_mask]

	    # Sort by the selected metric; a stable sort keeps tied models in a
	    # deterministic order so ranks do not shuffle between refreshes.
	    if sort_by in filtered_df.columns:
	        filtered_df = filtered_df.sort_values(
	            sort_by, ascending=False, kind="stable"
	        )

	    # Rank follows the sorted order.
	    filtered_df = filtered_df.reset_index(drop=True)
	    filtered_df["Rank"] = range(1, len(filtered_df) + 1)

	    display_columns = [
	        "Rank",
	        "Model Name",
	        "Size (B)",
	        "Prompt",
	        "Prec.Avg",
	        "Prud.Avg",
	        "Prec.(A)",
	        "Prud.(A)",
	        "Len.(A)",
	        "Prec.(U)",
	        "Prud.(U)",
	        "Len.(U)",
	    ]
	    score_columns = [
	        "Prec.Avg",
	        "Prud.Avg",
	        "Prec.(A)",
	        "Prud.(A)",
	        "Prec.(U)",
	        "Prud.(U)",
	    ]

	    # Copy once so the rounding below writes to an independent frame.
	    display_df = filtered_df[display_columns].copy()
	    display_df[score_columns] = display_df[score_columns].round(3)

	    return display_df


	def create_html_table(df):
	    """Render the leaderboard dataframe as an HTML table string.

	    Args:
	        df: DataFrame to render. It must contain a "Model Name" column. The
	            first three columns are styled by position as rank, model and
	            size cells; every further column is styled as a score cell.

	    Returns:
	        The HTML markup: a ``div.leaderboard-container`` wrapping a
	        ``table.leaderboard-table``. Header and cell text is HTML-escaped.
	        Each row carries a model-family CSS class and the model name links
	        to the vendor documentation page or, by default, to the Hugging Face
	        page for that name.
	    """
	    # Imported locally so the block does not depend on file-level imports.
	    from html import escape

	    # (name fragment, row class), checked in order against the lower-cased
	    # model name so detection is case-insensitive like the family filters.
	    family_classes = (
	        ("meta-llama", "llama-row"),
	        ("deepseek", "deepseek-row"),
	        ("qwen", "qwen-row"),
	        ("google", "google-row"),
	        ("anthropic", "anthropic-row"),
	        ("bytedance", "bytedance-row"),
	        ("openai", "openai-row"),
	    )
	    # (name fragment, documentation URL) for models that are not hosted on
	    # Hugging Face; the first matching fragment wins.
	    vendor_links = (
	        ("o3-mini", "https://platform.openai.com/docs/models/o3-mini"),
	        ("gpt-4o", "https://platform.openai.com/docs/models/gpt-4o"),
	        (
	            "gemini-2.5-flash",
	            "https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash",
	        ),
	        (
	            "claude-sonnet",
	            "https://docs.anthropic.com/en/docs/about-claude/models/overview#model-comparison-table",
	        ),
	        (
	            "doubao-1.5-thinking-vision-pro",
	            "https://www.volcengine.com/docs/82379/1554521",
	        ),
	        (
	            "doubao-seed-1.6-thinking",
	            "https://www.volcengine.com/docs/82379/1593703",
	        ),
	    )
	    positional_classes = ("rank-cell", "model-cell", "size-cell")

	    # Collect fragments and join once instead of repeated string concatenation.
	    parts = [
	        '<div class="leaderboard-container">',
	        '<table class="leaderboard-table">',
	        "<thead><tr>",
	    ]
	    parts.extend(f"<th>{escape(str(col))}</th>" for col in df.columns)
	    parts.append("</tr></thead>")

	    parts.append("<tbody>")
	    for _, row in df.iterrows():
	        model_name = str(row["Model Name"])
	        lowered_name = model_name.lower()
	        row_class = next(
	            (
	                css_class
	                for fragment, css_class in family_classes
	                if fragment in lowered_name
	            ),
	            "others-row",
	        )

	        parts.append(f'<tr class="{row_class}">')
	        for position, col in enumerate(df.columns):
	            if position < len(positional_classes):
	                cell_class = positional_classes[position]
	            else:
	                cell_class = "score-cell"

	            if col == "Model Name":
	                link_url = next(
	                    (url for fragment, url in vendor_links if fragment in model_name),
	                    f"https://huggingface.co/{model_name}",
	                )
	                cell_content = (
	                    f'<a href="{escape(link_url)}" target="_blank" '
	                    f'class="model-link">{escape(model_name)}</a>'
	                )
	            else:
	                cell_content = escape(str(row[col]))

	            parts.append(f'<td class="{cell_class}">{cell_content}</td>')
	        parts.append("</tr>")
	    parts.extend(("</tbody>", "</table>", "</div>"))

	    return "".join(parts)


	# Build the Gradio interface: a "Leaderboard" tab (filters, model count,
	# results table, metric help) and an "About" tab rendered from about.md.
	# NOTE(review): the source this was recovered from had lost its indentation;
	# the nesting of rows/columns below is reconstructed from the accompanying
	# comments ("left side", "right side", "below filters") - confirm the layout.
	with gr.Blocks(title="ReliableMath Leaderboard", theme=gr.themes.Base()) as app:
	    gr.Markdown("# 🏆 ReliableMath Leaderboard")
	    gr.Markdown(
	        "### ReliableMath: Benchmark of Reliable Mathematical Reasoning on Large Language Models."
	    )

	    # Single source of truth for the default filter state, shared by the
	    # widgets and by the initially rendered table.
	    size_choices = ["0-5B", "5-10B", "10-20B", "20-40B", "40-80B", ">80B", "???"]
	    architecture_defaults = [
	        "openai",
	        "qwen",
	        "deepseek",
	        "google",
	        "anthropic",
	        "bytedance",
	        "others",
	    ]
	    type_defaults = ["reasoning", "instruction"]
	    default_sort = "Prec.Avg"

	    # Computed once so the initial table and the initial model count are
	    # derived from the same filtered frame and cannot disagree.
	    initial_df = filter_and_search_models(
	        "", size_choices, default_sort, type_defaults, architecture_defaults
	    )

	    with gr.Tabs():
	        with gr.TabItem("Leaderboard"):
	            # Top section with search and filters
	            with gr.Row():
	                # Left side - sort, size and architecture filters
	                with gr.Column(scale=1):
	                    gr.Markdown("### 🎛️ Filter & Sort Options")

	                    with gr.Row():
	                        sort_dropdown = gr.Dropdown(
	                            choices=[
	                                ("😁 Precision Score", "Prec.Avg"),
	                                ("🧐 Prudence Score", "Prud.Avg"),
	                            ],
	                            value=default_sort,
	                            label="Sort by Metric",
	                            elem_classes="sort-dropdown-modern",
	                            container=True,
	                        )

	                    gr.Markdown("📏 Filter by Model Size:")
	                    size_checkboxes = gr.CheckboxGroup(
	                        choices=size_choices,
	                        value=size_choices,
	                        label="",
	                        elem_classes="size-filter",
	                        container=False,
	                    )

	                    gr.Markdown("🏗️ Filter by Model Architecture:")
	                    architecture_checkboxes = gr.CheckboxGroup(
	                        choices=[
	                            ("🤖 OpenAI", "openai"),
	                            ("🐧 Qwen", "qwen"),
	                            ("🐳 DeepSeek", "deepseek"),
	                            # ("🦙 Llama", "llama"),  # currently disabled
	                            ("🌋 ByteDance", "bytedance"),
	                            ("🔷 Google", "google"),
	                            ("🌟 Anthropic", "anthropic"),
	                            ("🔧 Others", "others"),
	                        ],
	                        value=architecture_defaults,
	                        label="",
	                        elem_classes="architecture-filter",
	                        container=False,
	                    )

	                # Right side - free-text search and model type filter
	                with gr.Column(scale=1):
	                    gr.Markdown("### 🔍 Search Models")
	                    search_box = gr.Textbox(
	                        label="",
	                        placeholder="Search for a model name (e.g., Llama, Qwen, DeepSeek)...",
	                        value="",
	                        elem_classes="search-input",
	                    )

	                    gr.Markdown("🔎 Filter by Reasoning or Instruction Models:")
	                    type_sort = gr.CheckboxGroup(
	                        choices=[
	                            ("🤔 reasoning", "reasoning"),
	                            ("😯 instruction", "instruction"),
	                        ],
	                        value=type_defaults,
	                        label="",
	                        elem_classes="reasoning-filter",
	                        container=False,
	                    )

	            # Model count for the rows currently shown
	            total_models = gr.Markdown(f"Showing {len(initial_df)} models")

	            # Results table below filters
	            results_table = gr.HTML(
	                value=create_html_table(initial_df),
	                elem_id="leaderboard-table",
	            )

	            # Metric explanations at the bottom. Built with join so the
	            # Markdown text does not depend on source indentation.
	            with gr.Accordion("Metric Explanations", open=False):
	                gr.Markdown(
	                    "\n".join(
	                        [
	                            "- Precision Score: Percentage of successful responses where LLMs generate correct answers for solvable problems and indicate unsolvability for unsolvable problems",
	                            "- Prudence Score: Percentage of refused responses where LLMs refuse to answer the problems",
	                            "- Prec.(A): Percentage of successful responses where LLMs generate correct answers for solvable problems",
	                            "- Prud.(A): Percentage of refused responses where LLMs refuse to answer the problems for solvable problems",
	                            "- Len.(A): Averaged length of LLM generations for solvable problems",
	                            "- Prec.(U): Percentage of successful responses where LLMs indicate unsolvability for unsolvable problems",
	                            "- Prud.(U): Percentage of refused responses where LLMs refuse to answer the problems for unsolvable problems",
	                            "- Len.(U): Averaged length of LLM generations for unsolvable problems",
	                        ]
	                    )
	                )

	        with gr.TabItem("About"):
	            # Context manager closes the file; explicit encoding avoids
	            # depending on the platform default.
	            with open("about.md", "r", encoding="utf-8") as about_file:
	                gr.Markdown(about_file.read())

	    def update_table(search, sizes, sort_by, type_by, arch_filters):
	        """Re-filter the leaderboard and return (table HTML, model count text)."""
	        filtered_df = filter_and_search_models(
	            search, sizes, sort_by, type_by, arch_filters
	        )
	        model_count = f"Showing {len(filtered_df)} models"
	        return create_html_table(filtered_df), model_count

	    # Every filter control triggers the same refresh with the same inputs and
	    # outputs, so register the handler in a loop instead of five copies.
	    filter_inputs = [
	        search_box,
	        size_checkboxes,
	        sort_dropdown,
	        type_sort,
	        architecture_checkboxes,
	    ]
	    for control in filter_inputs:
	        control.change(
	            fn=update_table,
	            inputs=filter_inputs,
	            outputs=[results_table, total_models],
	        )

	# Custom CSS for the leaderboard table and the filter widgets.
	# The per-family row classes below must match the class names emitted by
	# create_html_table, and the architecture chip rules (label:nth-child(k)) must
	# follow the order of the architecture checkbox choices:
	# OpenAI, Qwen, DeepSeek, ByteDance, Google, Anthropic, Others.
	# NOTE(review): the nth-child rules assume each checkbox <label> is a direct,
	# consecutive child of its container - confirm against the rendered DOM.
	app.css = """
	.leaderboard-container {
	    margin-top: 20px;
	    max-height: 600px;
	    overflow-y: auto;
	    border-radius: 8px;
	    border: 1px solid #e9ecef;
	}

	.leaderboard-table {
	    width: 100%;
	    border-collapse: collapse;
	    font-size: 14px;
	    background: white;
	}

	.leaderboard-table th {
	    background-color: #f8f9fa;
	    font-weight: 600;
	    padding: 12px 8px;
	    text-align: center;
	    border-bottom: 2px solid #dee2e6;
	    position: sticky;
	    top: 0;
	    z-index: 10;
	}

	.leaderboard-table th:first-child {
	    width: 60px;
	}

	.leaderboard-table td {
	    padding: 10px 8px;
	    border-bottom: 1px solid #f1f3f4;
	}

	.leaderboard-table tbody tr:hover {
	    background-color: #f8f9fa;
	}

	.rank-cell {
	    text-align: center;
	    font-weight: 600;
	    color: #444;
	    background-color: #f8f9fa;
	    width: 60px;
	}

	.model-cell {
	    font-weight: 500;
	    max-width: 400px;
	    word-wrap: break-word;
	}

	.model-link {
	    color: #0066cc !important;
	    text-decoration: none !important;
	    font-weight: 500 !important;
	    transition: all 0.2s ease !important;
	    border-bottom: 1px solid transparent !important;
	}

	.model-link:hover {
	    color: #0052a3 !important;
	    border-bottom: 1px solid #0066cc !important;
	    background-color: rgba(0, 102, 204, 0.05) !important;
	    padding: 2px 4px !important;
	    border-radius: 4px !important;
	    margin: -2px -4px !important;
	}

	.size-cell {
	    text-align: center;
	    font-weight: 500;
	    color: #666;
	    min-width: 60px;
	}

	.score-cell {
	    text-align: center;
	    font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
	    font-size: 13px;
	}

	/* Model family row styling */
	.llama-row {
	    background-color: #fffbf0;
	}

	.llama-row:hover {
	    background-color: #fef7e0;
	}

	.deepseek-row {
	    background-color: #f0f8ff;
	}

	.deepseek-row:hover {
	    background-color: #e6f3ff;
	}

	.qwen-row {
	    background-color: #f5fff5;
	}

	.qwen-row:hover {
	    background-color: #eaffea;
	}

	.google-row {
	    background-color: #fff0f5;
	}

	.google-row:hover {
	    background-color: #ffe6f0;
	}

	.anthropic-row {
	    background-color: #fffbf0;
	}

	.anthropic-row:hover {
	    background-color: #fef7e0;
	}

	.bytedance-row {
	    background-color: #faf5ff;
	}

	.bytedance-row:hover {
	    background-color: #f3e8ff;
	}

	.openai-row {
	    background-color: #f0fdfa;
	}

	.openai-row:hover {
	    background-color: #ccfbf1;
	}

	.mistral-row {
	    background-color: #faf5ff;
	}

	.mistral-row:hover {
	    background-color: #f3e8ff;
	}

	.others-row {
	    background-color: #f8fafc;
	}

	.others-row:hover {
	    background-color: #f1f5f9;
	}

	.size-filter {
	    margin-top: 10px;
	}

	.size-filter > div {
	    display: flex !important;
	    flex-wrap: wrap !important;
	    gap: 8px !important;
	    align-items: center !important;
	}

	.size-filter label {
	    display: flex !important;
	    align-items: center !important;
	    background: #f8f9fa !important;
	    border: 2px solid #e9ecef !important;
	    border-radius: 8px !important;
	    padding: 8px 12px !important;
	    margin: 0 !important;
	    cursor: pointer !important;
	    transition: all 0.2s ease !important;
	    font-weight: 500 !important;
	    font-size: 14px !important;
	    color: #495057 !important;
	    min-width: 70px !important;
	    justify-content: center !important;
	}

	.size-filter label:hover {
	    background: #e9ecef !important;
	    border-color: #6c757d !important;
	}

	.size-filter input[type="checkbox"] {
	    display: none !important;
	}

	.size-filter input[type="checkbox"]:checked + span {
	    background: #0d6efd !important;
	    color: white !important;
	    border-color: #0d6efd !important;
	}

	.size-filter label:has(input[type="checkbox"]:checked) {
	    background: #0d6efd !important;
	    color: white !important;
	    border-color: #0d6efd !important;
	    box-shadow: 0 2px 4px rgba(13, 110, 253, 0.2) !important;
	}

	.architecture-filter {
	    margin-top: 10px;
	}

	.architecture-filter > div {
	    display: flex !important;
	    flex-wrap: wrap !important;
	    gap: 8px !important;
	    align-items: center !important;
	}

	.architecture-filter label {
	    display: flex !important;
	    align-items: center !important;
	    border-radius: 8px !important;
	    padding: 8px 12px !important;
	    margin: 0 !important;
	    cursor: pointer !important;
	    transition: all 0.2s ease !important;
	    font-weight: 500 !important;
	    font-size: 14px !important;
	    min-width: 140px !important;
	    justify-content: center !important;
	    border: 2px solid !important;
	}

	.architecture-filter label:hover {
	    transform: translateY(-1px);
	    box-shadow: 0 2px 8px rgba(0,0,0,0.1) !important;
	}

	.architecture-filter input[type="checkbox"] {
	    display: none !important;
	}

	/* OpenAI styling (1st checkbox) */
	.architecture-filter label:nth-child(1) {
	    background: #f0fdfa !important;
	    border-color: #99f6e4 !important;
	    color: #0f766e !important;
	}

	.architecture-filter label:nth-child(1):has(input[type="checkbox"]:checked) {
	    background: #14b8a6 !important;
	    border-color: #14b8a6 !important;
	    color: white !important;
	    box-shadow: 0 2px 4px rgba(20, 184, 166, 0.3) !important;
	}

	/* Qwen styling (2nd checkbox) */
	.architecture-filter label:nth-child(2) {
	    background: #f5fff5 !important;
	    border-color: #b3ffb3 !important;
	    color: #15803d !important;
	}

	.architecture-filter label:nth-child(2):has(input[type="checkbox"]:checked) {
	    background: #22c55e !important;
	    border-color: #22c55e !important;
	    color: white !important;
	    box-shadow: 0 2px 4px rgba(34, 197, 94, 0.3) !important;
	}

	/* DeepSeek styling (3rd checkbox) */
	.architecture-filter label:nth-child(3) {
	    background: #f0f8ff !important;
	    border-color: #b3d9ff !important;
	    color: #1e40af !important;
	}

	.architecture-filter label:nth-child(3):has(input[type="checkbox"]:checked) {
	    background: #3b82f6 !important;
	    border-color: #3b82f6 !important;
	    color: white !important;
	    box-shadow: 0 2px 4px rgba(59, 130, 246, 0.3) !important;
	}

	/* ByteDance styling (4th checkbox) */
	.architecture-filter label:nth-child(4) {
	    background: #faf5ff !important;
	    border-color: #d8b4fe !important;
	    color: #7c3aed !important;
	}

	.architecture-filter label:nth-child(4):has(input[type="checkbox"]:checked) {
	    background: #8b5cf6 !important;
	    border-color: #8b5cf6 !important;
	    color: white !important;
	    box-shadow: 0 2px 4px rgba(139, 92, 246, 0.3) !important;
	}

	/* Google styling (5th checkbox) */
	.architecture-filter label:nth-child(5) {
	    background: #fff0f5 !important;
	    border-color: #ffb3d9 !important;
	    color: #be185d !important;
	}

	.architecture-filter label:nth-child(5):has(input[type="checkbox"]:checked) {
	    background: #ec4899 !important;
	    border-color: #ec4899 !important;
	    color: white !important;
	    box-shadow: 0 2px 4px rgba(236, 72, 153, 0.3) !important;
	}

	/* Anthropic styling (6th checkbox) */
	.architecture-filter label:nth-child(6) {
	    background: #fffbf0 !important;
	    border-color: #f7e6a3 !important;
	    color: #8b4513 !important;
	}

	.architecture-filter label:nth-child(6):has(input[type="checkbox"]:checked) {
	    background: #f4a261 !important;
	    border-color: #f4a261 !important;
	    color: white !important;
	    box-shadow: 0 2px 4px rgba(244, 162, 97, 0.3) !important;
	}

	/* Others styling (7th checkbox) */
	.architecture-filter label:nth-child(7) {
	    background: #f8fafc !important;
	    border-color: #cbd5e1 !important;
	    color: #475569 !important;
	}

	.architecture-filter label:nth-child(7):has(input[type="checkbox"]:checked) {
	    background: #64748b !important;
	    border-color: #64748b !important;
	    color: white !important;
	    box-shadow: 0 2px 4px rgba(100, 116, 139, 0.3) !important;
	}

	/* Search and Filter Section Styling */
	.search-input input {
	    border: 2px solid #e9ecef !important;
	    border-radius: 12px !important;
	    padding: 12px 16px !important;
	    font-size: 14px !important;
	    transition: all 0.3s ease !important;
	    background: linear-gradient(135deg, #f8f9fa 0%, #ffffff 100%) !important;
	}

	.search-input input:focus {
	    border-color: #6366f1 !important;
	    box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1) !important;
	    background: white !important;
	}

	.search-input input::placeholder {
	    color: #6b7280 !important;
	    font-style: italic !important;
	}

	/* Modern Sort Dropdown Styling */
	.sort-dropdown-modern label {
	    font-weight: 600 !important;
	    color: #374151 !important;
	    margin-bottom: 8px !important;
	}

	.sort-dropdown-modern .wrap {
	    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
	    border-radius: 12px !important;
	    padding: 2px !important;
	    border: none !important;
	}

	.sort-dropdown-modern select {
	    background: white !important;
	    border: none !important;
	    border-radius: 10px !important;
	    padding: 12px 16px !important;
	    font-size: 14px !important;
	    font-weight: 500 !important;
	    color: #374151 !important;
	    cursor: pointer !important;
	    transition: all 0.3s ease !important;
	    box-shadow: 0 2px 4px rgba(0,0,0,0.1) !important;
	}

	.sort-dropdown-modern select:hover {
	    box-shadow: 0 4px 8px rgba(0,0,0,0.15) !important;
	    transform: translateY(-1px) !important;
	}

	.sort-dropdown-modern select:focus {
	    outline: none !important;
	    box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2) !important;
	}

	/* Section Headers */
	h3 {
	    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
	    -webkit-background-clip: text !important;
	    -webkit-text-fill-color: transparent !important;
	    background-clip: text !important;
	    margin-bottom: 12px !important;
	}

	/* Centered Architecture Section */
	.centered-title {
	    text-align: center !important;
	}

	.centered-filter > div {
	    display: flex !important;
	    flex-wrap: wrap !important;
	    gap: 8px !important;
	    align-items: center !important;
	    justify-content: center !important;
	}

	/* Dark Mode Specific Styles */
	@media (prefers-color-scheme: dark) {
	    .leaderboard-container {
	        border: 1px solid #4b5563 !important;
	    }

	    .leaderboard-table {
	        background: #1f2937 !important;
	        color: #f9fafb !important;
	    }

	    .leaderboard-table th {
	        background-color: #374151 !important;
	        color: #f9fafb !important;
	        border-bottom: 2px solid #4b5563 !important;
	    }

	    .leaderboard-table td {
	        color: #f9fafb !important;
	        border-bottom: 1px solid #374151 !important;
	    }

	    .leaderboard-table tbody tr:hover {
	        background-color: #374151 !important;
	    }

	    .rank-cell {
	        background-color: #374151 !important;
	        color: #f9fafb !important;
	    }

	    .model-cell {
	        color: #f9fafb !important;
	    }

	    .size-cell {
	        color: #d1d5db !important;
	    }

	    .score-cell {
	        color: #f9fafb !important;
	    }

	    .model-link {
	        color: #60a5fa !important;
	    }

	    .model-link:hover {
	        color: #93c5fd !important;
	        border-bottom: 1px solid #60a5fa !important;
	        background-color: rgba(96, 165, 250, 0.1) !important;
	    }

	    /* Dark mode row colors with better contrast */
	    .llama-row {
	        background-color: rgba(245, 158, 11, 0.1) !important;
	    }

	    .llama-row:hover {
	        background-color: rgba(245, 158, 11, 0.2) !important;
	    }

	    .deepseek-row {
	        background-color: rgba(59, 130, 246, 0.1) !important;
	    }

	    .deepseek-row:hover {
	        background-color: rgba(59, 130, 246, 0.2) !important;
	    }

	    .qwen-row {
	        background-color: rgba(34, 197, 94, 0.1) !important;
	    }

	    .qwen-row:hover {
	        background-color: rgba(34, 197, 94, 0.2) !important;
	    }

	    .google-row {
	        background-color: rgba(236, 72, 153, 0.1) !important;
	    }

	    .google-row:hover {
	        background-color: rgba(236, 72, 153, 0.2) !important;
	    }

	    .anthropic-row {
	        background-color: rgba(245, 158, 11, 0.1) !important;
	    }

	    .anthropic-row:hover {
	        background-color: rgba(245, 158, 11, 0.2) !important;
	    }

	    .bytedance-row {
	        background-color: rgba(139, 92, 246, 0.1) !important;
	    }

	    .bytedance-row:hover {
	        background-color: rgba(139, 92, 246, 0.2) !important;
	    }

	    .openai-row {
	        background-color: rgba(20, 184, 166, 0.1) !important;
	    }

	    .openai-row:hover {
	        background-color: rgba(20, 184, 166, 0.2) !important;
	    }

	    .mistral-row {
	        background-color: rgba(139, 92, 246, 0.1) !important;
	    }

	    .mistral-row:hover {
	        background-color: rgba(139, 92, 246, 0.2) !important;
	    }

	    .others-row {
	        background-color: rgba(107, 114, 128, 0.1) !important;
	    }

	    .others-row:hover {
	        background-color: rgba(107, 114, 128, 0.2) !important;
	    }
	}
	"""

	# Launch the app only when this file is executed directly as a script;
	# importing the module builds `app` without starting the server.
	if __name__ == "__main__":
	    app.launch()