|
import gradio as gr |
|
import pandas as pd |
|
import numpy as np |
|
from io import StringIO |
|
import os |
|
|
|
|
|
def _format_size(size):
    """Render a raw size cell for display, keeping the "???" placeholder as-is."""
    return "???" if size == "???" else f"{size}B"


# Load the benchmark results from the TSV file in the working directory.
df = pd.read_csv("ReliableMath.tsv", sep="\t")
print(f"Successfully loaded {len(df)} models from local file")

# Drop incomplete rows, trim stray whitespace from the header cells, then
# switch the raw column names to the labels used throughout the UI.
df = (
    df.dropna()
    .rename(columns=str.strip)
    .rename(
        columns={
            "model": "Model Name",
            "size": "Size (B)",
            "prompt": "Prompt",
        }
    )
)

# Human-readable size label, e.g. "7B"; unknown sizes stay "???".
df["Size_Display"] = df["Size (B)"].map(_format_size)
|
|
|
# Model names grouped by kind; drives the "reasoning"/"instruction" filter.
model_types = dict(
    reasoning=[
        "deepseek-ai/DeepSeek-R1",
        "deepseek-ai/DeepSeek-R1-0528",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "OpenAI/o3-mini-2025-01-31",
        "google/gemini-2.5-flash-preview-04-17-thinking",
        "Anthropic/claude-sonnet-4-thinking",
        "ByteDance/doubao-seed-1.6-thinking-250615",
        "ByteDance/doubao-1.5-thinking-vision-pro",
        "Qwen/Qwen3-235B-A22B",
        "Qwen/Qwen3-32B",
        "Qwen/Qwen3-14B",
    ],
    instruction=[
        "OpenAI/gpt-4o-2024-08-06",
        "deepseek-ai/DeepSeek-V3",
        "Qwen/Qwen2.5-Math-1.5B-Instruct",
        "Qwen/Qwen2.5-Math-7B-Instruct",
        "google/gemini-2.5-flash-preview-04-17",
        "Anthropic/claude-sonnet-4-20250514",
    ],
)
|
|
|
|
|
# Inclusive upper bounds (in billions of parameters) and their bucket labels,
# in ascending order. Anything above the last bound is ">80B".
_SIZE_BUCKETS = (
    (5, "0-5B"),
    (10, "5-10B"),
    (20, "10-20B"),
    (40, "20-40B"),
    (80, "40-80B"),
)


def get_size_category(size):
    """Map a model size (in billions of parameters) to its filter bucket.

    Args:
        size: A number, a numeric string, or the "???" placeholder used for
            models whose size is unknown.

    Returns:
        One of "0-5B", "5-10B", "10-20B", "20-40B", "40-80B", ">80B", or
        "???" when the size is unknown, not numeric, NaN, or not positive.
    """
    try:
        billions = float(size)
    except (TypeError, ValueError):
        # Covers the "???" placeholder and any other non-numeric cell.
        return "???"

    # `not x > 0` is also true for NaN, so zero, negative and NaN sizes are
    # reported as unknown instead of landing in an arbitrary bucket.
    if not billions > 0:
        return "???"

    for upper_bound, label in _SIZE_BUCKETS:
        if billions <= upper_bound:
            return label
    return ">80B"
|
|
|
|
|
# Bucket every model into the size range offered by the size filter.
df["Size_Category"] = df["Size (B)"].map(get_size_category)
|
|
|
|
|
# Case-insensitive substring that identifies each selectable model family.
_ARCHITECTURE_PATTERNS = {
    "deepseek": "deepseek",
    "qwen": "Qwen/",
    "openai": "openai",
    "bytedance": "ByteDance",
    "google": "google",
    "anthropic": "Anthropic",
}

# A model belongs to "others" when its name contains none of these keywords.
# NOTE(review): "meta-llama" is excluded from "others" although no "llama"
# filter option exists in this function - confirm that is intended.
_KNOWN_FAMILY_KEYWORDS = (
    "meta-llama",
    "deepseek",
    "qwen",
    "google",
    "bytedance",
    "anthropic",
    "openai",
)

# Columns returned to the caller, in display order.
_DISPLAY_COLUMNS = [
    "Rank",
    "Model Name",
    "Size (B)",
    "Prompt",
    "Prec.Avg",
    "Prud.Avg",
    "Prec.(A)",
    "Prud.(A)",
    "Len.(A)",
    "Prec.(U)",
    "Prud.(U)",
    "Len.(U)",
]

# Score columns that are rounded to three decimals for display.
_SCORE_COLUMNS = [
    "Prec.Avg",
    "Prud.Avg",
    "Prec.(A)",
    "Prud.(A)",
    "Prec.(U)",
    "Prud.(U)",
]


def _name_contains(names, text):
    """Return a boolean mask of names containing `text` (literal, any case)."""
    # regex=False: user-typed text such as "(" must never be parsed as a regex.
    return names.str.contains(text, case=False, na=False, regex=False)


def filter_and_search_models(
    search_query, size_ranges, sort_by, type_by, architecture_filters=None
):
    """Filter, sort and rank the module-level leaderboard frame `df`.

    Args:
        search_query: Text matched literally and case-insensitively against
            "Model Name"; empty or None disables the search.
        size_ranges: "Size_Category" labels to keep; empty or None keeps all.
        sort_by: Column to sort by in descending order; ignored when it is
            not a column of the frame.
        type_by: Keys of `model_types` to keep; empty or None keeps all.
            Unknown keys select nothing.
        architecture_filters: Family keys ("openai", "qwen", "deepseek",
            "bytedance", "google", "anthropic", "others") to keep; empty or
            None keeps all. Unknown keys are ignored.

    Returns:
        A new DataFrame holding the display columns, with a 1-based "Rank"
        column and score columns rounded to three decimals.

    Raises:
        KeyError: If `df` lacks one of the display columns.
    """
    filtered_df = df.copy()

    query = (search_query or "").strip()
    if query:
        filtered_df = filtered_df[_name_contains(filtered_df["Model Name"], query)]

    if size_ranges:
        filtered_df = filtered_df[filtered_df["Size_Category"].isin(size_ranges)]

    if type_by:
        # One membership test over the union of the selected groups, so a
        # model listed under several selected types appears only once.
        allowed_names = {
            name
            for model_type in type_by
            for name in model_types.get(model_type, ())
        }
        filtered_df = filtered_df[filtered_df["Model Name"].isin(allowed_names)]

    if architecture_filters:
        names = filtered_df["Model Name"]
        architecture_mask = pd.Series(False, index=filtered_df.index)

        for arch in architecture_filters:
            if arch == "others":
                known_mask = pd.Series(False, index=filtered_df.index)
                for keyword in _KNOWN_FAMILY_KEYWORDS:
                    known_mask |= _name_contains(names, keyword)
                architecture_mask |= ~known_mask
            elif arch in _ARCHITECTURE_PATTERNS:
                architecture_mask |= _name_contains(
                    names, _ARCHITECTURE_PATTERNS[arch]
                )

        filtered_df = filtered_df[architecture_mask]

    if sort_by in filtered_df.columns:
        filtered_df = filtered_df.sort_values(sort_by, ascending=False)

    # Rank follows the final (sorted) row order.
    filtered_df = filtered_df.reset_index(drop=True)
    filtered_df["Rank"] = range(1, len(filtered_df) + 1)

    # Copy once so rounding never writes into a view of `filtered_df`.
    display_df = filtered_df[_DISPLAY_COLUMNS].copy()
    display_df[_SCORE_COLUMNS] = display_df[_SCORE_COLUMNS].round(3)

    return display_df
|
|
|
|
|
# (lower-case keyword, CSS row class) pairs; the first keyword found wins.
_ROW_CLASS_BY_KEYWORD = (
    ("meta-llama", "llama-row"),
    ("deepseek", "deepseek-row"),
    ("qwen", "qwen-row"),
    ("google", "google-row"),
    ("anthropic", "anthropic-row"),
    ("bytedance", "bytedance-row"),
    ("openai", "openai-row"),
)

# (lower-case keyword, documentation URL) pairs for models that have a
# dedicated vendor page; every other model links to its Hugging Face page.
_DOC_URL_BY_KEYWORD = (
    ("o3-mini", "https://platform.openai.com/docs/models/o3-mini"),
    ("gpt-4o", "https://platform.openai.com/docs/models/gpt-4o"),
    (
        "gemini-2.5-flash",
        "https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash",
    ),
    (
        "claude-sonnet",
        "https://docs.anthropic.com/en/docs/about-claude/models/overview#model-comparison-table",
    ),
    ("doubao-1.5-thinking-vision-pro", "https://www.volcengine.com/docs/82379/1554521"),
    ("doubao-seed-1.6-thinking", "https://www.volcengine.com/docs/82379/1593703"),
)

# CSS classes for the first three columns; later columns are score cells.
_LEADING_CELL_CLASSES = ("rank-cell", "model-cell", "size-cell")


def _row_css_class(model_name):
    """Return the family row class for a model name (case-insensitive)."""
    lowered = model_name.lower()
    for keyword, css_class in _ROW_CLASS_BY_KEYWORD:
        if keyword in lowered:
            return css_class
    return "others-row"


def _model_url(model_name):
    """Return the documentation link for a model name."""
    lowered = model_name.lower()
    for keyword, url in _DOC_URL_BY_KEYWORD:
        if keyword in lowered:
            return url
    return f"https://huggingface.co/{model_name}"


def create_html_table(df):
    """Render a leaderboard DataFrame as an HTML table string.

    Args:
        df: Frame to render. It must contain a "Model Name" column; the
            first three columns get the rank, model and size cell classes,
            and every later column is styled as a score cell.

    Returns:
        The table markup wrapped in a `leaderboard-container` div. Each row
        carries a family CSS class and the model name is a link. Headers,
        cell text and link targets are HTML-escaped.
    """
    # Imported locally so the block is self-contained.
    from html import escape

    columns = list(df.columns)

    # Collect fragments and join once instead of growing a string with +=.
    parts = [
        '<div class="leaderboard-container">',
        '<table class="leaderboard-table">',
        "<thead><tr>",
    ]
    parts.extend(f"<th>{escape(str(col))}</th>" for col in columns)
    parts.append("</tr></thead>")

    parts.append("<tbody>")
    for _, row in df.iterrows():
        # str() keeps a non-string cell from raising in the keyword checks.
        model_name = str(row["Model Name"])
        parts.append(f'<tr class="{_row_css_class(model_name)}">')

        for position, col in enumerate(columns):
            if position < len(_LEADING_CELL_CLASSES):
                cell_class = _LEADING_CELL_CLASSES[position]
            else:
                cell_class = "score-cell"

            if col == "Model Name":
                href = escape(_model_url(model_name), quote=True)
                cell_content = (
                    f'<a href="{href}" target="_blank" class="model-link">'
                    f"{escape(model_name)}</a>"
                )
            else:
                cell_content = escape(str(row[col]))

            parts.append(f'<td class="{cell_class}">{cell_content}</td>')
        parts.append("</tr>")
    parts.extend(("</tbody>", "</table>", "</div>"))

    return "".join(parts)
|
|
|
|
|
|
|
# Build the Gradio interface: a "Leaderboard" tab with filter/sort/search
# controls above an HTML results table, and an "About" tab rendered from
# about.md. Every control re-renders the table through `update_table`.
with gr.Blocks(title="ReliableMath Leaderboard", theme=gr.themes.Base()) as app:
    # Default selections, defined once so the widgets and the initial table
    # render can never drift apart.
    size_options = ["0-5B", "5-10B", "10-20B", "20-40B", "40-80B", ">80B", "???"]
    default_architectures = [
        "openai",
        "qwen",
        "deepseek",
        "google",
        "anthropic",
        "bytedance",
        "others",
    ]
    default_types = ["reasoning", "instruction"]
    default_sort = "Prec.Avg"

    # Computed up front so the model counter reflects the rows actually shown
    # in the first render rather than the size of the unfiltered frame.
    initial_df = filter_and_search_models(
        "", size_options, default_sort, default_types, default_architectures
    )

    gr.Markdown("# 🏆 ReliableMath Leaderboard")
    gr.Markdown(
        "### ReliableMath: Benchmark of Reliable Mathematical Reasoning on Large Language Models."
    )

    with gr.Tabs():
        with gr.TabItem("Leaderboard"):
            with gr.Row():
                # Left column: sorting plus size and architecture filters.
                with gr.Column(scale=1):
                    gr.Markdown("### 🎛️ **Filter & Sort Options**")

                    with gr.Row():
                        sort_dropdown = gr.Dropdown(
                            choices=[
                                ("😁 Precision Score", "Prec.Avg"),
                                ("🧐 Prudence Score", "Prud.Avg"),
                            ],
                            value=default_sort,
                            label="Sort by Metric",
                            elem_classes="sort-dropdown-modern",
                            container=True,
                        )

                    gr.Markdown("**📏 Filter by Model Size:**")
                    size_checkboxes = gr.CheckboxGroup(
                        choices=size_options,
                        value=size_options,
                        label="",
                        elem_classes="size-filter",
                        container=False,
                    )

                    gr.Markdown("**🏗️ Filter by Model Architecture:**")
                    architecture_checkboxes = gr.CheckboxGroup(
                        choices=[
                            ("🤖 OpenAI", "openai"),
                            ("🐧 Qwen", "qwen"),
                            ("🐳 DeepSeek", "deepseek"),
                            ("🌋 ByteDance", "bytedance"),
                            ("🔷 Google", "google"),
                            ("🌟 Anthropic", "anthropic"),
                            ("🔧 Others", "others"),
                        ],
                        value=default_architectures,
                        label="",
                        elem_classes="architecture-filter",
                        container=False,
                    )

                # Right column: free-text search and model-type filter.
                with gr.Column(scale=1):
                    gr.Markdown("### 🔍 **Search Models**")
                    search_box = gr.Textbox(
                        label="",
                        placeholder="Search for a model name (e.g., Llama, Qwen, DeepSeek)...",
                        value="",
                        elem_classes="search-input",
                    )

                    gr.Markdown("**🔎 Filter by Reasoning or Instruction Models:**")
                    type_sort = gr.CheckboxGroup(
                        choices=[
                            ("🤔 reasoning", "reasoning"),
                            ("😯 instruction", "instruction"),
                        ],
                        value=default_types,
                        label="",
                        elem_classes="reasoning-filter",
                        container=False,
                    )

            total_models = gr.Markdown(f"**Showing {len(initial_df)} models**")

            results_table = gr.HTML(
                value=create_html_table(initial_df),
                elem_id="leaderboard-table",
            )

            with gr.Accordion("Metric Explanations", open=False):
                gr.Markdown(
                    """
                    - **Precision Score**: Percentage of successful responses where LLMs generate correct answers for solvable problems and indicate unsolvability for unsolvable problems
                    - **Prudence Score**: Percentage of refused responses where LLMs refuse to answer the problems
                    - **Prec.(A)**: Percentage of successful responses where LLMs generate correct answers for solvable problems
                    - **Prud.(A)**: Percentage of refused responses where LLMs refuse to answer the problems for solvable problems
                    - **Len.(A)**: Averaged length of LLM generations for solvable problems
                    - **Prec.(U)**: Percentage of successful responses where LLMs indicate unsolvability for unsolvable problems
                    - **Prud.(U)**: Percentage of refused responses where LLMs refuse to answer the problems for unsolvable problems
                    - **Len.(U)**: Averaged length of LLM generations for unsolvable problems
                    """
                )

        with gr.TabItem("About"):
            # Context manager closes the file; explicit encoding avoids
            # depending on the platform default.
            with open("about.md", "r", encoding="utf-8") as about_file:
                gr.Markdown(about_file.read())

    def update_table(search, sizes, sort_by, type_by, arch_filters):
        """Re-run the filters and return the table HTML and the model count."""
        filtered_df = filter_and_search_models(
            search, sizes, sort_by, type_by, arch_filters
        )
        model_count = f"**Showing {len(filtered_df)} models**"
        return create_html_table(filtered_df), model_count

    # Every control feeds the same handler with the full set of inputs, so a
    # change to any one of them refreshes the table and the counter.
    filter_inputs = [
        search_box,
        size_checkboxes,
        sort_dropdown,
        type_sort,
        architecture_checkboxes,
    ]
    for control in filter_inputs:
        control.change(
            fn=update_table,
            inputs=filter_inputs,
            outputs=[results_table, total_models],
        )
|
|
|
|
|
# Stylesheet for the leaderboard, attached to the Blocks app.
# The `.architecture-filter label:nth-child(N)` rules are positional: N must
# follow the order of the architecture checkbox choices (OpenAI, Qwen,
# DeepSeek, ByteDance, Google, Anthropic, Others), and each palette matches
# the tint of the corresponding `*-row` class emitted by create_html_table.
# NOTE(review): assumes each checkbox label is a direct sibling inside the
# group wrapper, as the original rules did - confirm against the rendered DOM.
app.css = """
.leaderboard-container {
    margin-top: 20px;
    max-height: 600px;
    overflow-y: auto;
    border-radius: 8px;
    border: 1px solid #e9ecef;
}

.leaderboard-table {
    width: 100%;
    border-collapse: collapse;
    font-size: 14px;
    background: white;
}

.leaderboard-table th {
    background-color: #f8f9fa;
    font-weight: 600;
    padding: 12px 8px;
    text-align: center;
    border-bottom: 2px solid #dee2e6;
    position: sticky;
    top: 0;
    z-index: 10;
}

.leaderboard-table th:first-child {
    width: 60px;
}

.leaderboard-table td {
    padding: 10px 8px;
    border-bottom: 1px solid #f1f3f4;
}

.leaderboard-table tbody tr:hover {
    background-color: #f8f9fa;
}

.rank-cell {
    text-align: center;
    font-weight: 600;
    color: #444;
    background-color: #f8f9fa;
    width: 60px;
}

.model-cell {
    font-weight: 500;
    max-width: 400px;
    word-wrap: break-word;
}

.model-link {
    color: #0066cc !important;
    text-decoration: none !important;
    font-weight: 500 !important;
    transition: all 0.2s ease !important;
    border-bottom: 1px solid transparent !important;
}

.model-link:hover {
    color: #0052a3 !important;
    border-bottom: 1px solid #0066cc !important;
    background-color: rgba(0, 102, 204, 0.05) !important;
    padding: 2px 4px !important;
    border-radius: 4px !important;
    margin: -2px -4px !important;
}

.size-cell {
    text-align: center;
    font-weight: 500;
    color: #666;
    min-width: 60px;
}

.score-cell {
    text-align: center;
    font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
    font-size: 13px;
}

/* Model family row styling */
.llama-row {
    background-color: #fffbf0;
}

.llama-row:hover {
    background-color: #fef7e0;
}

.openai-row {
    background-color: #fffbf0;
}

.openai-row:hover {
    background-color: #fef7e0;
}

.deepseek-row {
    background-color: #f0f8ff;
}

.deepseek-row:hover {
    background-color: #e6f3ff;
}

.qwen-row {
    background-color: #f5fff5;
}

.qwen-row:hover {
    background-color: #eaffea;
}

.google-row {
    background-color: #fff0f5;
}

.google-row:hover {
    background-color: #ffe6f0;
}

.mistral-row {
    background-color: #faf5ff;
}

.mistral-row:hover {
    background-color: #f3e8ff;
}

.bytedance-row {
    background-color: #faf5ff;
}

.bytedance-row:hover {
    background-color: #f3e8ff;
}

.anthropic-row {
    background-color: #f0fdfa;
}

.anthropic-row:hover {
    background-color: #ccfbf1;
}

.others-row {
    background-color: #f8fafc;
}

.others-row:hover {
    background-color: #f1f5f9;
}

.size-filter {
    margin-top: 10px;
}

.size-filter > div {
    display: flex !important;
    flex-wrap: wrap !important;
    gap: 8px !important;
    align-items: center !important;
}

.size-filter label {
    display: flex !important;
    align-items: center !important;
    background: #f8f9fa !important;
    border: 2px solid #e9ecef !important;
    border-radius: 8px !important;
    padding: 8px 12px !important;
    margin: 0 !important;
    cursor: pointer !important;
    transition: all 0.2s ease !important;
    font-weight: 500 !important;
    font-size: 14px !important;
    color: #495057 !important;
    min-width: 70px !important;
    justify-content: center !important;
}

.size-filter label:hover {
    background: #e9ecef !important;
    border-color: #6c757d !important;
}

.size-filter input[type="checkbox"] {
    display: none !important;
}

.size-filter input[type="checkbox"]:checked + span {
    background: #0d6efd !important;
    color: white !important;
    border-color: #0d6efd !important;
}

.size-filter label:has(input[type="checkbox"]:checked) {
    background: #0d6efd !important;
    color: white !important;
    border-color: #0d6efd !important;
    box-shadow: 0 2px 4px rgba(13, 110, 253, 0.2) !important;
}

.architecture-filter {
    margin-top: 10px;
}

.architecture-filter > div {
    display: flex !important;
    flex-wrap: wrap !important;
    gap: 8px !important;
    align-items: center !important;
}

.architecture-filter label {
    display: flex !important;
    align-items: center !important;
    border-radius: 8px !important;
    padding: 8px 12px !important;
    margin: 0 !important;
    cursor: pointer !important;
    transition: all 0.2s ease !important;
    font-weight: 500 !important;
    font-size: 14px !important;
    min-width: 140px !important;
    justify-content: center !important;
    border: 2px solid !important;
}

.architecture-filter label:hover {
    transform: translateY(-1px);
    box-shadow: 0 2px 8px rgba(0,0,0,0.1) !important;
}

.architecture-filter input[type="checkbox"] {
    display: none !important;
}

/* OpenAI styling (1st checkbox) */
.architecture-filter label:nth-child(1) {
    background: #fffbf0 !important;
    border-color: #f7e6a3 !important;
    color: #8b4513 !important;
}

.architecture-filter label:nth-child(1):has(input[type="checkbox"]:checked) {
    background: #f4a261 !important;
    border-color: #f4a261 !important;
    color: white !important;
    box-shadow: 0 2px 4px rgba(244, 162, 97, 0.3) !important;
}

/* Qwen styling (2nd checkbox) */
.architecture-filter label:nth-child(2) {
    background: #f5fff5 !important;
    border-color: #b3ffb3 !important;
    color: #15803d !important;
}

.architecture-filter label:nth-child(2):has(input[type="checkbox"]:checked) {
    background: #22c55e !important;
    border-color: #22c55e !important;
    color: white !important;
    box-shadow: 0 2px 4px rgba(34, 197, 94, 0.3) !important;
}

/* DeepSeek styling (3rd checkbox) */
.architecture-filter label:nth-child(3) {
    background: #f0f8ff !important;
    border-color: #b3d9ff !important;
    color: #1e40af !important;
}

.architecture-filter label:nth-child(3):has(input[type="checkbox"]:checked) {
    background: #3b82f6 !important;
    border-color: #3b82f6 !important;
    color: white !important;
    box-shadow: 0 2px 4px rgba(59, 130, 246, 0.3) !important;
}

/* ByteDance styling (4th checkbox) */
.architecture-filter label:nth-child(4) {
    background: #faf5ff !important;
    border-color: #d8b4fe !important;
    color: #7c3aed !important;
}

.architecture-filter label:nth-child(4):has(input[type="checkbox"]:checked) {
    background: #8b5cf6 !important;
    border-color: #8b5cf6 !important;
    color: white !important;
    box-shadow: 0 2px 4px rgba(139, 92, 246, 0.3) !important;
}

/* Google styling (5th checkbox) */
.architecture-filter label:nth-child(5) {
    background: #fff0f5 !important;
    border-color: #ffb3d9 !important;
    color: #be185d !important;
}

.architecture-filter label:nth-child(5):has(input[type="checkbox"]:checked) {
    background: #ec4899 !important;
    border-color: #ec4899 !important;
    color: white !important;
    box-shadow: 0 2px 4px rgba(236, 72, 153, 0.3) !important;
}

/* Anthropic styling (6th checkbox) */
.architecture-filter label:nth-child(6) {
    background: #f0fdfa !important;
    border-color: #99f6e4 !important;
    color: #0f766e !important;
}

.architecture-filter label:nth-child(6):has(input[type="checkbox"]:checked) {
    background: #14b8a6 !important;
    border-color: #14b8a6 !important;
    color: white !important;
    box-shadow: 0 2px 4px rgba(20, 184, 166, 0.3) !important;
}

/* Others styling (7th checkbox) */
.architecture-filter label:nth-child(7) {
    background: #f8fafc !important;
    border-color: #cbd5e1 !important;
    color: #475569 !important;
}

.architecture-filter label:nth-child(7):has(input[type="checkbox"]:checked) {
    background: #64748b !important;
    border-color: #64748b !important;
    color: white !important;
    box-shadow: 0 2px 4px rgba(100, 116, 139, 0.3) !important;
}

/* Search and Filter Section Styling */
.search-input input {
    border: 2px solid #e9ecef !important;
    border-radius: 12px !important;
    padding: 12px 16px !important;
    font-size: 14px !important;
    transition: all 0.3s ease !important;
    background: linear-gradient(135deg, #f8f9fa 0%, #ffffff 100%) !important;
}

.search-input input:focus {
    border-color: #6366f1 !important;
    box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1) !important;
    background: white !important;
}

.search-input input::placeholder {
    color: #6b7280 !important;
    font-style: italic !important;
}

/* Modern Sort Dropdown Styling */
.sort-dropdown-modern label {
    font-weight: 600 !important;
    color: #374151 !important;
    margin-bottom: 8px !important;
}

.sort-dropdown-modern .wrap {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border-radius: 12px !important;
    padding: 2px !important;
    border: none !important;
}

.sort-dropdown-modern select {
    background: white !important;
    border: none !important;
    border-radius: 10px !important;
    padding: 12px 16px !important;
    font-size: 14px !important;
    font-weight: 500 !important;
    color: #374151 !important;
    cursor: pointer !important;
    transition: all 0.3s ease !important;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1) !important;
}

.sort-dropdown-modern select:hover {
    box-shadow: 0 4px 8px rgba(0,0,0,0.15) !important;
    transform: translateY(-1px) !important;
}

.sort-dropdown-modern select:focus {
    outline: none !important;
    box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2) !important;
}

/* Section Headers */
h3 {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    -webkit-background-clip: text !important;
    -webkit-text-fill-color: transparent !important;
    background-clip: text !important;
    margin-bottom: 12px !important;
}

/* Centered Architecture Section */
.centered-title {
    text-align: center !important;
}

.centered-filter > div {
    display: flex !important;
    flex-wrap: wrap !important;
    gap: 8px !important;
    align-items: center !important;
    justify-content: center !important;
}

/* Dark Mode Specific Styles */
@media (prefers-color-scheme: dark) {
    .leaderboard-table {
        background: #1f2937 !important;
        color: #f9fafb !important;
    }

    .leaderboard-table th {
        background-color: #374151 !important;
        color: #f9fafb !important;
        border-bottom: 2px solid #4b5563 !important;
    }

    .leaderboard-table td {
        color: #f9fafb !important;
        border-bottom: 1px solid #374151 !important;
    }

    .leaderboard-table tbody tr:hover {
        background-color: #374151 !important;
    }

    .rank-cell {
        background-color: #374151 !important;
        color: #f9fafb !important;
    }

    .model-cell {
        color: #f9fafb !important;
    }

    .size-cell {
        color: #d1d5db !important;
    }

    .score-cell {
        color: #f9fafb !important;
    }

    /* Dark mode row colors with better contrast */
    .llama-row {
        background-color: rgba(245, 158, 11, 0.1) !important;
    }

    .llama-row:hover {
        background-color: rgba(245, 158, 11, 0.2) !important;
    }

    .openai-row {
        background-color: rgba(245, 158, 11, 0.1) !important;
    }

    .openai-row:hover {
        background-color: rgba(245, 158, 11, 0.2) !important;
    }

    .deepseek-row {
        background-color: rgba(59, 130, 246, 0.1) !important;
    }

    .deepseek-row:hover {
        background-color: rgba(59, 130, 246, 0.2) !important;
    }

    .qwen-row {
        background-color: rgba(34, 197, 94, 0.1) !important;
    }

    .qwen-row:hover {
        background-color: rgba(34, 197, 94, 0.2) !important;
    }

    .google-row {
        background-color: rgba(236, 72, 153, 0.1) !important;
    }

    .google-row:hover {
        background-color: rgba(236, 72, 153, 0.2) !important;
    }

    .mistral-row {
        background-color: rgba(139, 92, 246, 0.1) !important;
    }

    .mistral-row:hover {
        background-color: rgba(139, 92, 246, 0.2) !important;
    }

    .bytedance-row {
        background-color: rgba(139, 92, 246, 0.1) !important;
    }

    .bytedance-row:hover {
        background-color: rgba(139, 92, 246, 0.2) !important;
    }

    .anthropic-row {
        background-color: rgba(20, 184, 166, 0.1) !important;
    }

    .anthropic-row:hover {
        background-color: rgba(20, 184, 166, 0.2) !important;
    }

    .others-row {
        background-color: rgba(107, 114, 128, 0.1) !important;
    }

    .others-row:hover {
        background-color: rgba(107, 114, 128, 0.2) !important;
    }

    .leaderboard-container {
        border: 1px solid #4b5563 !important;
    }

    .model-link {
        color: #60a5fa !important;
    }

    .model-link:hover {
        color: #93c5fd !important;
        border-bottom: 1px solid #60a5fa !important;
        background-color: rgba(96, 165, 250, 0.1) !important;
    }
}
"""
|
|
|
|
|
# Start the Gradio server only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    app.launch()
|
|