import threading

import gradio as gr
import gradio.components as grc
import pandas as pd
import requests
import uvicorn
from apscheduler.schedulers.background import BackgroundScheduler
from rich import print

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
    get_benchmarks,
)
from src.backend.app import create_app
from src.display.css_html_js import (
    backend_status_indicator_css,
    backend_status_indicator_html,
    backend_status_js,
    custom_css,
)
from src.display.utils import (
    BASE_COLS,
    BENCHMARK_COLS,
    EVAL_COLS,
    EVAL_TYPES,
    NOT_SUPPORTED_COLS,
    AutoEvalColumn,
    ModelType,
    Precision,
    WeightType,
)
from src.envs import API, settings
from src.populate import get_evaluation_queue_df, get_leaderboard_df
from src.prepare import prepare_space
from src.submission.submit import add_new_submit

prepare_space()

BENCHMARKS = get_benchmarks()


def restart_space():
    API.restart_space(repo_id=settings.REPO_ID)


print("///// --- Settings --- /////", settings.model_dump())

# LEADERBOARD_DF = get_leaderboard_df(
#     settings.EVAL_RESULTS_PATH,
#     settings.EVAL_REQUESTS_PATH,
#     COLS,
#     BENCHMARK_COLS,
# )

(
    finished_eval_queue_df,
    running_eval_queue_df,
    pending_eval_queue_df,
) = get_evaluation_queue_df(settings.EVAL_REQUESTS_PATH, EVAL_COLS)


def format_dataframe_with_styler(df: pd.DataFrame):
    """
    Format the numeric columns of a DataFrame with pandas Styler, keeping one decimal place.

    Returns a Styler object that can be passed directly to a Gradio Dataframe.
    """
    # Work on a copy (truncation instead of rounding could be applied here if ever needed)
    df = df.copy()
    numeric_cols = {}
    for col in df.columns:
        if col in ['Model', 'T']:  # skip non-numeric columns
            continue
        # Only format columns with a numeric dtype
        if pd.api.types.is_numeric_dtype(df[col]):
            # Record the columns to format: one decimal place
            numeric_cols[col] = "{:.1f}"
    # Use Styler to format the displayed values
    if numeric_cols:
        return df.style.format(numeric_cols)
    return df
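

# Illustrative sketch, not wired into the app: how the Styler returned by
# format_dataframe_with_styler renders. The model names and scores below are invented for
# the example; only the 'Model' column name mirrors the real leaderboard schema.
def _demo_format_dataframe_with_styler() -> None:
    demo_df = pd.DataFrame(
        {
            "Model": ["model-a", "model-b"],
            "Average ⬆️": [81.234, 79.987],
        }
    )
    styler = format_dataframe_with_styler(demo_df)
    # Numeric cells render with one decimal place (81.2 / 80.0); the underlying data is unchanged.
    print(styler.to_html())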


def filter_dataframe_by_columns(selected_cols: list[str], original_df: pd.DataFrame) -> pd.DataFrame:
    """
    Filter the DataFrame down to the selected columns.
    """
    # # Always include the base columns 'T' and 'Model'
    # base_cols = ['T', 'Model']
    base_cols = ['Model']
    all_selected_cols = [col for col in base_cols if col in original_df.columns]
    # Add the user-selected columns (skipping base columns already included)
    for col in selected_cols:
        if col in original_df.columns and col not in all_selected_cols:
            all_selected_cols.append(col)
    # Keep the selected columns in their original DataFrame order, base columns included
    ordered_cols = []
    for col in original_df.columns:
        if col in all_selected_cols:
            ordered_cols.append(col)
    # Always return a DataFrame: indexing with a list keeps the DataFrame type even for a single column
    if ordered_cols:
        filtered_df = original_df.loc[:, ordered_cols]
    else:
        filtered_df = original_df
    return filtered_df


def filter_dataframe_by_precision(selected_precisions: list[str], df: pd.DataFrame) -> pd.DataFrame:
    """
    Filter the DataFrame by the selected precisions.

    If no precision is selected, return an empty DataFrame.
    """
    if not selected_precisions:
        return df.iloc[0:0].copy()  # same structure, but no rows
    precision_col = AutoEvalColumn.precision.name
    if precision_col not in df.columns:
        return df
    # Keep rows whose precision matches any of the selected values
    mask = df[precision_col].isin(selected_precisions)
    filtered_df = df.loc[mask, :]
    return filtered_df


def search_models_in_dataframe(search_text: str, df: pd.DataFrame) -> pd.DataFrame:
    """
    Search the DataFrame for model names containing the given keywords.

    Supports multiple comma-separated keywords; rows matching any keyword are kept.
    """
    if not search_text or not search_text.strip():
        return df
    # Split on commas, strip whitespace, and lowercase for matching
    import re

    keywords = [keyword.strip().lower() for keyword in search_text.split(',') if keyword.strip()]
    if not keywords:
        return df
    if 'Model' not in df.columns:
        return df

    # Matcher: extract the plain text from the HTML link and check it against the keywords
    def matches_search(model_cell):
        if pd.isna(model_cell):
            return False
        # The cell is either an HTML link of the form <a ...>model_name</a> or plain text
        text = str(model_cell)
        # Take the content inside the <a>...</a> tag, otherwise use the text as is
        match = re.search(r'<a[^>]*>([^<]+)</a>', text, re.IGNORECASE)
        if match:
            model_name = match.group(1).lower()
        else:
            model_name = text.lower()
        # Check whether any keyword is contained in the name
        return any(keyword in model_name for keyword in keywords)

    # Apply the search filter
    mask = df['Model'].apply(matches_search)
    filtered_df = df.loc[mask, :]
    return filtered_df
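

# Illustrative sketch, not wired into the app: what search_models_in_dataframe matches on.
# The cell values below are invented; they only mirror the "<a ...>model_name</a>" markup
# that the leaderboard's 'Model' column is expected to contain.
def _demo_search_models_in_dataframe() -> None:
    demo_df = pd.DataFrame(
        {
            "Model": [
                '<a href="https://huggingface.co/org/alpha-7b">org/alpha-7b</a>',
                "plain-text-model",
            ],
            "Average ⬆️": [81.2, 63.4],
        }
    )
    # "alpha, plain" keeps both rows: each keyword is checked against the link text or the raw cell text.
    print(search_models_in_dataframe("alpha, plain", demo_df))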


def init_leaderboard_tabs(
    dataframe: pd.DataFrame,
    cols: list[str],
    not_supported_cols: list[str],
):
    # Keep the original DataFrame for later filtering (captured in the closures below)
    original_df = dataframe.copy()
    available_precisions = sorted(original_df["Precision"].dropna().unique().tolist())
    default_precision = (
        ['bfloat16']
        if 'bfloat16' in available_precisions
        else (available_precisions[:1] if available_precisions else [])
    )
    # Columns displayed initially (base columns plus the columns selected by default)
    default_selected = [col for col in dataframe.columns if col in cols and col not in not_supported_cols] + [
        'Average ⬆️'
    ]
    # Filter original_df by precision first
    precision_filtered_df = filter_dataframe_by_precision(default_precision, original_df)
    # Then filter the DataFrame by the default column selection
    initial_filtered_df = filter_dataframe_by_columns(default_selected, precision_filtered_df)
    # Format numeric columns with Styler, keeping one decimal place
    initial_styler = format_dataframe_with_styler(initial_filtered_df)

    with gr.Row():
        with gr.Column(scale=1):
            search = gr.Textbox(label="Search", placeholder="Separate multiple queries with commas")
            column_choices = [
                col
                for col in dataframe.columns
                if col not in ['T', 'Model'] and (not not_supported_cols or col not in not_supported_cols)
            ]
            show_columns = gr.CheckboxGroup(
                choices=column_choices,
                label="Select Columns to Display",
                value=default_selected,
                interactive=True,
            )
        with gr.Column(scale=1, visible=False):
            _model_type = gr.CheckboxGroup(
                [],
                label="Model Type",
                value=[],
            )
            precision = gr.CheckboxGroup(
                choices=available_precisions,
                label="Precision",
                value=default_precision,
                interactive=True,
            )
            _hide_models = gr.CheckboxGroup(
                ['Deleted/incomplete'],
                label="Hide Models",
                value=['Deleted/incomplete'],
                interactive=True,
            )
    with gr.Row():
        with gr.Column(scale=3):
            leaderboard = gr.Dataframe(
                value=initial_styler,  # a Styler object, so the display is formatted
                interactive=False,
                wrap=False,
                datatype='markdown',
                elem_id="auto-width-dataframe",
            )

    # Unified update function: applies precision, column selection, and search together
    def update_dataframe(search_text: str, selected_cols: list[str], selected_precisions: list[str]):
        # Filter original_df by precision first
        precision_filtered_df = filter_dataframe_by_precision(selected_precisions, original_df)
        # Then filter by the selected columns
        column_filtered_df = filter_dataframe_by_columns(selected_cols, precision_filtered_df)
        # Finally filter by the search keywords
        final_df = search_models_in_dataframe(search_text, column_filtered_df)
        # Format numeric columns with Styler, keeping one decimal place
        final_styler = format_dataframe_with_styler(final_df)
        return final_styler

    # Bind the change events of search, column selection, and precision to update the DataFrame dynamically
    search.change(
        fn=update_dataframe,
        inputs=[search, show_columns, precision],
        outputs=leaderboard,
    )
    show_columns.change(
        fn=update_dataframe,
        inputs=[search, show_columns, precision],
        outputs=leaderboard,
    )
    precision.change(
        fn=update_dataframe,
        inputs=[search, show_columns, precision],
        outputs=leaderboard,
    )
    return leaderboard


def main():
    demo = gr.Blocks(css_paths=[custom_css, backend_status_indicator_css])
    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        with gr.Tabs(elem_classes="tab-buttons") as _tabs:
            with gr.TabItem("📝 Overview", elem_id="benchmark-overview-tab", id=0):
                benchmark_cols = BENCHMARK_COLS.copy()
                print("benchmark_cols:", benchmark_cols)
                cols = BASE_COLS + benchmark_cols
                benchmark_df = get_leaderboard_df(
                    settings.EVAL_RESULTS_PATH,
                    settings.EVAL_REQUESTS_PATH,
                    cols,
                    benchmark_cols,
                )
                _leaderboard = init_leaderboard_tabs(benchmark_df, benchmark_cols, NOT_SUPPORTED_COLS)

            i_bench = 1
            if settings.ENABLE_BENCHMARK_TABS:
                for i_bench, benchmark in enumerate(sorted(BENCHMARKS), start=1):
                    with gr.TabItem(f"🏅 {benchmark.title}", elem_id="llm-benchmark-tab-table", id=i_bench):
                        print(f"benchmark.title: {benchmark.title!r}")
                        benchmark_cols = [col for col in BENCHMARK_COLS if col.startswith(benchmark.title)]
                        cols = BASE_COLS + benchmark_cols
                        benchmark_df = get_leaderboard_df(
                            settings.EVAL_RESULTS_PATH,
                            settings.EVAL_REQUESTS_PATH,
                            cols,
                            benchmark_cols,
                        )
                        _leaderboard = init_leaderboard_tabs(benchmark_df, benchmark_cols, NOT_SUPPORTED_COLS)

            with gr.TabItem("📝 About", elem_id="about-tab", id=i_bench + 1):
                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

            if settings.ENABLE_SUBMISSION:
                with gr.TabItem("🚀 Submit here! ", elem_id="submit-tab", id=i_bench + 2):
                    with gr.Column():
                        with gr.Row():
                            gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
                        with gr.Column():
                            with gr.Accordion(
                                f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
                                open=False,
                            ):
                                with gr.Row():
                                    _finished_eval_table = grc.Dataframe(
                                        value=finished_eval_queue_df,
                                        headers=EVAL_COLS,
                                        datatype=EVAL_TYPES,
                                        row_count=5,
                                    )
                            with gr.Accordion(
                                f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
                                open=False,
                            ):
                                with gr.Row():
                                    _running_eval_table = grc.Dataframe(
                                        value=running_eval_queue_df,
                                        headers=EVAL_COLS,
                                        datatype=EVAL_TYPES,
                                        row_count=5,
                                    )
                            with gr.Accordion(
                                f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
                                open=False,
                            ):
                                with gr.Row():
                                    _pending_eval_table = grc.Dataframe(
                                        value=pending_eval_queue_df,
                                        headers=EVAL_COLS,
                                        datatype=EVAL_TYPES,
                                        row_count=5,
                                    )
                    with gr.Row():
                        gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
                    with gr.Row():
                        search_name = gr.Textbox(label="search model name", placeholder="user/model_name")
                    with gr.Row():
                        table = gr.Dataframe(
                            headers=["Model Name", "Pipeline", "Downloads", "Likes"],
                            datatype=["str", "str", "number", "number"],
                            interactive=False,
                            wrap=True,
                            label="click model name to select",
                        )
                    with gr.Row():
                        with gr.Column():
                            model_name_textbox = gr.Textbox(label="Model name", placeholder="user/model_name")
                            revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
                            model_type = gr.Dropdown(
                                choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
                                label="Model type",
                                multiselect=False,
                                value=None,
                                interactive=True,
                            )

                            def search_models(query):
                                if not query.strip():
                                    return []
                                models = API.list_models(search=query, limit=10)
                                results = []
                                for m in models:
                                    results.append([m.id, m.pipeline_tag or "N/A", m.downloads or 0, m.likes or 0])
                                return results

                            def on_select(evt: gr.SelectData, data):
                                row_idx = evt.index[0]  # row index of the clicked cell
                                if row_idx < len(data):
                                    return data.iloc[row_idx, 0]  # return the model name
                                return ""

                            search_name.change(fn=search_models, inputs=search_name, outputs=table)
                            table.select(fn=on_select, inputs=table, outputs=model_name_textbox)

                            def file_to_json_str(file) -> str:
                                """
                                Read the uploaded JSON file and return its contents as a string.

                                If parsing fails, raise an error so the upload does not succeed.
                                """
                                if file is None:
                                    return ""
                                try:
                                    import json

                                    # `file` is a file path string (when type="filepath")
                                    file_path = file if isinstance(file, str) else file.name
                                    with open(file_path, encoding='utf-8') as f:
                                        json_data = json.load(f)
                                    # Convert the JSON object into a formatted string
                                    json_str = json.dumps(json_data, indent=2, ensure_ascii=False)
                                    return json_str
                                except Exception as e:
                                    raise gr.Error(f"Error reading JSON file: {str(e)}")

                            json_upload = gr.File(
                                label="Upload JSON file",
                                file_types=[".json"],
                                type="filepath",
                            )
                            json_str = gr.Textbox(
                                label="JSON Content",
                                placeholder="JSON content will appear here after upload",
                                lines=10,
                                interactive=True,
                                visible=False,
                            )
                            json_upload.upload(
                                fn=file_to_json_str,
                                inputs=json_upload,
                                outputs=json_str,
                            )
                        with gr.Column():
                            precision = gr.Dropdown(
                                choices=[i.value.name for i in Precision if i != Precision.Unknown],
                                label="Precision",
                                multiselect=False,
                                value="float16",
                                interactive=True,
                            )
                            weight_type = gr.Dropdown(
                                choices=[i.value.name for i in WeightType],
                                label="Weights type",
                                multiselect=False,
                                value="Original",
                                interactive=True,
                            )
                            base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
                            commit_textbox = gr.Textbox(label="Commits")

                    submit_button = gr.Button("Submit Eval")
                    submission_result = gr.Markdown()
                    submit_button.click(
                        add_new_submit,
                        [
                            model_name_textbox,
                            base_model_name_textbox,
                            revision_name_textbox,
                            precision,
                            weight_type,
                            json_str,
                            commit_textbox,
                        ],
                        submission_result,
                    )

        # Backend status indicator
        backend_status = gr.HTML(
            value=get_backend_status_undefined_html(),
            elem_id="backend-status-container",
        )
        # Hidden trigger button used to bind the click event
        status_trigger = gr.Button(elem_id="backend-status-trigger-btn", visible=False)
        status_trigger.click(
            fn=lambda: check_backend_health()[1],
            inputs=None,
            outputs=backend_status,
        )
        # Load the external JavaScript file
        js_content = backend_status_js()
        status_trigger_js_html = f'<script>{js_content}</script>'
        gr.HTML(status_trigger_js_html, visible=False)
        demo.load(
            fn=lambda: check_backend_health()[1],
            inputs=None,
            outputs=backend_status,
        )

        with gr.Row():
            with gr.Accordion("📙 Citation", open=False):
                _citation_button = gr.Textbox(
                    value=CITATION_BUTTON_TEXT,
                    label=CITATION_BUTTON_LABEL,
                    lines=20,
                    elem_id="citation-button",
                    show_copy_button=True,
                )

    return demo


def get_backend_status_undefined_html() -> str:
    """
    Return the HTML for the undefined state (before the first health check).
    """
    return backend_status_indicator_html("undefined")


def check_backend_health() -> tuple[bool, str]:
    """
    Query the backend health status.

    Returns: (is_healthy, status_html)
    """
    try:
        response = requests.get(f"http://localhost:{settings.BACKEND_PORT}/api/v1/health/", timeout=2)
        if response.status_code == 200:
            data = response.json()
            if data.get("code") == 0:
                return (
                    True,
                    backend_status_indicator_html("healthy"),
                )
        return (
            False,
            backend_status_indicator_html("unhealthy"),
        )
    except Exception:
        return (
            False,
            backend_status_indicator_html("unhealthy"),
        )
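

# Illustrative sketch, not wired into the app: demo.load and the hidden trigger button above only
# consume the second element of this tuple (the status HTML). The health endpoint's response body
# is defined by the backend in src.backend.app; this file only checks for HTTP 200 with a top-level
# "code" equal to 0.
def _demo_check_backend_health() -> None:
    is_healthy, status_html = check_backend_health()
    # With no backend listening on settings.BACKEND_PORT this prints False plus the "unhealthy" HTML.
    print(is_healthy, status_html)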


if __name__ == "__main__":
    demo = main()

    # Backend server - runs in a separate thread
    app = create_app()

    def run_fastapi():
        host = settings.BACKEND_HOST
        port = settings.BACKEND_PORT
        print("Starting FastAPI server:")
        uvicorn.run(
            app,
            host=host,
            port=port,
            log_level="debug",
            access_log=True,
        )

    fastapi_thread = threading.Thread(target=run_fastapi, daemon=True)
    fastapi_thread.start()

    # Gradio server - runs in the main thread (blocking)
    scheduler = BackgroundScheduler()
    scheduler.add_job(restart_space, "interval", seconds=1800)
    scheduler.start()
    demo.queue(default_concurrency_limit=40).launch()