Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Alina Lozovskaya
committed on
Commit
·
d1ed69b
1
Parent(s):
4111351
First commit
Browse files- .gitignore +45 -0
- Dockerfile +30 -0
- app.py +152 -0
- pyproject.toml +26 -0
- uv.lock +0 -0
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# App-generated artifacts and local scratch files
logs/
log_backups/
plots/
files/
.gradio/
fr.sh
repo_contents.txt
# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Use Python 3.12.1 slim image as base
FROM python:3.12.1-slim

# Install dependencies required for UV and Python packages
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl ca-certificates git && \
    rm -rf /var/lib/apt/lists/*

# Install UV (fast Python dependency manager)
RUN curl -LsSf https://astral.sh/uv/install.sh | sh

# Ensure UV is available in PATH
ENV PATH="/root/.local/bin:$PATH"

# Set working directory
WORKDIR /app

# Copy the project manifest AND the committed lockfile so `uv sync --frozen`
# installs the exact resolved dependency set instead of re-resolving on every
# build (the repo ships uv.lock, but it was never copied into the image).
COPY pyproject.toml uv.lock ./
RUN uv venv && uv sync --frozen

# Copy application code last so source edits don't invalidate the cached
# dependency layer above
COPY app.py .

# Expose Gradio app port
EXPOSE 7860
ENV GRADIO_SERVER_NAME="0.0.0.0"

# Entrypoint to run the Gradio app
ENTRYPOINT ["uv", "run", "python", "app.py"]
app.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import time
|
| 3 |
+
import pathlib
|
| 4 |
+
import threading
|
| 5 |
+
import shutil
|
| 6 |
+
import gradio as gr
|
| 7 |
+
import yaml
|
| 8 |
+
import io
|
| 9 |
+
|
| 10 |
+
from loguru import logger
|
| 11 |
+
from yourbench.pipeline import run_pipeline
|
| 12 |
+
|
# Directory where user-uploaded documents are stored; created eagerly so the
# Gradio FileExplorer below always has a valid root to browse.
UPLOAD_DIRECTORY = pathlib.Path("/app/uploaded_files")
UPLOAD_DIRECTORY.mkdir(parents=True, exist_ok=True)

# Path the generated YAML config is written to (save_config) and read from
# when the pipeline is launched.
CONFIG_PATH = pathlib.Path("/app/yourbench_config.yml")

# In-memory buffer that accumulates yourbench log lines for live display in the UI.
yourbench_log_stream = io.StringIO()
def custom_log_handler(message):
    """Loguru sink: append one log record to the shared in-memory buffer.

    NOTE(review): loguru sinks typically receive messages that already end
    with a newline, so the extra "\n" appended here may double-space the
    displayed log — confirm against the loguru sink docs.
    """
    yourbench_log_stream.write(f"{message}\n")
def get_log_content():
    """Return all text accumulated in the yourbench log buffer so far.

    Rewinds the shared StringIO and reads it fully; after the read the stream
    position sits at the end again, so subsequent sink writes still append.
    Called by the UI timer every 0.5 s to refresh the log panel.

    Fix: removed the leftover debug `print(len(content))`, which wrote to
    stdout on every timer tick.
    """
    yourbench_log_stream.seek(0)
    return yourbench_log_stream.read()
# Route yourbench's loguru records into the in-memory buffer so the UI can
# poll them; the filter keeps this app's own logging out of the panel.
logger.add(custom_log_handler, filter="yourbench")
def start_task():
    """Launch the yourbench pipeline in a background daemon thread.

    The thread is deliberately NOT joined: the original code called
    `task_thread.join()` right after `start()`, which blocked the Gradio
    callback until the whole pipeline finished — defeating the stated purpose
    of running it in a separate thread. The UI's 0.5 s timer polls the log
    buffer for progress instead, so the callback can return immediately.
    """
    task_thread = threading.Thread(target=run_pipeline, args=(CONFIG_PATH,), daemon=True)
    task_thread.start()
def generate_config(
    hf_token,
    hf_org,
    model_name,
    provider,
    base_url,
    api_key,
    max_concurrent_requests,
    ingestion_source,
    ingestion_output,
    run_ingestion,
    summarization_source,
    summarization_output,
    run_summarization
):
    """Build a YourBench YAML configuration string from the UI form values.

    Assembles the HF credentials section, a single-entry model list, and the
    ingestion/summarization pipeline stages, then serializes the whole
    mapping with yaml.dump in block style.
    """
    hf_section = {
        "token": hf_token,
        "private": True,
        "hf_organization": hf_org,
    }
    model_entry = {
        "model_name": model_name,
        "provider": provider,
        "base_url": base_url,
        "api_key": api_key,
        "max_concurrent_requests": max_concurrent_requests,
    }
    pipeline_section = {
        "ingestion": {
            "source_documents_dir": ingestion_source,
            "output_dir": ingestion_output,
            "run": run_ingestion,
        },
        "summarization": {
            "source_dataset_name": summarization_source,
            "output_dataset_name": summarization_output,
            "run": run_summarization,
        },
    }
    config = {
        "hf_configuration": hf_section,
        "model_list": [model_entry],
        "pipeline": pipeline_section,
    }
    return yaml.dump(config, default_flow_style=False)
def save_config(yaml_text):
    """Persist the YAML text from the UI to CONFIG_PATH.

    Writes with an explicit UTF-8 encoding so the file (and the emoji in the
    status message below) round-trip regardless of the container's locale.

    Fix: the old success message claimed the file was saved as "config.yaml",
    but the path actually written is yourbench_config.yml — report the real
    filename instead of a hard-coded wrong one.
    """
    CONFIG_PATH.write_text(yaml_text, encoding="utf-8")
    return f"✅ Config saved as {CONFIG_PATH.name}!"
def save_files(files: list[str]):
    """Move uploaded temp files into UPLOAD_DIRECTORY and report where they went.

    Gradio hands us the temp-file paths of the uploads; each one is moved
    into the shared upload directory under its original basename.
    """
    stored = []
    for uploaded in files:
        source = pathlib.Path(uploaded)
        destination = UPLOAD_DIRECTORY / source.name
        shutil.move(str(source), str(destination))
        stored.append(str(destination))
    return f"Files have been successfully saved to: {', '.join(stored)}"
def start_youbench():
    """Run the full yourbench pipeline synchronously with the saved config.

    NOTE(review): the name looks like a typo of "yourbench", but it is kept
    unchanged so any existing callers keep working.
    """
    run_pipeline(CONFIG_PATH, debug=False)
# --- Gradio UI -------------------------------------------------------------
app = gr.Blocks()

with app:
    gr.Markdown("## YourBench Configuration")

    # Credentials feeding the generated config's hf_configuration section.
    with gr.Tab("HF Configuration"):
        hf_token = gr.Textbox(label="HF Token")
        hf_org = gr.Textbox(label="HF Organization")

    # Settings for the single entry in the config's model_list.
    with gr.Tab("Model Settings"):
        model_name = gr.Textbox(label="Model Name")
        provider = gr.Dropdown(["openrouter", "openai", "huggingface"], value="huggingface", label="Provider")
        base_url = gr.Textbox(label="Base URL")
        api_key = gr.Textbox(label="API Key")
        max_concurrent_requests = gr.Dropdown([8, 16, 32], value=16, label="Max Concurrent Requests")

    # Per-stage paths and run toggles for the config's pipeline section.
    with gr.Tab("Pipeline Stages"):
        ingestion_source = gr.Textbox(label="Ingestion Source Directory")
        ingestion_output = gr.Textbox(label="Ingestion Output Directory")
        run_ingestion = gr.Checkbox(label="Run Ingestion", value=False)
        summarization_source = gr.Textbox(label="Summarization Source Dataset")
        summarization_output = gr.Textbox(label="Summarization Output Dataset")
        run_summarization = gr.Checkbox(label="Run Summarization", value=False)

    # Preview the YAML (generate_config) and persist it (save_config).
    with gr.Tab("Config"):
        config_output = gr.Code(label="Generated Config", language="yaml")
        preview_button = gr.Button("Generate Config")
        save_button = gr.Button("Save Config")

        preview_button.click(generate_config,
                             inputs=[hf_token, hf_org, model_name, provider, base_url, api_key,
                                     max_concurrent_requests, ingestion_source, ingestion_output,
                                     run_ingestion, summarization_source, summarization_output, run_summarization],
                             outputs=config_output)

        save_button.click(save_config, inputs=[config_output], outputs=[gr.Textbox(label="Save Status")])

    # Upload source documents into UPLOAD_DIRECTORY; save_files moves them
    # out of Gradio's temp dir and the explorer shows what is already there.
    with gr.Tab("Files"):
        file_input = gr.File(label="Upload text files", file_count="multiple", file_types=[".txt", ".md", ".html"])
        file_explorer = gr.FileExplorer(root_dir=UPLOAD_DIRECTORY, interactive=False, label="Current Files")
        output = gr.Textbox(label="Log")
        file_input.upload(save_files, file_input, output)


    # Start the pipeline thread and poll the in-memory log buffer every 0.5 s.
    with gr.Tab("Run Generation"):
        log_output = gr.Code(label="Log Output", language=None,lines=20, interactive=False)
        start_button = gr.Button("Start Long-Running Task")
        timer = gr.Timer(0.5, active=True)
        timer.tick(get_log_content, outputs=log_output)
        start_button.click(start_task)

app.launch()
[project]
name = "yourbench-space"
version = "0.1.0"
requires-python = ">=3.12, <3.13"

dependencies = [
    "yourbench @ git+https://github.com/huggingface/yourbench.git@v0.2-alpha-summarization",
    # NOTE: the PyPI "asyncio" package was removed from this list. asyncio is
    # part of the standard library; the third-party distribution is a stale
    # Python-3.4-era snapshot that can shadow/conflict with the real stdlib
    # module on Python 3.12.
    "datasets>=3.3.0",
    "gradio>=5.20.0",
    "hf-transfer>=0.1.9",
    "langfuse>=2.59.3",
    "litellm>=1.61.16",
    "loguru>=0.7.3",
    "markitdown>=0.0.1a4",
    "matplotlib>=3.10.0",
    "openai>=1.63.0",
    "python-dotenv>=1.0.1",
    "torch>=2.6.0",
    "tqdm>=4.67.1",
    "transformers>=4.48.3",
]

[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|