davidberenstein1957 committed
Commit bcec9c2 · 1 Parent(s): 8c2887a

feat: add Gradio dashboard and leaderboard functionality with updated dependencies
README.md CHANGED
@@ -1,4 +1,20 @@
+ ---
+ title: InferBench
+ emoji: 🥇
+ colorFrom: green
+ colorTo: indigo
+ sdk: gradio
+ app_file: dashboard/app.py
+ pinned: true
+ license: apache-2.0
+ short_description: A cost/quality/speed Leaderboard for Inference Providers!
+ sdk_version: 5.19.0
+ tags:
+ - leaderboard
+ ---
+
  # InferBench
+
  Evaluate the quality and efficiency of image gen APIs.

  ## Installation
@@ -35,3 +51,21 @@ This is how you would evaluate the benchmarks once you have all images:
  ```
  python evaluate.py replicate draw_bench genai_bench geneval hps parti
  ```
+
+ ## Dashboard
+
+ To run the dashboard, you can use the following command:
+
+ ```
+ python dashboard/app.py
+ ```
+
+ To deploy the dashboard, you can use the following commands:
+
+ ```
+ git remote add hf https://huggingface.co/spaces/PrunaAI/InferBench
+ ```
+
+ ```
+ git push hf main
+ ```
dashboard/app.py ADDED
@@ -0,0 +1,195 @@
+ from pathlib import Path
+
+ import gradio as gr
+ import pandas as pd
+ from gradio_leaderboard import Leaderboard
+
+ custom_css = """
+ .logo {
+     width: 300px;
+     height: auto;
+     max-width: 100%;
+     margin: 0 auto;
+     object-fit: contain;
+     padding-bottom: 0;
+ }
+ .text {
+     font-size: 16px !important;
+ }
+ .tabs button {
+     font-size: 20px;
+ }
+ .subtabs button {
+     font-size: 20px;
+ }
+ h1, h2 {
+     margin: 0;
+     padding-top: 0;
+ }
+ """
+
+ # Override this method to work around a config-validation bug in gradio_leaderboard
+ Leaderboard.raise_error_if_incorrect_config = lambda self: None
+
+ abs_path = Path(__file__).parent / "data"
+
+ # Load the JSONL file into a pandas DataFrame
+ df = pd.read_json(abs_path / "text_to_image.jsonl", lines=True)
+
+ df["URL"] = df.apply(
+     lambda row: f'<a target="_blank" href="{row["URL"]}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">link</a>',
+     axis=1,
+ )
+ df = df[
+     [
+         "URL",
+         "Platform",
+         "Owner",
+         "Device",
+         "Model",
+         "Optimization",
+         "Median Inference Time",
+         "Price per Image",
+     ]
+     + [
+         col
+         for col in df.columns.tolist()
+         if col
+         not in [
+             "URL",
+             "Model",
+             "Median Inference Time",
+             "Price per Image",
+             "Platform",
+             "Owner",
+             "Device",
+             "Optimization",
+         ]
+     ]
+ ]
+ df = df.sort_values(by="GenEval", ascending=False)
+
+ with gr.Blocks("ParityError/Interstellar", fill_width=True, css=custom_css) as demo:
+     gr.HTML(
+         """
+ <div style="text-align: center;">
+ <img src="https://huggingface.co/datasets/PrunaAI/documentation-images/resolve/main/inferbench/logo2-cropped.png" style="width: 200px; height: auto; max-width: 100%; margin: 0 auto;">
+ <h1>🏋️ InferBench 🏋️</h1>
+ <h2>A cost/quality/speed Leaderboard for Inference Providers!</h2>
+ </div>
+ """
+     )
+     with gr.Tabs():
+         with gr.TabItem("Text-to-Image Leaderboard"):
+             Leaderboard(
+                 value=df,
+                 select_columns=df.columns.tolist(),
+                 datatype=[
+                     "markdown",
+                     "markdown",
+                     "markdown",
+                     "markdown",
+                     "markdown",
+                     "markdown",
+                 ]
+                 + ["number"] * (len(df.columns.tolist()) - 6),
+                 filter_columns=[
+                     "Platform",
+                     "Owner",
+                     "Device",
+                     "Model",
+                     "Optimization",
+                 ],
+             )
+             gr.Markdown(
+                 """
+ > **💡 Note:** Each efficiency metric and quality metric captures only one dimension of model capacity. Rankings may vary when considering other metrics.
+ """
+             )
+         with gr.TabItem("About"):
+             with gr.Row():
+                 with gr.Column():
+                     gr.Markdown(
+                         """
+ # 📊 Text-to-Image Leaderboard
+
+ This leaderboard compares the performance of different text-to-image providers.
+
+ We started with a comprehensive benchmark comparing our very own FLUX-juiced with the “FLUX.1 [dev]” endpoints offered by:
+
+ - Replicate: https://replicate.com/black-forest-labs/flux-dev
+ - Fal: https://fal.ai/models/fal-ai/flux/dev
+ - Fireworks AI: https://fireworks.ai/models/fireworks/flux-1-dev-fp8
+ - Together AI: https://www.together.ai/models/flux-1-dev
+
+ We also included the following non-FLUX providers:
+
+ - AWS Nova Canvas: https://aws.amazon.com/ai/generative-ai/nova/creative/
+
+ All of these inference providers offer their own implementations, but they don’t always communicate about the optimisation methods used in the background, and most endpoints have different response times and performance measures.
+
+ For comparison purposes, we used the same generation set-up for all the providers:
+
+ - 28 inference steps
+ - 1024×1024 resolution
+ - Guidance scale of 3.5
+ - H100 GPU (80GB)—only reported by Replicate
+
+ Although we did test with this specific Pruna configuration and hardware, the applied compression methods work with different configurations and hardware too!
+
+ > We published a full blog post on [the creation of our FLUX-juiced endpoint](https://www.pruna.ai/blog/flux-juiced-the-fastest-image-generation-endpoint).
+ """
+                     )
+                 with gr.Column():
+                     gr.Markdown(
+                         """
+ # 🧃 FLUX.1-dev (juiced)
+
+ FLUX.1-dev (juiced) is our optimized version of FLUX.1-dev, delivering up to **2.6x faster inference** than the official Replicate API, **without sacrificing image quality**.
+
+ Under the hood, it uses a custom combination of:
+
+ - **Graph compilation** for optimized execution paths
+ - **Inference-time caching** for repeated operations
+
+ We won’t go deep into the internals here, but here’s the gist:
+
+ > We combine compiler-level execution graph optimization with selective caching of heavy operations (like attention layers), allowing inference to skip redundant computations without any loss in fidelity.
+
+ These techniques are generalized and plug-and-play via the **Pruna Pro** pipeline, and can be applied to nearly any diffusion-based image model—not just FLUX. For a free but still very juicy model, you can use our open-source solution.
+
+ > 🧪 Try FLUX-juiced now → [replicate.com/prunaai/flux.1-juiced](https://replicate.com/prunaai/flux.1-juiced)
+
+ ## Sample Images
+
+ The prompts were randomly sampled from the [parti-prompts dataset](https://github.com/google-research/parti). The reported times represent the full duration of each API call.
+
+ > **For samples, check out the [Pruna Notion page](https://pruna.notion.site/FLUX-1-dev-vs-Pruna-s-FLUX-juiced-1d270a039e5f80c6a2a3c00fc0d75ef0)**
+ """
+                     )
+
+     with gr.Accordion("🌍 Join the Pruna AI community!", open=False):
+         gr.HTML(
+             """
+ <a rel="nofollow" href="https://twitter.com/PrunaAI"><img alt="Twitter" src="https://img.shields.io/twitter/follow/PrunaAI?style=social"></a>
+ <a rel="nofollow" href="https://github.com/PrunaAI/pruna"><img alt="GitHub" src="https://img.shields.io/github/stars/prunaai/pruna"></a>
+ <a rel="nofollow" href="https://www.linkedin.com/company/93832878/admin/feed/posts/?feedType=following"><img alt="LinkedIn" src="https://img.shields.io/badge/LinkedIn-Connect-blue"></a>
+ <a rel="nofollow" href="https://discord.com/invite/rskEr4BZJx"><img alt="Discord" src="https://img.shields.io/badge/Discord-Join%20Us-blue?style=social&amp;logo=discord"></a>
+ <a rel="nofollow" href="https://www.reddit.com/r/PrunaAI/"><img alt="Reddit" src="https://img.shields.io/reddit/subreddit-subscribers/PrunaAI?style=social"></a>
+ """
+         )
+     with gr.Accordion("Citation", open=True):
+         gr.Markdown(
+             """
+ ```bibtex
+ @article{InferBench,
+   title={InferBench: A Leaderboard for Inference Providers},
+   author={PrunaAI},
+   year={2025},
+   howpublished={\\url{https://huggingface.co/spaces/PrunaAI/InferBench}}
+ }
+ ```
+ """
+         )
+ if __name__ == "__main__":
+     demo.launch()
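
The About tab above fixes the generation settings used for every provider (28 inference steps, 1024×1024 resolution, guidance scale 3.5). For orientation, here is a minimal sketch of what an equivalent local baseline call looks like with `diffusers`; the checkpoint ID, dtype, and prompt are assumptions for illustration and are not part of this commit:

```python
# Hedged sketch: a local FLUX.1-dev call mirroring the benchmark settings above.
# The checkpoint ID, dtype, and prompt are assumptions, not part of this commit.
import torch
from diffusers import FluxPipeline

pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16
)
pipe.to("cuda")  # the leaderboard reports a single H100 (80GB) where the device is disclosed

image = pipe(
    "a photo of an astronaut riding a horse",  # placeholder prompt
    num_inference_steps=28,  # same step count as the benchmark
    guidance_scale=3.5,      # same guidance scale as the benchmark
    height=1024,
    width=1024,              # 1024×1024 resolution
).images[0]
image.save("sample.png")
```

The leaderboard numbers themselves come from the providers' hosted endpoints, not from a local run like this one.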
dashboard/data/text_to_image.jsonl ADDED
@@ -0,0 +1,9 @@
+ {"Platform": "Replicate", "Owner": "Pruna AI", "Device": "1xH100", "Model": "FLUX.1-dev", "Optimization": "none", "URL": "https://huggingface.co/black-forest-labs/FLUX.1-dev?library=diffusers", "GenEval": 67.98, "HPS (v2.1)": 30.36, "GenAI-Bench (VQA)": 0.74, "DrawBench (Image Reward)": 1.0072, "PartiPromts (ARNIQA)": 0.6758, "PartiPromts (ClipIQA)": 0.8968, "PartiPromts (ClipScore)": 27.4, "PartiPromts (Sharpness - Laplacian Variance)": 6833, "Median Inference Time": 6.88, "Price per Image": 0.025}
+ {"Platform": "fal.ai", "Owner": "fal.ai", "Device": "Undisclosed", "Model": "FLUX.1-dev", "Optimization": "Undisclosed", "URL": "https://fal.ai/models/fal-ai/flux/dev", "GenEval": 68.72, "HPS (v2.1)": 29.97, "GenAI-Bench (VQA)": 0.7441, "DrawBench (Image Reward)": 1.0084, "PartiPromts (ARNIQA)": 0.6702, "PartiPromts (ClipIQA)": 0.8967, "PartiPromts (ClipScore)": 27.61, "PartiPromts (Sharpness - Laplacian Variance)": 7295, "Median Inference Time": 4.06, "Price per Image": 0.025}
+ {"Platform": "Fireworks AI", "Owner": "Fireworks AI", "Device": "Undisclosed", "Model": "FLUX.1-dev", "Optimization": "fp8", "URL": "https://fireworks.ai/models/fireworks/flux-1-dev-fp8", "GenEval": 65.55, "HPS (v2.1)": 30.26, "GenAI-Bench (VQA)": 0.7455, "DrawBench (Image Reward)": 0.9467, "PartiPromts (ARNIQA)": 0.6639, "PartiPromts (ClipIQA)": 0.8478, "PartiPromts (ClipScore)": 27.24, "PartiPromts (Sharpness - Laplacian Variance)": 5625, "Median Inference Time": 4.66, "Price per Image": 0.014}
+ {"Platform": "Replicate", "Owner": "Pruna AI", "Device": "1xH100", "Model": "FLUX.1-dev", "Optimization": "extra juiced", "URL": "https://replicate.com/prunaai/flux.1-juiced", "GenEval": 69.9, "HPS (v2.1)": 29.86, "GenAI-Bench (VQA)": 0.7466, "DrawBench (Image Reward)": 0.9458, "PartiPromts (ARNIQA)": 0.6591, "PartiPromts (ClipIQA)": 0.8887, "PartiPromts (ClipScore)": 27.6, "PartiPromts (Sharpness - Laplacian Variance)": 7997, "Median Inference Time": 2.6, "Price per Image": 0.004}
+ {"Platform": "Replicate", "Owner": "Pruna AI", "Device": "1xH100", "Model": "FLUX.1-dev", "Optimization": "juiced", "URL": "https://replicate.com/prunaai/flux.1-juiced", "GenEval": 68.64, "HPS (v2.1)": 30.38, "GenAI-Bench (VQA)": 0.7408, "DrawBench (Image Reward)": 0.9657, "PartiPromts (ARNIQA)": 0.6762, "PartiPromts (ClipIQA)": 0.9014, "PartiPromts (ClipScore)": 27.55, "PartiPromts (Sharpness - Laplacian Variance)": 7627, "Median Inference Time": 3.14, "Price per Image": 0.0048}
+ {"Platform": "Replicate", "Owner": "Pruna AI", "Device": "1xH100", "Model": "FLUX.1-dev", "Optimization": "lightly juiced", "URL": "https://replicate.com/prunaai/flux.1-lightly-juiced", "GenEval": 69.12, "HPS (v2.1)": 30.36, "GenAI-Bench (VQA)": 0.7405, "DrawBench (Image Reward)": 0.9972, "PartiPromts (ARNIQA)": 0.6789, "PartiPromts (ClipIQA)": 0.9031, "PartiPromts (ClipScore)": 27.56, "PartiPromts (Sharpness - Laplacian Variance)": 7849, "Median Inference Time": 3.57, "Price per Image": 0.0054}
+ {"Platform": "Replicate", "Owner": "Black Forest Labs", "Device": "1xH100", "Model": "FLUX.1-dev", "Optimization": "go_fast", "URL": "https://replicate.com/black-forest-labs/flux-dev", "GenEval": 67.41, "HPS (v2.1)": 29.25, "GenAI-Bench (VQA)": 0.7547, "DrawBench (Image Reward)": 0.9282, "PartiPromts (ARNIQA)": 0.6356, "PartiPromts (ClipIQA)": 0.8609, "PartiPromts (ClipScore)": 27.56, "PartiPromts (Sharpness - Laplacian Variance)": 4872, "Median Inference Time": 3.38, "Price per Image": 0.025}
+ {"Platform": "Together AI", "Owner": "Together AI", "Device": "Undisclosed", "Model": "FLUX.1-dev", "Optimization": "Undisclosed", "URL": "https://www.together.ai/models/flux-1-dev", "GenEval": 64.61, "HPS (v2.1)": 30.22, "GenAI-Bench (VQA)": 0.7339, "DrawBench (Image Reward)": 0.9463, "PartiPromts (ARNIQA)": 0.5752, "PartiPromts (ClipIQA)": 0.8709, "PartiPromts (ClipScore)": 27.31, "PartiPromts (Sharpness - Laplacian Variance)": 4501, "Median Inference Time": 3.38, "Price per Image": 0.025}
+ {"Platform": "AWS", "Owner": "AWS", "Device": "Undisclosed", "Model": "AWS Nova Canvas", "Optimization": "Undisclosed", "URL": "https://aws.amazon.com/ai/generative-ai/nova/creative/", "GenEval": null, "HPS (v2.1)": null, "GenAI-Bench (VQA)": null, "DrawBench (Image Reward)": 1.07, "PartiPromts (ARNIQA)": 0.65, "PartiPromts (ClipIQA)": 0.954, "PartiPromts (ClipScore)": 28.1, "PartiPromts (Sharpness - Laplacian Variance)": 10514, "Median Inference Time": 3.65, "Price per Image": null}
pyproject.toml CHANGED
@@ -24,6 +24,8 @@ dependencies = [
  "diffusers<=0.31",
  "piq>=0.8.0",
  "boto3>=1.39.4",
+ "gradio>=5.37.0",
+ "gradio-leaderboard>=0.0.14",
  ]

  [tool.hatch.build.targets.wheel]