Spaces:

PrunaAI
/

InferBench

Running

File size: 7,952 Bytes

1c9c07a
099bd02
 
8f93924
 
a538c9d
8f93924
1c9c07a
 
36e8f6e
a538c9d
36e8f6e
f7399f1
8f93924
1c9c07a
f7399f1
1c9c07a
14b802d
 
1c9c07a
 
 
f7399f1
 
099bd02
 
1c9c07a
d0765d4
f7399f1
 
 
 
721ac40
 
 
 
 
 
 
 
 
 
f7399f1
1c9c07a
 
099bd02
e82e1f8
1c9c07a
099bd02
1c9c07a
a65206d
1c9c07a
 
 
 
8f93924
099bd02
f7399f1
099bd02
 
36e8f6e
721ac40
 
f7399f1
721ac40
 
f7399f1
 
 
 
8f93924
6a09e8e
 
 
 
 
f60ead4
 
 
 
 
f7399f1
f60ead4
f7399f1
 
 
f60ead4
 
 
 
 
 
f7399f1
 
 
 
 
f60ead4
b42cb0b
f60ead4
 
 
 
 
 
b42cb0b
f60ead4
f7399f1
f60ead4
 
 
 
 
f7399f1
f60ead4
f7399f1
f60ead4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53b28ed
f60ead4
 
83c9fd8
1c9c07a
 
099bd02
1c9c07a
 
 
 
 
a65206d
8f93924
975aae7
 
 
1c9c07a
 
 
 
 
 
 
 
 
975aae7
099bd02

import json
from pathlib import Path

import gradio as gr
import pandas as pd
from gradio_leaderboard import Leaderboard

from assets import custom_css

def _skip_config_check(self):
    """No-op stand-in for ``Leaderboard.raise_error_if_incorrect_config``."""
    return None


# Swap the component's config check for a no-op to work around a bug.
# NOTE(review): the bug being avoided is not described here — confirm the
# override is still needed with the installed gradio_leaderboard version.
Leaderboard.raise_error_if_incorrect_config = _skip_config_check

# Directory holding the benchmark data files, located next to this script.
abs_path = Path(__file__).parent / "data"


def _parse_json_records(text):
    """Decode every JSON value found in ``text``.

    Values only need to be separated by whitespace, so this accepts strict
    JSONL (one record per line), records that span several lines, and files
    containing blank lines or trailing whitespace between records.

    Args:
        text: Full contents of the data file.

    Returns:
        A list with one decoded value per JSON document, in file order.
        An empty or whitespace-only ``text`` yields an empty list.

    Raises:
        json.JSONDecodeError: If any record is not valid JSON.
    """
    decoder = json.JSONDecoder()
    records = []
    position = 0
    length = len(text)
    while True:
        # raw_decode() refuses leading whitespace, so skip it explicitly.
        while position < length and text[position].isspace():
            position += 1
        if position == length:
            return records
        record, position = decoder.raw_decode(text, position)
        records.append(record)


# Load the JSONL file into a pandas DataFrame using the json library.
# The records are decoded one by one instead of patching commas into the raw
# text, which failed as soon as a blank line or stray whitespace separated
# two records. JSON is UTF-8 by specification, so the encoding is explicit
# rather than left to the platform default.
with open(abs_path / "text_to_image.jsonl", "r", encoding="utf-8") as file:
    json_data = _parse_json_records(file.read())
df = pd.DataFrame(json_data)

def _render_link(url):
    """Return the anchor markup that replaces a raw value in the URL column."""
    return f'<a target="_blank" href="{url}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">link</a>'


# Replace each raw URL with a clickable "link" anchor that opens in a new tab.
df["URL"] = df["URL"].map(_render_link)
# Columns pinned to the front of the table, in display order.
leading_columns = [
    "URL",
    "Platform",
    "Owner",
    "Device",
    "Model",
    "Optimization",
    "Median Inference Time",
    "Price per Image",
]
# Every other column follows, keeping the order it had in the loaded data.
trailing_columns = [name for name in df.columns.tolist() if name not in leading_columns]
df = df[leading_columns + trailing_columns]

# Highest GenEval score first.
df = df.sort_values(by="GenEval", ascending=False)

# Build the Gradio app: a banner, a tab with the leaderboard table, an "About"
# tab with two columns of explanatory text, a community-links accordion and a
# citation accordion.
# NOTE(review): the first positional argument looks like a theme identifier —
# confirm against the gr.Blocks signature of the installed Gradio version.
with gr.Blocks("ParityError/Interstellar", fill_width=True, css=custom_css) as demo:
    # Centered logo and title banner.
    gr.HTML(
        """
            <div style="text-align: center;">
                <img src="https://huggingface.co/datasets/PrunaAI/documentation-images/resolve/main/inferbench/logo2-cropped.png" style="width: 200px; height: auto; max-width: 100%; margin: 0 auto;">
                <h1>🏋️ InferBench 🏋️</h1>
                <h2>A cost/quality/speed Leaderboard for Inference Providers!</h2>
            </div>
            """
    )
    with gr.Tabs():
        with gr.TabItem("Text-to-Image Leaderboard"):
            # Table over the prepared DataFrame; every column is offered in
            # the column selector.
            Leaderboard(
                value=df,
                select_columns=df.columns.tolist(),
                # The first six columns are declared as markdown and all
                # remaining ones as numbers, so this depends on the column
                # order of `df` at this point.
                datatype=["markdown", "markdown", "markdown", "markdown", "markdown", "markdown"]
                + ["number"] * (len(df.columns.tolist()) - 6),
                filter_columns=[
                    "Platform",
                    "Owner",
                    "Device",
                    "Model",
                    "Optimization",
                ],
            )
            gr.Markdown(
                """
                > **💡 Note:** Each efficiency metric and quality metric captures only one dimension of model capacity. Rankings may vary when considering other metrics.
                """
            )
        with gr.TabItem("About"):
            with gr.Row():
                # Left column: what was benchmarked and with which settings.
                with gr.Column():
                    gr.Markdown(
                        """
                        # 📊 Text-to-Image Leaderboard

                        This leaderboard compares the performance of different text-to-image providers.

                        We started with a comprehensive benchmark comparing our very own FLUX-juiced with the “FLUX.1 [dev]” endpoints offered by:

                        - Replicate: https://replicate.com/black-forest-labs/flux-dev
                        - Fal: https://fal.ai/models/fal-ai/flux/dev
                        - Fireworks AI: https://fireworks.ai/models/fireworks/flux-1-dev-fp8
                        - Together AI: https://www.together.ai/models/flux-1-dev

                        We also included the following non-FLUX providers:

                        - AWS Nova Canvas: https://aws.amazon.com/ai/generative-ai/nova/creative/

                        All of these inference providers offer implementations but they don’t always communicate about the optimisation methods used in the background, and most endpoint have different response times and performance measures.

                        For comparison purposes we used the same generation set-up for all the providers.

                        - 28 inference steps
                        - 1024×1024 resolution
                        - Guidance scale of 3.5
                        - H100 GPU (80GB)—only reported by Replicate

                        Although we did test with this specific Pruna configuration and hardware, the applied compression methods work with different config and hardware too!

                        > We published a full blog post on [the creation of our FLUX-juiced endpoint](https://www.pruna.ai/blog/flux-juiced-the-fastest-image-generation-endpoint).
                        """
                    )
                # Right column: description of the FLUX-juiced model and
                # pointers to sample images.
                with gr.Column():
                    gr.Markdown(
                        """
                        # 🧃 FLUX.1-dev (juiced)

                        FLUX.1-dev (juiced) is our optimized version of FLUX.1-dev, delivering up to **2.6x faster inference** than the official Replicate API, **without sacrificing image quality**.

                        Under the hood, it uses a custom combination of:

                        - **Graph compilation** for optimized execution paths
                        - **Inference-time caching** for repeated operations

                        We won’t go deep into the internals here, but here’s the gist:

                        > We combine compiler-level execution graph optimization with selective caching of heavy operations (like attention layers), allowing inference to skip redundant computations without any loss in fidelity.

                        These techniques are generalized and plug-and-play via the **Pruna Pro** pipeline, and can be applied to nearly any diffusion-based image model—not just FLUX. For a free but still very juicy model you can use our open source solution.

                        > 🧪 Try FLUX-juiced now → [replicate.com/prunaai/flux.1-juiced](https://replicate.com/prunaai/flux.1-juiced)

                        ## Sample Images

                        The prompts were randomly sampled from the [parti-prompts dataset](https://github.com/google-research/parti). The reported times represent the full duration of each API call.

                        > **For samples, check out the [Pruna Notion page](https://pruna.notion.site/FLUX-1-dev-vs-Pruna-s-FLUX-juiced-1d270a039e5f80c6a2a3c00fc0d75ef0)**
                        """
                    )

        # NOTE(review): the two accordions below are nested inside gr.Tabs()
        # but outside any TabItem — confirm this placement is intended.
        # Social badges, collapsed by default.
        # NOTE(review): the LinkedIn href contains an "/admin/" path, which is
        # presumably only reachable by page admins — confirm and consider
        # linking the public company page instead.
        with gr.Accordion("🌍 Join the Pruna AI community!", open=False):
            gr.HTML(
                """
                    <a rel="nofollow" href="https://twitter.com/PrunaAI"><img alt="Twitter" src="https://img.shields.io/twitter/follow/PrunaAI?style=social"></a>
                    <a rel="nofollow" href="https://github.com/PrunaAI/pruna"><img alt="GitHub" src="https://img.shields.io/github/stars/prunaai/pruna"></a>
                    <a rel="nofollow" href="https://www.linkedin.com/company/93832878/admin/feed/posts/?feedType=following"><img alt="LinkedIn" src="https://img.shields.io/badge/LinkedIn-Connect-blue"></a>
                    <a rel="nofollow" href="https://discord.com/invite/rskEr4BZJx"><img alt="Discord" src="https://img.shields.io/badge/Discord-Join%20Us-blue?style=social&amp;logo=discord"></a>
                    <a rel="nofollow" href="https://www.reddit.com/r/PrunaAI/"><img alt="Reddit" src="https://img.shields.io/reddit/subreddit-subscribers/PrunaAI?style=social"></a>
                """
            )
        # BibTeX entry, expanded by default. The backslash of "\url" is
        # doubled because this is a regular (non-raw) string literal.
        with gr.Accordion("Citation", open=True):
            gr.Markdown(
                """
                ```bibtex
                @article{InferBench,
                    title={InferBench: A Leaderboard for Inference Providers},
                    author={PrunaAI},
                    year={2025},
                    howpublished={\\url{https://huggingface.co/spaces/PrunaAI/InferBench}}
                }
                ```
                """
            )
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()