prithivMLmods committed on
Commit 5439aa4 · verified · 1 Parent(s): 10814c3

Update app.py

Files changed (1):
  1. app.py (+124 −119)
app.py CHANGED
@@ -12,7 +12,7 @@ import zipfile
 from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
 
 # Description for the app
-DESCRIPTION = """## flux comparator hpc/."""
+DESCRIPTION = """## flux-krea vs qwen"""
 
 # Helper functions
 def save_image(img):
@@ -28,24 +28,19 @@ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 2048
 
-# Load pipelines for both models
-# Flux.1-dev-realism
-base_model_dev = "prithivMLmods/Flux.1-Merged" # Merge of (black-forest-labs/FLUX.1-dev + black-forest-labs/FLUX.1-schnell)
-pipe_dev = DiffusionPipeline.from_pretrained(base_model_dev, torch_dtype=torch.bfloat16)
-lora_repo = "strangerzonehf/Flux-Super-Realism-LoRA"
-trigger_word = "Super Realism"
-pipe_dev.load_lora_weights(lora_repo)
-pipe_dev.to("cuda")
-
-# Flux.1-krea
+# Load pipelines
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# Flux.1-krea pipeline
 taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device)
-# Merge of (black-forest-labs/FLUX.1-dev + https://huggingface.co/black-forest-labs/FLUX.1-Krea-dev)
 good_vae = AutoencoderKL.from_pretrained("prithivMLmods/Flux.1-Krea-Merged-Dev", subfolder="vae", torch_dtype=dtype).to(device)
 pipe_krea = DiffusionPipeline.from_pretrained("prithivMLmods/Flux.1-Krea-Merged-Dev", torch_dtype=dtype, vae=taef1).to(device)
 
-# Define the flux_pipe_call_that_returns_an_iterable_of_images for flux.1-krea
+# Qwen/Qwen-Image pipeline
+pipe_qwen = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image", torch_dtype=dtype).to(device)
+
+# Define custom flux_pipe_call for Flux.1-krea
 @torch.inference_mode()
 def flux_pipe_call_that_returns_an_iterable_of_images(
     self,
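Worth noting in the hunk above: the pipeline is built with the tiny taef1 autoencoder so intermediate latents can be decoded cheaply for step-by-step previews, while good_vae (the full AutoencoderKL) is kept aside for the final, higher-quality decode. A minimal sketch of that division of labor, assuming diffusers' decode API and a dummy 16-channel Flux-style latent:

import torch
from diffusers import AutoencoderKL, AutoencoderTiny

dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

# Tiny VAE: fast, lower-fidelity decode used for per-step previews.
tiny_vae = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device)
# Full VAE: slower, higher-fidelity decode used for the final image.
full_vae = AutoencoderKL.from_pretrained(
    "prithivMLmods/Flux.1-Krea-Merged-Dev", subfolder="vae", torch_dtype=dtype
).to(device)

latents = torch.randn(1, 16, 128, 128, dtype=dtype, device=device)  # dummy latent
preview = tiny_vae.decode(latents).sample  # cheap preview decode
final = full_vae.decode(latents).sample    # high-quality final decode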
@@ -167,7 +162,7 @@ def flux_pipe_call_that_returns_an_iterable_of_images(
 
 pipe_krea.flux_pipe_call_that_returns_an_iterable_of_images = flux_pipe_call_that_returns_an_iterable_of_images.__get__(pipe_krea)
 
-# Helper functions for flux.1-krea
+# Helper functions for Flux.1-krea
 def calculate_shift(
     image_seq_len,
     base_seq_len: int = 256,
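The assignment above grafts the standalone streaming function onto one pipeline instance via the descriptor protocol: function.__get__(obj) returns a method bound to obj, so self is filled in automatically. A self-contained illustration of the trick:

# func.__get__(obj) produces a bound method on that single instance;
# other instances and the class itself are untouched.
class Pipeline:
    name = "krea"

def describe(self):
    return f"pipeline: {self.name}"

pipe = Pipeline()
pipe.describe = describe.__get__(pipe)  # bind to this instance only
print(pipe.describe())  # -> pipeline: krea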
@@ -203,67 +198,48 @@ def retrieve_timesteps(
     timesteps = scheduler.timesteps
     return timesteps, num_inference_steps
 
-# Styles for flux.1-dev-realism
-style_list = [
-    {"name": "3840 x 2160", "prompt": "hyper-realistic 8K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic", "negative_prompt": ""},
-    {"name": "2560 x 1440", "prompt": "hyper-realistic 4K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic", "negative_prompt": ""},
-    {"name": "HD+", "prompt": "hyper-realistic 2K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic", "negative_prompt": ""},
-    {"name": "Style Zero", "prompt": "{prompt}", "negative_prompt": ""},
-]
-
-styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list}
-DEFAULT_STYLE_NAME = "Style Zero"
-STYLE_NAMES = list(styles.keys())
-
-def apply_style(style_name: str, positive: str) -> Tuple[str, str]:
-    p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
-    return p.replace("{prompt}", positive), n
+# Aspect ratios
+aspect_ratios = {
+    "1:1": (1328, 1328),
+    "16:9": (1664, 928),
+    "9:16": (928, 1664),
+    "4:3": (1472, 1140),
+    "3:4": (1140, 1472)
+}
 
-# Generation function for flux.1-dev-realism
+# Generation function for Flux.1-krea
 @spaces.GPU
-def generate_dev(
+def generate_krea(
     prompt: str,
-    negative_prompt: str = "",
-    use_negative_prompt: bool = False,
     seed: int = 0,
     width: int = 1024,
     height: int = 1024,
-    guidance_scale: float = 3,
+    guidance_scale: float = 4.5,
     randomize_seed: bool = False,
-    style_name: str = DEFAULT_STYLE_NAME,
-    num_inference_steps: int = 30,
+    num_inference_steps: int = 28,
     num_images: int = 1,
     zip_images: bool = False,
     progress=gr.Progress(track_tqdm=True),
 ):
-    positive_prompt, style_negative_prompt = apply_style(style_name, prompt)
-
-    if use_negative_prompt:
-        final_negative_prompt = style_negative_prompt + " " + negative_prompt
-    else:
-        final_negative_prompt = style_negative_prompt
-
-    final_negative_prompt = final_negative_prompt.strip()
-
-    if trigger_word:
-        positive_prompt = f"{trigger_word} {positive_prompt}"
-
-    seed = int(randomize_seed_fn(seed, randomize_seed))
-    generator = torch.Generator(device="cuda").manual_seed(seed)
+    if randomize_seed:
+        seed = random.randint(0, MAX_SEED)
+    generator = torch.Generator(device).manual_seed(seed)
 
     start_time = time.time()
 
-    images = pipe_dev(
-        prompt=positive_prompt,
-        negative_prompt=final_negative_prompt if final_negative_prompt else None,
-        width=width,
-        height=height,
-        guidance_scale=guidance_scale,
-        num_inference_steps=num_inference_steps,
-        num_images_per_prompt=num_images,
-        generator=generator,
-        output_type="pil",
-    ).images
+    images = []
+    for _ in range(num_images):
+        final_img = list(pipe_krea.flux_pipe_call_that_returns_an_iterable_of_images(
+            prompt=prompt,
+            guidance_scale=guidance_scale,
+            num_inference_steps=num_inference_steps,
+            width=width,
+            height=height,
+            generator=generator,
+            output_type="pil",
+            good_vae=good_vae,
+        ))[-1]  # Take the final image only
+        images.append(final_img)
 
     end_time = time.time()
     duration = end_time - start_time
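One caveat on the list(...)[-1] idiom in generate_krea: building the list materializes every intermediate preview image before discarding all but the last. A hypothetical drain-the-iterator helper (not part of this commit) avoids that:

# Hypothetical alternative to list(stream)[-1]: keep only the most
# recent item, so intermediate previews can be garbage-collected.
def last(iterable):
    item = None
    for item in iterable:
        pass  # each yield replaces the previous binding
    return item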
@@ -280,39 +256,38 @@ def generate_dev(
 
     return image_paths, seed, f"{duration:.2f}", zip_path
 
-# Generation function for flux.1-krea
+# Generation function for Qwen/Qwen-Image
 @spaces.GPU
-def generate_krea(
+def generate_qwen(
     prompt: str,
+    negative_prompt: str = "",
     seed: int = 0,
     width: int = 1024,
     height: int = 1024,
-    guidance_scale: float = 4.5,
+    guidance_scale: float = 4.0,
     randomize_seed: bool = False,
-    num_inference_steps: int = 28,
+    num_inference_steps: int = 50,
     num_images: int = 1,
     zip_images: bool = False,
     progress=gr.Progress(track_tqdm=True),
 ):
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
-    generator = torch.Generator().manual_seed(seed)
+    generator = torch.Generator(device).manual_seed(seed)
 
     start_time = time.time()
 
-    images = []
-    for _ in range(num_images):
-        final_img = list(pipe_krea.flux_pipe_call_that_returns_an_iterable_of_images(
-            prompt=prompt,
-            guidance_scale=guidance_scale,
-            num_inference_steps=num_inference_steps,
-            width=width,
-            height=height,
-            generator=generator,
-            output_type="pil",
-            good_vae=good_vae,
-        ))[-1]  # Take the final image only
-        images.append(final_img)
+    images = pipe_qwen(
+        prompt=prompt,
+        negative_prompt=negative_prompt if negative_prompt else None,
+        height=height,
+        width=width,
+        guidance_scale=guidance_scale,
+        num_inference_steps=num_inference_steps,
+        num_images_per_prompt=num_images,
+        generator=generator,
+        output_type="pil",
+    ).images
 
     end_time = time.time()
     duration = end_time - start_time
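Both generation functions now seed torch.Generator(device) explicitly; the previous generate_krea used torch.Generator() with no device argument, which silently created a CPU generator. A small sketch of why the explicit, seeded generator is what makes the reported seed reproducible:

import torch

# Same seed + same device => identical draws on every run.
device = "cuda" if torch.cuda.is_available() else "cpu"
a = torch.randn(4, generator=torch.Generator(device).manual_seed(42), device=device)
b = torch.randn(4, generator=torch.Generator(device).manual_seed(42), device=device)
assert torch.equal(a, b)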
@@ -329,7 +304,7 @@ def generate_krea(
 
     return image_paths, seed, f"{duration:.2f}", zip_path
 
-# Main generation function to handle model choice
+# Main generation function
 @spaces.GPU
 def generate(
     model_choice: str,
@@ -339,33 +314,31 @@ def generate(
     seed: int = 0,
     width: int = 1024,
     height: int = 1024,
-    guidance_scale: float = 3,
+    guidance_scale: float = 3.5,
     randomize_seed: bool = False,
-    style_name: str = DEFAULT_STYLE_NAME,
-    num_inference_steps: int = 30,
+    num_inference_steps: int = 28,
     num_images: int = 1,
     zip_images: bool = False,
     progress=gr.Progress(track_tqdm=True),
 ):
-    if model_choice == "flux.1-dev-merged":
-        return generate_dev(
+    if model_choice == "Flux.1-krea":
+        return generate_krea(
             prompt=prompt,
-            negative_prompt=negative_prompt,
-            use_negative_prompt=use_negative_prompt,
             seed=seed,
             width=width,
             height=height,
             guidance_scale=guidance_scale,
             randomize_seed=randomize_seed,
-            style_name=style_name,
             num_inference_steps=num_inference_steps,
             num_images=num_images,
             zip_images=zip_images,
             progress=progress,
         )
-    elif model_choice == "flux.1-krea-merged-dev":
-        return generate_krea(
+    elif model_choice == "Qwen Image":
+        final_negative_prompt = negative_prompt if use_negative_prompt else ""
+        return generate_qwen(
             prompt=prompt,
+            negative_prompt=final_negative_prompt,
             seed=seed,
             width=width,
             height=height,
@@ -379,12 +352,12 @@ def generate(
     else:
         raise ValueError("Invalid model choice")
 
-# Examples
+# Examples
 examples = [
-    "An attractive young woman with blue eyes lying face down on the bed, in the style of animated gifs, light white and light amber, jagged edges, the snapshot aesthetic, timeless beauty, goosepunk, sunrays shine upon it --no freckles --chaos 65 --ar 1:2 --profile yruxpc2 --stylize 750 --v 6.1",
-    "Headshot of handsome young man, wearing dark gray sweater with buttons and big shawl collar, brown hair and short beard, serious look on his face, black background, soft studio lighting, portrait photography --ar 85:128 --v 6.0 --style",
-    "Purple Dreamy, a medium-angle shot of a young woman with long brown hair, wearing a pair of eye-level glasses, stands in front of a backdrop of purple and white lights.",
-    "High-resolution photograph, woman, UHD, photorealistic, shot on a Sony A7III --chaos 20 --ar 1:2 --style raw --stylize 250"
+    "An attractive young woman with blue eyes lying face down on the bed, light white and light amber, timeless beauty, sunrays shine upon it",
+    "Headshot of handsome young man, wearing dark gray sweater, brown hair and short beard, serious look, black background, soft studio lighting",
+    "A medium-angle shot of a young woman with long brown hair, wearing glasses, standing in front of purple and white lights",
+    "High-resolution photograph of a woman, photorealistic, vibrant colors"
 ]
 
 css = '''
@@ -415,21 +388,23 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
     result = gr.Gallery(label="Result", columns=1, show_label=False, preview=True)
 
     with gr.Row():
-        # Model choice radio button above additional options
         model_choice = gr.Radio(
-            choices=["flux.1-dev-merged", "flux.1-krea-merged-dev"],
+            choices=["Flux.1-krea", "Qwen Image"],
             label="Select Model",
-            value="flux.1-dev-merged"
+            value="Flux.1-krea"
         )
 
     with gr.Accordion("Additional Options", open=False):
-        style_selection = gr.Dropdown(
-            label="Quality Style (for flux.1-dev-realism only)",
-            choices=STYLE_NAMES,
-            value=DEFAULT_STYLE_NAME,
-            interactive=True,
+        aspect_ratio = gr.Dropdown(
+            label="Aspect Ratio",
+            choices=list(aspect_ratios.keys()),
+            value="1:1",
+        )
+        use_negative_prompt = gr.Checkbox(
+            label="Use negative prompt (Qwen Image only)",
+            value=False,
+            visible=False
         )
-        use_negative_prompt = gr.Checkbox(label="Use negative prompt (for flux.1-dev-realism only)", value=False)
         negative_prompt = gr.Text(
             label="Negative prompt",
             max_lines=1,
@@ -461,7 +436,7 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
         )
         guidance_scale = gr.Slider(
             label="Guidance Scale",
-            minimum=0.1,
+            minimum=0.0,
             maximum=20.0,
             step=0.1,
             value=3.5,
@@ -469,7 +444,7 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
         num_inference_steps = gr.Slider(
             label="Number of inference steps",
             minimum=1,
-            maximum=40,
+            maximum=100,
             step=1,
             value=28,
         )
@@ -487,26 +462,48 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
         generation_time = gr.Textbox(label="Generation time (seconds)", interactive=False)
         zip_file = gr.File(label="Download ZIP")
 
-    gr.Examples(
-        examples=examples,
-        inputs=prompt,
-        outputs=[result, seed_display, generation_time, zip_file],
-        fn=generate,
-        cache_examples=False,
+    # Update aspect ratio
+    def set_dimensions(ar):
+        w, h = aspect_ratios[ar]
+        return gr.update(value=w), gr.update(value=h)
+
+    aspect_ratio.change(
+        fn=set_dimensions,
+        inputs=aspect_ratio,
+        outputs=[width, height]
     )
 
+    # Update model-specific settings
+    def update_settings(mc):
+        if mc == "Flux.1-krea":
+            return (
+                gr.update(value=28),
+                gr.update(value=3.5),
+                gr.update(visible=False)
+            )
+        elif mc == "Qwen Image":
+            return (
+                gr.update(value=50),
+                gr.update(value=4.0),
+                gr.update(visible=True)
+            )
+
+    model_choice.change(
+        fn=update_settings,
+        inputs=model_choice,
+        outputs=[num_inference_steps, guidance_scale, use_negative_prompt]
+    )
+
+    # Negative prompt visibility
     use_negative_prompt.change(
         fn=lambda x: gr.update(visible=x),
         inputs=use_negative_prompt,
-        outputs=negative_prompt,
-        api_name=False,
+        outputs=negative_prompt
    )
 
+    # Run button and prompt submit
     gr.on(
-        triggers=[
-            prompt.submit,
-            run_button.click,
-        ],
+        triggers=[prompt.submit, run_button.click],
         fn=generate,
         inputs=[
             model_choice,
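All the listeners added in this hunk follow one Gradio pattern: the handler returns gr.update(...) objects, one per output component, and Gradio applies them as property patches (values for the sliders, visibility for the checkbox and textbox). A minimal, runnable sketch of the same pattern with illustrative component names (Gradio 4+):

import gradio as gr

with gr.Blocks() as demo:
    show = gr.Checkbox(label="Show negative prompt", value=False)
    neg = gr.Text(label="Negative prompt", visible=False)
    # Handler returns a gr.update patch for the single output component.
    show.change(fn=lambda x: gr.update(visible=x), inputs=show, outputs=neg)

if __name__ == "__main__":
    demo.launch()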
@@ -518,7 +515,6 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
             height,
             guidance_scale,
             randomize_seed,
-            style_selection,
             num_inference_steps,
             num_images,
             zip_images,
@@ -527,5 +523,14 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
         api_name="run",
     )
 
+    # Examples
+    gr.Examples(
+        examples=examples,
+        inputs=prompt,
+        outputs=[result, seed_display, generation_time, zip_file],
+        fn=generate,
+        cache_examples=False,
+    )
+
 if __name__ == "__main__":
     demo.queue(max_size=30).launch(mcp_server=True, ssr_mode=False, show_error=True)
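Finally, gr.Examples moves to the end of the Blocks context, after every component and listener it references exists. With cache_examples=False, selecting an example populates the prompt box rather than replaying a precomputed result, and generation still runs through the normal Run wiring. A minimal sketch of the pattern, with placeholder names:

import gradio as gr

def echo(text):  # stand-in for generate
    return text

with gr.Blocks() as demo:
    inp = gr.Text(label="Prompt")
    out = gr.Textbox(label="Result")
    # With cache_examples=False, selecting an example fills `inp`;
    # fn/outputs would only be exercised if example caching were enabled.
    gr.Examples(examples=["a red fox", "a blue bird"],
                inputs=inp, outputs=out, fn=echo, cache_examples=False)

if __name__ == "__main__":
    demo.launch()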
 