Spaces:
Running
on
Zero
Running
on
Zero
feat: Enable MCP
Browse filesHello! This is an automated PR adding MCP compatibility to your AI App 🤖.
This PR introduces two improvements:
1. Adds docstrings to the functions in the app file that are directly connected to the Gradio UI, for the downstream LLM to use.
2. Enables the Model-Compute-Platform by adding `mcp_server=True` to the `.launch()` call.
No other logic has been changed. Please review and merge if it looks good!Learn more about MCP compatibility in Spaces here: https://huggingface.co/changelog/add-compatible-spaces-to-your-mcp-tools
app.py
CHANGED
@@ -227,6 +227,29 @@ def get_duration(
|
|
227 |
randomize_seed: bool,
|
228 |
progress=gr.Progress(track_tqdm=True)
|
229 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
230 |
if width > 768 or height > 768:
|
231 |
return 210
|
232 |
else:
|
@@ -250,193 +273,28 @@ def run_diptych_prompting(
|
|
250 |
randomize_seed: bool,
|
251 |
progress=gr.Progress(track_tqdm=True)
|
252 |
):
|
253 |
-
|
254 |
-
|
255 |
-
else:
|
256 |
-
actual_seed = seed
|
257 |
-
|
258 |
-
if input_image is None: raise gr.Error("Please upload a reference image.")
|
259 |
-
if not full_prompt: raise gr.Error("Full Prompt is empty. Please fill out the prompt fields.")
|
260 |
-
|
261 |
-
# 1. Prepare dimensions and reference image
|
262 |
-
padded_width = width + pixel_offset * 2
|
263 |
-
padded_height = height + pixel_offset * 2
|
264 |
-
diptych_size = (padded_width * 2, padded_height)
|
265 |
-
reference_image = input_image.resize((padded_width, padded_height)).convert("RGB")
|
266 |
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
# 4. Setup Attention Processor
|
284 |
-
progress(0.3, desc="Setting up attention processors...")
|
285 |
-
new_attn_procs = base_attn_procs.copy()
|
286 |
-
for k in new_attn_procs:
|
287 |
-
new_attn_procs[k] = CustomFluxAttnProcessor2_0(height=padded_height // 16, width=padded_width * 2 // 16, attn_enforce=attn_enforce)
|
288 |
-
pipe.transformer.set_attn_processor(new_attn_procs)
|
289 |
-
|
290 |
-
# 5. Run Inference
|
291 |
-
progress(0.4, desc="Running diffusion process...")
|
292 |
-
generator = torch.Generator(device="cuda").manual_seed(actual_seed)
|
293 |
-
full_diptych_result = pipe(
|
294 |
-
prompt=full_prompt,
|
295 |
-
height=diptych_size[1],
|
296 |
-
width=diptych_size[0],
|
297 |
-
control_image=diptych_image_prompt,
|
298 |
-
control_mask=mask_image,
|
299 |
-
num_inference_steps=num_steps,
|
300 |
-
generator=generator,
|
301 |
-
controlnet_conditioning_scale=ctrl_scale,
|
302 |
-
guidance_scale=guidance,
|
303 |
-
negative_prompt="",
|
304 |
-
true_guidance_scale=real_guidance
|
305 |
-
).images[0]
|
306 |
-
|
307 |
-
# 6. Final cropping
|
308 |
-
progress(0.95, desc="Finalizing image...")
|
309 |
-
final_image = full_diptych_result.crop((padded_width, 0, padded_width * 2, padded_height))
|
310 |
-
final_image = final_image.crop((pixel_offset, pixel_offset, padded_width - pixel_offset, padded_height - pixel_offset))
|
311 |
-
|
312 |
-
# 7. Return all outputs
|
313 |
-
return final_image, processed_image, full_diptych_result, full_prompt, actual_seed
|
314 |
-
|
315 |
-
|
316 |
-
# --- Gradio UI Definition ---
|
317 |
-
css = '''
|
318 |
-
.gradio-container{max-width: 960px;margin: 0 auto}
|
319 |
-
'''
|
320 |
-
with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
|
321 |
-
gr.Markdown(
|
322 |
-
"""
|
323 |
-
# Diptych Prompting: Zero-Shot Subject-Driven & Style-Driven Image Generation
|
324 |
-
### Demo for the paper "[Large-Scale Text-to-Image Model with Inpainting is a Zero-Shot Subject-Driven Image Generator](https://diptychprompting.github.io/)"
|
325 |
-
"""
|
326 |
-
)
|
327 |
-
with gr.Row():
|
328 |
-
with gr.Column(scale=1):
|
329 |
-
input_image = gr.Image(type="pil", label="Reference Image")
|
330 |
-
|
331 |
-
with gr.Group() as subject_driven_group:
|
332 |
-
subject_name = gr.Textbox(label="Subject Name", placeholder="e.g., a plush bear")
|
333 |
-
|
334 |
-
target_prompt = gr.Textbox(label="Target Prompt", placeholder="e.g., riding a skateboard on the moon")
|
335 |
-
|
336 |
-
run_button = gr.Button("Generate Image", variant="primary")
|
337 |
-
|
338 |
-
with gr.Accordion("Advanced Settings", open=False):
|
339 |
-
mode = gr.Radio(["Subject-Driven", "Style-Driven (unstable)"], label="Generation Mode", value="Subject-Driven")
|
340 |
-
with gr.Group(visible=False) as style_driven_group:
|
341 |
-
original_style_description = gr.Textbox(label="Original Image Description", placeholder="e.g., in watercolor painting style")
|
342 |
-
do_segmentation = gr.Checkbox(label="Do Segmentation", value=True)
|
343 |
-
attn_enforce = gr.Slider(minimum=1.0, maximum=2.0, value=1.3, step=0.05, label="Attention Enforcement")
|
344 |
-
full_prompt = gr.Textbox(label="Full Prompt (Auto-generated, editable)", lines=3)
|
345 |
-
ctrl_scale = gr.Slider(minimum=0.5, maximum=1.0, value=0.95, step=0.01, label="ControlNet Scale")
|
346 |
-
num_steps = gr.Slider(minimum=20, maximum=50, value=28, step=1, label="Inference Steps")
|
347 |
-
guidance = gr.Slider(minimum=1.0, maximum=10.0, value=3.5, step=0.1, label="Distilled Guidance Scale")
|
348 |
-
real_guidance = gr.Slider(minimum=1.0, maximum=10.0, value=4.5, step=0.1, label="Real Guidance Scale")
|
349 |
-
width = gr.Slider(minimum=512, maximum=1024, value=768, step=64, label="Image Width")
|
350 |
-
height = gr.Slider(minimum=512, maximum=1024, value=768, step=64, label="Image Height")
|
351 |
-
pixel_offset = gr.Slider(minimum=0, maximum=32, value=8, step=1, label="Padding (Pixel Offset)")
|
352 |
-
seed = gr.Slider(minimum=0, maximum=9223372036854775807, value=42, step=1, label="Seed")
|
353 |
-
randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
|
354 |
-
|
355 |
-
with gr.Column(scale=1):
|
356 |
-
output_image = gr.Image(type="pil", label="Generated Image")
|
357 |
-
with gr.Accordion("Other Outputs", open=False) as other_outputs_accordion:
|
358 |
-
processed_ref_image = gr.Image(label="Processed Reference (Left Panel)")
|
359 |
-
full_diptych_image = gr.Image(label="Full Diptych Output")
|
360 |
-
final_prompt_used = gr.Textbox(label="Final Prompt Used")
|
361 |
-
|
362 |
-
# --- UI Event Handlers ---
|
363 |
-
|
364 |
-
def toggle_mode_visibility(mode_choice):
|
365 |
-
"""Hides/shows the relevant input textboxes based on mode."""
|
366 |
-
if mode_choice == "Subject-Driven":
|
367 |
-
return gr.update(visible=True), gr.update(visible=False)
|
368 |
-
else:
|
369 |
-
return gr.update(visible=False), gr.update(visible=True)
|
370 |
-
|
371 |
-
def update_derived_fields(mode_choice, subject, style_desc, target):
|
372 |
-
"""Updates the full prompt and segmentation checkbox based on other inputs."""
|
373 |
-
if mode_choice == "Subject-Driven":
|
374 |
-
prompt = f"A diptych with two side-by-side images of same {subject}. On the left, a photo of {subject}. On the right, replicate this {subject} exactly but as {target}"
|
375 |
-
return gr.update(value=prompt), gr.update(value=True)
|
376 |
-
else: # Style-Driven
|
377 |
-
prompt = f"A diptych with two side-by-side images of same style. On the left, {style_desc}. On the right, replicate this style exactly but as {target}"
|
378 |
-
return gr.update(value=prompt), gr.update(value=False)
|
379 |
-
|
380 |
-
# --- UI Connections ---
|
381 |
-
|
382 |
-
# When mode changes, toggle visibility of the specific prompt fields
|
383 |
-
mode.change(
|
384 |
-
fn=toggle_mode_visibility,
|
385 |
-
inputs=mode,
|
386 |
-
outputs=[subject_driven_group, style_driven_group],
|
387 |
-
queue=False
|
388 |
-
)
|
389 |
-
|
390 |
-
# A list of all inputs that affect the full prompt or segmentation checkbox
|
391 |
-
prompt_component_inputs = [mode, subject_name, original_style_description, target_prompt]
|
392 |
-
# A list of the UI elements that are derived from the above inputs
|
393 |
-
derived_outputs = [full_prompt, do_segmentation]
|
394 |
-
|
395 |
-
# When any prompt component changes, update the derived fields
|
396 |
-
for component in prompt_component_inputs:
|
397 |
-
component.change(update_derived_fields, inputs=prompt_component_inputs, outputs=derived_outputs, queue=False, show_progress="hidden")
|
398 |
-
|
399 |
-
run_button.click(
|
400 |
-
fn=run_diptych_prompting,
|
401 |
-
inputs=[
|
402 |
-
input_image, subject_name, do_segmentation, full_prompt, attn_enforce,
|
403 |
-
ctrl_scale, width, height, pixel_offset, num_steps, guidance,
|
404 |
-
real_guidance, seed, randomize_seed
|
405 |
-
],
|
406 |
-
outputs=[output_image, processed_ref_image, full_diptych_image, final_prompt_used, seed]
|
407 |
-
)
|
408 |
-
def run_subject_driven_example(input_image, subject_name, target_prompt):
|
409 |
-
# Construct the full prompt for subject-driven mode
|
410 |
-
full_prompt = f"A diptych with two side-by-side images of same {subject_name}. On the left, a photo of {subject_name}. On the right, replicate this {subject_name} exactly but as {target_prompt}"
|
411 |
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
full_prompt=full_prompt,
|
418 |
-
attn_enforce=1.3,
|
419 |
-
ctrl_scale=0.95,
|
420 |
-
width=768,
|
421 |
-
height=768,
|
422 |
-
pixel_offset=8,
|
423 |
-
num_steps=28,
|
424 |
-
guidance=3.5,
|
425 |
-
real_guidance=4.5,
|
426 |
-
seed=42,
|
427 |
-
randomize_seed=False,
|
428 |
-
)
|
429 |
-
gr.Examples(
|
430 |
-
examples=[
|
431 |
-
["./assets/cat_squished.png", "a cat toy", "a cat toy riding a skate"],
|
432 |
-
["./assets/hf.png", "hugging face logo", "a hugging face logo on a hat"],
|
433 |
-
["./assets/bear_plushie.jpg", "a bear plushie", "a bear plushie drinking bubble tea"]
|
434 |
-
],
|
435 |
-
inputs=[input_image, subject_name, target_prompt],
|
436 |
-
outputs=[output_image, processed_ref_image, full_diptych_image, final_prompt_used, seed],
|
437 |
-
fn=run_subject_driven_example,
|
438 |
-
cache_examples="lazy"
|
439 |
-
)
|
440 |
-
|
441 |
-
if __name__ == "__main__":
|
442 |
-
demo.launch(share=True, debug=True)
|
|
|
227 |
randomize_seed: bool,
|
228 |
progress=gr.Progress(track_tqdm=True)
|
229 |
):
|
230 |
+
"""
|
231 |
+
Calculates the estimated GPU duration based on image dimensions.
|
232 |
+
|
233 |
+
Args:
|
234 |
+
input_image: The input reference image
|
235 |
+
subject_name: Name of the subject to segment
|
236 |
+
do_segmentation: Whether to perform segmentation
|
237 |
+
full_prompt: The complete prompt for generation
|
238 |
+
attn_enforce: Attention enforcement strength
|
239 |
+
ctrl_scale: ControlNet conditioning scale
|
240 |
+
width: Target image width
|
241 |
+
height: Target image height
|
242 |
+
pixel_offset: Padding in pixels
|
243 |
+
num_steps: Number of inference steps
|
244 |
+
guidance: Distilled guidance scale
|
245 |
+
real_guidance: Real guidance scale
|
246 |
+
seed: Random seed
|
247 |
+
randomize_seed: Whether to randomize the seed
|
248 |
+
progress: Progress tracker
|
249 |
+
|
250 |
+
Returns:
|
251 |
+
int: Estimated duration in seconds (210 for large images, 120 for smaller)
|
252 |
+
"""
|
253 |
if width > 768 or height > 768:
|
254 |
return 210
|
255 |
else:
|
|
|
273 |
randomize_seed: bool,
|
274 |
progress=gr.Progress(track_tqdm=True)
|
275 |
):
|
276 |
+
"""
|
277 |
+
Main function for diptych prompting image generation using FLUX.1-dev with ControlNet inpainting.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
278 |
|
279 |
+
Args:
|
280 |
+
input_image: The reference image to use as the left panel
|
281 |
+
subject_name: Name/description of the subject for segmentation
|
282 |
+
do_segmentation: Whether to segment the subject from the reference image
|
283 |
+
full_prompt: Complete prompt describing the desired diptych output
|
284 |
+
attn_enforce: Attention enforcement strength (1.0-2.0)
|
285 |
+
ctrl_scale: ControlNet conditioning scale (0.5-1.0)
|
286 |
+
width: Target width of the generated image
|
287 |
+
height: Target height of the generated image
|
288 |
+
pixel_offset: Padding pixels to add around the image
|
289 |
+
num_steps: Number of diffusion inference steps
|
290 |
+
guidance: Distilled guidance scale for generation
|
291 |
+
real_guidance: Real guidance scale for generation
|
292 |
+
seed: Random seed for reproducibility
|
293 |
+
randomize_seed: Whether to use a random seed
|
294 |
+
progress: Progress tracker for UI updates
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
295 |
|
296 |
+
Returns:
|
297 |
+
tuple: (final_image, processed_image, full_diptych_result, full_prompt, actual_seed)
|
298 |
+
- final_image: The generated right panel image
|
299 |
+
- processed_image: The processed reference image used
|
300 |
+
- full_diptych_result: The complete diptych
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|