multimodalart (HF Staff) committed
Commit 4c7c87d · verified · 1 Parent(s): 9acd353

feat: Enable MCP


Hello! This is an automated PR adding MCP compatibility to your AI App 🤖.

![image.png](https://cdn-uploads.huggingface.co/production/uploads/624bebf604abc7ebb01789af/HQQK38I_MDXLDMYDYBq8H.png)

This PR introduces two improvements:
1. Adds docstrings to the functions in the app file that are directly connected to the Gradio UI, for the downstream LLM to use.
2. Enables the Model Context Protocol (MCP) server by adding `mcp_server=True` to the `.launch()` call (see the sketch below).

No other logic has been changed. Please review and merge if it looks good!

Learn more about MCP compatibility in Spaces here: https://huggingface.co/changelog/add-compatible-spaces-to-your-mcp-tools
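For reference, enabling the MCP server in a Gradio app is a one-line change to the launch call; function docstrings and type hints are what downstream MCP clients see as tool descriptions and input schemas, which is why this PR also adds docstrings. A minimal sketch, assuming Gradio ≥ 5 with the `gradio[mcp]` extra installed (`letter_counter` is illustrative, not part of this Space):

```python
import gradio as gr

def letter_counter(word: str, letter: str) -> int:
    """Count how many times `letter` appears in `word`.

    With mcp_server=True, this docstring becomes the MCP tool description
    and the type hints define the tool's input schema.
    """
    return word.lower().count(letter.lower())

demo = gr.Interface(fn=letter_counter, inputs=["text", "text"], outputs="number")

if __name__ == "__main__":
    demo.launch(mcp_server=True)  # the same flag this PR adds to app.py
```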

Files changed (1): app.py (+46 -188)
app.py CHANGED

```diff
@@ -227,6 +227,29 @@ def get_duration(
     randomize_seed: bool,
     progress=gr.Progress(track_tqdm=True)
 ):
+    """
+    Calculates the estimated GPU duration based on image dimensions.
+
+    Args:
+        input_image: The input reference image
+        subject_name: Name of the subject to segment
+        do_segmentation: Whether to perform segmentation
+        full_prompt: The complete prompt for generation
+        attn_enforce: Attention enforcement strength
+        ctrl_scale: ControlNet conditioning scale
+        width: Target image width
+        height: Target image height
+        pixel_offset: Padding in pixels
+        num_steps: Number of inference steps
+        guidance: Distilled guidance scale
+        real_guidance: Real guidance scale
+        seed: Random seed
+        randomize_seed: Whether to randomize the seed
+        progress: Progress tracker
+
+    Returns:
+        int: Estimated duration in seconds (210 for large images, 120 for smaller)
+    """
     if width > 768 or height > 768:
         return 210
     else:
```
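For context on why `get_duration` exists: ZeroGPU Spaces accept a callable as the `duration` argument of the `@spaces.GPU` decorator, and that callable receives the same arguments as the decorated function. A sketch of that wiring, hedged as an assumption since the decorator line sits outside the hunks shown in this diff:

```python
import spaces  # ZeroGPU helper package available on Hugging Face Spaces

# `duration` may be a callable: it is invoked with the same arguments as the
# decorated function and returns the number of GPU seconds to reserve.
@spaces.GPU(duration=get_duration)
def run_diptych_prompting(*args, **kwargs):
    ...  # diffusion pipeline runs here (see the hunk below)
```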
```diff
@@ -250,193 +273,28 @@ def run_diptych_prompting(
     randomize_seed: bool,
     progress=gr.Progress(track_tqdm=True)
 ):
-    if randomize_seed:
-        actual_seed = random.randint(0, 9223372036854775807)
-    else:
-        actual_seed = seed
-
-    if input_image is None: raise gr.Error("Please upload a reference image.")
-    if not full_prompt: raise gr.Error("Full Prompt is empty. Please fill out the prompt fields.")
-
-    # 1. Prepare dimensions and reference image
-    padded_width = width + pixel_offset * 2
-    padded_height = height + pixel_offset * 2
-    diptych_size = (padded_width * 2, padded_height)
-    reference_image = input_image.resize((padded_width, padded_height)).convert("RGB")

-    # 2. Process reference image based on segmentation flag
-    progress(0, desc="Preparing reference image...")
-    if do_segmentation:
-        if not subject_name:
-            raise gr.Error("Subject Name is required when 'Do Segmentation' is checked.")
-        progress(0.05, desc="Segmenting reference image...")
-        processed_image = segment_image(reference_image, subject_name, object_detector, segmentator, segment_processor)
-    else:
-        processed_image = reference_image
-
-    # 3. Create diptych and mask
-    progress(0.2, desc="Creating diptych and mask...")
-    mask_image = np.concatenate([np.zeros((padded_height, padded_width, 3)), np.ones((padded_height, padded_width, 3)) * 255], axis=1)
-    mask_image = Image.fromarray(mask_image.astype(np.uint8))
-    diptych_image_prompt = make_diptych(processed_image)
-
-    # 4. Setup Attention Processor
-    progress(0.3, desc="Setting up attention processors...")
-    new_attn_procs = base_attn_procs.copy()
-    for k in new_attn_procs:
-        new_attn_procs[k] = CustomFluxAttnProcessor2_0(height=padded_height // 16, width=padded_width * 2 // 16, attn_enforce=attn_enforce)
-    pipe.transformer.set_attn_processor(new_attn_procs)
-
-    # 5. Run Inference
-    progress(0.4, desc="Running diffusion process...")
-    generator = torch.Generator(device="cuda").manual_seed(actual_seed)
-    full_diptych_result = pipe(
-        prompt=full_prompt,
-        height=diptych_size[1],
-        width=diptych_size[0],
-        control_image=diptych_image_prompt,
-        control_mask=mask_image,
-        num_inference_steps=num_steps,
-        generator=generator,
-        controlnet_conditioning_scale=ctrl_scale,
-        guidance_scale=guidance,
-        negative_prompt="",
-        true_guidance_scale=real_guidance
-    ).images[0]
-
-    # 6. Final cropping
-    progress(0.95, desc="Finalizing image...")
-    final_image = full_diptych_result.crop((padded_width, 0, padded_width * 2, padded_height))
-    final_image = final_image.crop((pixel_offset, pixel_offset, padded_width - pixel_offset, padded_height - pixel_offset))
-
-    # 7. Return all outputs
-    return final_image, processed_image, full_diptych_result, full_prompt, actual_seed
-
-
-# --- Gradio UI Definition ---
-css = '''
-.gradio-container{max-width: 960px;margin: 0 auto}
-'''
-with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
-    gr.Markdown(
-        """
-        # Diptych Prompting: Zero-Shot Subject-Driven & Style-Driven Image Generation
-        ### Demo for the paper "[Large-Scale Text-to-Image Model with Inpainting is a Zero-Shot Subject-Driven Image Generator](https://diptychprompting.github.io/)"
-        """
-    )
-    with gr.Row():
-        with gr.Column(scale=1):
-            input_image = gr.Image(type="pil", label="Reference Image")
-
-            with gr.Group() as subject_driven_group:
-                subject_name = gr.Textbox(label="Subject Name", placeholder="e.g., a plush bear")
-
-            target_prompt = gr.Textbox(label="Target Prompt", placeholder="e.g., riding a skateboard on the moon")
-
-            run_button = gr.Button("Generate Image", variant="primary")
-
-            with gr.Accordion("Advanced Settings", open=False):
-                mode = gr.Radio(["Subject-Driven", "Style-Driven (unstable)"], label="Generation Mode", value="Subject-Driven")
-                with gr.Group(visible=False) as style_driven_group:
-                    original_style_description = gr.Textbox(label="Original Image Description", placeholder="e.g., in watercolor painting style")
-                do_segmentation = gr.Checkbox(label="Do Segmentation", value=True)
-                attn_enforce = gr.Slider(minimum=1.0, maximum=2.0, value=1.3, step=0.05, label="Attention Enforcement")
-                full_prompt = gr.Textbox(label="Full Prompt (Auto-generated, editable)", lines=3)
-                ctrl_scale = gr.Slider(minimum=0.5, maximum=1.0, value=0.95, step=0.01, label="ControlNet Scale")
-                num_steps = gr.Slider(minimum=20, maximum=50, value=28, step=1, label="Inference Steps")
-                guidance = gr.Slider(minimum=1.0, maximum=10.0, value=3.5, step=0.1, label="Distilled Guidance Scale")
-                real_guidance = gr.Slider(minimum=1.0, maximum=10.0, value=4.5, step=0.1, label="Real Guidance Scale")
-                width = gr.Slider(minimum=512, maximum=1024, value=768, step=64, label="Image Width")
-                height = gr.Slider(minimum=512, maximum=1024, value=768, step=64, label="Image Height")
-                pixel_offset = gr.Slider(minimum=0, maximum=32, value=8, step=1, label="Padding (Pixel Offset)")
-                seed = gr.Slider(minimum=0, maximum=9223372036854775807, value=42, step=1, label="Seed")
-                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
-
-        with gr.Column(scale=1):
-            output_image = gr.Image(type="pil", label="Generated Image")
-            with gr.Accordion("Other Outputs", open=False) as other_outputs_accordion:
-                processed_ref_image = gr.Image(label="Processed Reference (Left Panel)")
-                full_diptych_image = gr.Image(label="Full Diptych Output")
-                final_prompt_used = gr.Textbox(label="Final Prompt Used")
-
-    # --- UI Event Handlers ---
-
-    def toggle_mode_visibility(mode_choice):
-        """Hides/shows the relevant input textboxes based on mode."""
-        if mode_choice == "Subject-Driven":
-            return gr.update(visible=True), gr.update(visible=False)
-        else:
-            return gr.update(visible=False), gr.update(visible=True)
-
-    def update_derived_fields(mode_choice, subject, style_desc, target):
-        """Updates the full prompt and segmentation checkbox based on other inputs."""
-        if mode_choice == "Subject-Driven":
-            prompt = f"A diptych with two side-by-side images of same {subject}. On the left, a photo of {subject}. On the right, replicate this {subject} exactly but as {target}"
-            return gr.update(value=prompt), gr.update(value=True)
-        else:  # Style-Driven
-            prompt = f"A diptych with two side-by-side images of same style. On the left, {style_desc}. On the right, replicate this style exactly but as {target}"
-            return gr.update(value=prompt), gr.update(value=False)
-
-    # --- UI Connections ---
-
-    # When mode changes, toggle visibility of the specific prompt fields
-    mode.change(
-        fn=toggle_mode_visibility,
-        inputs=mode,
-        outputs=[subject_driven_group, style_driven_group],
-        queue=False
-    )
-
-    # A list of all inputs that affect the full prompt or segmentation checkbox
-    prompt_component_inputs = [mode, subject_name, original_style_description, target_prompt]
-    # A list of the UI elements that are derived from the above inputs
-    derived_outputs = [full_prompt, do_segmentation]
-
-    # When any prompt component changes, update the derived fields
-    for component in prompt_component_inputs:
-        component.change(update_derived_fields, inputs=prompt_component_inputs, outputs=derived_outputs, queue=False, show_progress="hidden")
-
-    run_button.click(
-        fn=run_diptych_prompting,
-        inputs=[
-            input_image, subject_name, do_segmentation, full_prompt, attn_enforce,
-            ctrl_scale, width, height, pixel_offset, num_steps, guidance,
-            real_guidance, seed, randomize_seed
-        ],
-        outputs=[output_image, processed_ref_image, full_diptych_image, final_prompt_used, seed]
-    )
-    def run_subject_driven_example(input_image, subject_name, target_prompt):
-        # Construct the full prompt for subject-driven mode
-        full_prompt = f"A diptych with two side-by-side images of same {subject_name}. On the left, a photo of {subject_name}. On the right, replicate this {subject_name} exactly but as {target_prompt}"

-        # Call the main function with all arguments, using defaults for subject-driven mode
-        return run_diptych_prompting(
-            input_image=input_image,
-            subject_name=subject_name,
-            do_segmentation=True,
-            full_prompt=full_prompt,
-            attn_enforce=1.3,
-            ctrl_scale=0.95,
-            width=768,
-            height=768,
-            pixel_offset=8,
-            num_steps=28,
-            guidance=3.5,
-            real_guidance=4.5,
-            seed=42,
-            randomize_seed=False,
-        )
-    gr.Examples(
-        examples=[
-            ["./assets/cat_squished.png", "a cat toy", "a cat toy riding a skate"],
-            ["./assets/hf.png", "hugging face logo", "a hugging face logo on a hat"],
-            ["./assets/bear_plushie.jpg", "a bear plushie", "a bear plushie drinking bubble tea"]
-        ],
-        inputs=[input_image, subject_name, target_prompt],
-        outputs=[output_image, processed_ref_image, full_diptych_image, final_prompt_used, seed],
-        fn=run_subject_driven_example,
-        cache_examples="lazy"
-    )
-
-if __name__ == "__main__":
-    demo.launch(share=True, debug=True)
+    """
+    Main function for diptych prompting image generation using FLUX.1-dev with ControlNet inpainting.
+
+    Args:
+        input_image: The reference image to use as the left panel
+        subject_name: Name/description of the subject for segmentation
+        do_segmentation: Whether to segment the subject from the reference image
+        full_prompt: Complete prompt describing the desired diptych output
+        attn_enforce: Attention enforcement strength (1.0-2.0)
+        ctrl_scale: ControlNet conditioning scale (0.5-1.0)
+        width: Target width of the generated image
+        height: Target height of the generated image
+        pixel_offset: Padding pixels to add around the image
+        num_steps: Number of diffusion inference steps
+        guidance: Distilled guidance scale for generation
+        real_guidance: Real guidance scale for generation
+        seed: Random seed for reproducibility
+        randomize_seed: Whether to use a random seed
+        progress: Progress tracker for UI updates
+
+    Returns:
+        tuple: (final_image, processed_image, full_diptych_result, full_prompt, actual_seed)
+            - final_image: The generated right panel image
+            - processed_image: The processed reference image used
+            - full_diptych_result: The complete diptych
```
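The diff view is truncated at this point; per the PR description above, the remaining change adds `mcp_server=True` to the final `demo.launch()` call. As a sanity check on the geometry that the removed code and the new docstring both describe, here is the crop arithmetic at the UI defaults (width=768, height=768, pixel_offset=8), using only values taken from the diff:

```python
# Worked example of the diptych geometry at the default slider values.
width, height, pixel_offset = 768, 768, 8

padded_width = width + pixel_offset * 2           # 784
padded_height = height + pixel_offset * 2         # 784
diptych_size = (padded_width * 2, padded_height)  # (1568, 784): left = reference, right = generated

# Crop the right panel out of the full diptych, then strip the padding ring:
right_panel = (padded_width, 0, padded_width * 2, padded_height)  # (784, 0, 1568, 784)
final_crop = (pixel_offset, pixel_offset,
              padded_width - pixel_offset, padded_height - pixel_offset)  # (8, 8, 776, 776) -> 768x768
```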