File size: 15,757 Bytes
92c3d7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6391fa9
92c3d7f
 
 
 
 
 
 
 
 
 
 
ecd462b
92c3d7f
 
 
 
ecd462b
92c3d7f
 
 
 
ecd462b
 
 
 
92c3d7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ecd462b
92c3d7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84ade0a
33976bc
 
 
 
 
 
 
 
 
 
 
92c3d7f
 
 
 
 
 
 
 
 
 
 
ecd462b
92c3d7f
 
 
ecd462b
 
92c3d7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc04eca
 
92c3d7f
 
cc04eca
 
 
 
 
 
 
 
 
 
 
92c3d7f
 
 
 
 
25dcda0
92c3d7f
 
 
 
 
 
 
 
 
 
 
6391fa9
 
92c3d7f
 
 
 
 
 
 
 
122eb28
92c3d7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
972d7b9
92c3d7f
972d7b9
 
92c3d7f
 
972d7b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92c3d7f
 
 
 
 
 
6391fa9
92c3d7f
fde6683
 
92c3d7f
 
 
 
 
 
 
 
 
 
122eb28
92c3d7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16f8b88
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import spaces
import argparse

import os
import shutil
import cv2
import gradio as gr
import numpy as np
import torch
from facexlib.utils.face_restoration_helper import FaceRestoreHelper
import huggingface_hub
from huggingface_hub import hf_hub_download
from PIL import Image
from torchvision.transforms.functional import normalize

from dreamo.dreamo_pipeline import DreamOPipeline
from dreamo.utils import img2tensor, resize_numpy_image_area, tensor2img, resize_numpy_image_long
from tools import BEN2

# CLI configuration: --port selects the serving port; --no_turbo disables the
# FLUX-turbo LoRA and falls back to the slower full-step sampling path.
parser = argparse.ArgumentParser()
parser.add_argument('--port', type=int, default=8080)
parser.add_argument('--no_turbo', action='store_true')
args = parser.parse_args()

# Authenticate with the Hugging Face Hub so gated weights (FLUX.1-dev) can be
# downloaded. NOTE(review): if HF_TOKEN is unset this passes None to login(),
# which triggers an interactive prompt — confirm the env var is always set in
# the deployment environment.
huggingface_hub.login(os.getenv('HF_TOKEN'))

# Best-effort removal of a stale Gradio example cache from a previous run;
# a missing folder is expected on first start and is not an error.
try:
    shutil.rmtree('gradio_cached_examples')
except FileNotFoundError:
    print("cache folder not exist")

class Generator:
    """Bundles the preprocessing models and the DreamO diffusion pipeline.

    Loads onto CUDA at construction time:
      * BEN2 for reference-image background removal,
      * facexlib's FaceRestoreHelper for face detection/alignment/parsing,
      * the DreamO FLUX pipeline (with the turbo LoRA unless --no_turbo
        was passed on the command line).
    """

    def __init__(self):
        device = torch.device('cuda')
        # preprocessing models
        # background remove model: BEN2
        self.bg_rm_model = BEN2.BEN_Base().to(device).eval()
        hf_hub_download(repo_id='PramaLLC/BEN2', filename='BEN2_Base.pth', local_dir='models')
        self.bg_rm_model.loadcheckpoints('models/BEN2_Base.pth')
        # face crop and align tool: facexlib
        self.face_helper = FaceRestoreHelper(
            upscale_factor=1,
            face_size=512,
            crop_ratio=(1, 1),
            det_model='retinaface_resnet50',
            save_ext='png',
            device=device,
        )

        # load dreamo
        model_root = 'black-forest-labs/FLUX.1-dev'
        dreamo_pipeline = DreamOPipeline.from_pretrained(model_root, torch_dtype=torch.bfloat16)
        dreamo_pipeline.load_dreamo_model(device, use_turbo=not args.no_turbo)
        self.dreamo_pipeline = dreamo_pipeline.to(device)

    @torch.no_grad()
    def get_align_face(self, img):
        """Detect, align and parse the center face in an RGB numpy image.

        Returns a 512x512 RGB numpy image in which everything except the
        face features is painted white, or None when no face is detected.
        """
        # the face preprocessing code is same as PuLID
        self.face_helper.clean_all()
        image_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        self.face_helper.read_image(image_bgr)
        self.face_helper.get_face_landmarks_5(only_center_face=True)
        self.face_helper.align_warp_face()
        if len(self.face_helper.cropped_faces) == 0:
            return None
        align_face = self.face_helper.cropped_faces[0]

        # Renamed from `input` to avoid shadowing the builtin; scale to [0, 1].
        face_tensor = img2tensor(align_face, bgr2rgb=True).unsqueeze(0) / 255.0
        face_tensor = face_tensor.to(torch.device("cuda"))
        parsing_out = self.face_helper.face_parse(
            normalize(face_tensor, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        )[0]
        parsing_out = parsing_out.argmax(dim=1, keepdim=True)
        # Labels treated as non-face background — presumably 0=bg, 16=cloth,
        # 18=hat, 7/8/9=ears/earrings, 14/15=neck; TODO confirm against the
        # facexlib face-parsing label table.
        bg_label = [0, 16, 18, 7, 8, 9, 14, 15]
        bg = sum(parsing_out == i for i in bg_label).bool()
        white_image = torch.ones_like(face_tensor)
        # only keep the face features
        face_features_image = torch.where(bg, white_image, face_tensor)
        face_features_image = tensor2img(face_features_image, rgb2bgr=False)

        return face_features_image


generator = Generator()


@spaces.GPU
@torch.inference_mode()
def generate_image(
    ref_image1,
    ref_image2,
    ref_task1,
    ref_task2,
    prompt,
    seed,
    width=1024,
    height=1024,
    ref_res=512,
    num_steps=12,
    guidance=3.5,
    true_cfg=1,
    cfg_start_step=0,
    cfg_end_step=0,
    neg_prompt='',
    neg_guidance=3.5,
    first_step_guidance=0,
):
    """Preprocess the reference images and run the DreamO pipeline.

    Args:
        ref_image1, ref_image2: RGB numpy images from the UI, or None.
        ref_task1, ref_task2: one of "ip", "id", "style" per reference image.
        prompt: text prompt for generation.
        seed: random seed; -1 (or "-1") draws a fresh random seed.
        width, height: output resolution in pixels.
        ref_res: target area (ref_res**2) for non-"id" reference images.
        num_steps, guidance, true_cfg, cfg_start_step, cfg_end_step,
        neg_prompt, neg_guidance: sampler / CFG parameters forwarded to the
            pipeline.
        first_step_guidance: guidance for the first step; 0 falls back to
            `guidance`.

    Returns:
        (generated PIL image, list of preprocessed reference images, used seed)

    Raises:
        gr.Error: when an "id" reference image contains no detectable face.
    """
    print(prompt)
    ref_conds = []
    debug_images = []

    ref_images = [ref_image1, ref_image2]
    ref_tasks = [ref_task1, ref_task2]

    for idx, (ref_image, ref_task) in enumerate(zip(ref_images, ref_tasks)):
        if ref_image is not None:
            if ref_task == "id":
                ref_image = resize_numpy_image_long(ref_image, 1024)
                ref_image = generator.get_align_face(ref_image)
                # get_align_face returns None when no face is found; fail fast
                # with a clear message instead of crashing later in img2tensor.
                if ref_image is None:
                    raise gr.Error(f'No face detected in ref image {idx + 1}; the "id" task requires a visible face.')
            elif ref_task != "style":
                # Remove the background so only the item itself conditions the model.
                ref_image = generator.bg_rm_model.inference(Image.fromarray(ref_image))
            if ref_task != "id":
                # Normalize the reference to roughly ref_res**2 pixels of area.
                ref_image = resize_numpy_image_area(np.array(ref_image), ref_res * ref_res)
            debug_images.append(ref_image)
            # Scale to [-1, 1] as expected by the diffusion pipeline.
            ref_image = img2tensor(ref_image, bgr2rgb=False).unsqueeze(0) / 255.0
            ref_image = 2 * ref_image - 1.0
            ref_conds.append(
                {
                    'img': ref_image,
                    'task': ref_task,
                    'idx': idx + 1,
                }
            )

    seed = int(seed)
    if seed == -1:
        # Draw a fresh random seed so the actually-used value can be reported
        # back to the UI for reproducibility.
        seed = torch.Generator(device="cpu").seed()

    image = generator.dreamo_pipeline(
        prompt=prompt,
        width=width,
        height=height,
        num_inference_steps=num_steps,
        guidance_scale=guidance,
        ref_conds=ref_conds,
        generator=torch.Generator(device="cpu").manual_seed(seed),
        true_cfg_scale=true_cfg,
        true_cfg_start_step=cfg_start_step,
        true_cfg_end_step=cfg_end_step,
        negative_prompt=neg_prompt,
        neg_guidance_scale=neg_guidance,
        first_step_guidance_scale=first_step_guidance if first_step_guidance > 0 else guidance,
    ).images[0]

    return image, debug_images, seed


_HEADER_ = '''
<div style="text-align: center; max-width: 650px; margin: 0 auto;">
    <h1 style="font-size: 2.5rem; font-weight: 700; margin-bottom: 1rem; display: contents;">ihome AI Design</h1>
    <p style="font-size: 1rem; margin-bottom: 1.5rem;"> ihome AI Design: 家居AI图片处理</a> | </p>
</div>

核心功能:轻松“换装”您的家居场景!您可以上传一张家居单品(如沙发、灯具)的图片,通过文字描述您想要的房间风格,模型会将其无缝融入到一个全新的家居环境照片中,或者添加到您指定的现有房间照片里。
重要提示: 请务必先尝试下方的示例,这将帮助您更好地理解我们模型在家居场景生成与修改方面的能力以及目前支持的操作类型。
为每个输入选择合适的任务类型:
若为单个家居物品(如沙发、椅子、灯具等): 选择 “家居物品放置”(Item Placement) 模式。我们会自动识别并尝试移除该物品的原始背景。您只需在提示词 (prompt) 中描述希望它融入的房间环境和风格(例如:“将这个沙发放在一个现代简约风格的客厅窗边”),即可生成全新的家居场景图。
若为完整家居场景图(用于风格参考或局部修改): 选择 “场景风格编辑”(Scene Style/Edit) 模式。此模式下,原图的整体环境和风格将被保留。
如需进行风格化生成 (例如,将一个空房间照片变得充满某种特定风格并添置家具),您需要在提示词前加上:'generate a same style image.' (生成一张同样风格的图片) 来激活此任务,并描述您想添加或修改的内容。
如需在现有场景中直接添加或替换物品,请直接在提示词中描述您的具体操作(例如:“在图片中的壁炉旁添加这个上传的椅子”或“把图片中的茶几替换成一个圆形的木质茶几”)。
关键参数 - 引导强度 (Guidance Scale): 默认值为 3.5。
如果您发现家具材质显得过于光亮、不真实,或者物品与环境光照融合不自然,可以适当降低引导强度 (例如调整到 3.0)。
反之,如果家具或场景细节呈现不佳,或者物品摆放出现扭曲变形、与场景透视不符,可以尝试提高引导强度 (例如调整到 4.0)。
加速推理: 我们采用了 FLUX-turbo LoRA 技术,将采样步数从25步减少到12步(相较于FLUX-dev版本)。此外,我们还蒸馏了CFG LoRA,通过免除真正的CFG计算,实现了近两倍的步数削减,大幅提升了生成速度
'''  # noqa E501

_CITE_ = r"""
---


"""  # noqa E501


def create_demo():
    """Build and return the Gradio Blocks UI for the DreamO home-design demo."""

    with gr.Blocks() as demo:
        gr.Markdown(_HEADER_)

        with gr.Row():
            with gr.Column():
                # Up to two reference images, each with its own task type:
                #   "ip"    -> item placement (background is removed),
                #   "id"    -> identity/face (face is cropped and aligned),
                #   "style" -> style reference (image is passed through as-is).
                with gr.Row():
                    ref_image1 = gr.Image(label="ref image 1", type="numpy", height=256)
                    ref_image2 = gr.Image(label="ref image 2", type="numpy", height=256)
                with gr.Row():
                    ref_task1 = gr.Dropdown(choices=["ip", "id", "style"], value="ip", label="task for ref image 1")
                    ref_task2 = gr.Dropdown(choices=["ip", "id", "style"], value="ip", label="task for ref image 2")
                prompt = gr.Textbox(label="Prompt", value="a person playing guitar in the street")
                width = gr.Slider(768, 1024, 1024, step=16, label="Width")
                height = gr.Slider(768, 1024, 1024, step=16, label="Height")
                num_steps = gr.Slider(8, 30, 12, step=1, label="Number of steps")
                guidance = gr.Slider(1.0, 10.0, 3.5, step=0.1, label="Guidance")
                seed = gr.Textbox(label="Seed (-1 for random)", value="-1")
                # Advanced knobs are hidden (visible=False) but still wired to
                # generate_image, so their default values are passed through.
                with gr.Accordion("Advanced Options", open=False, visible=False):
                    ref_res = gr.Slider(512, 1024, 512, step=16, label="resolution for ref image")
                    neg_prompt = gr.Textbox(label="Neg Prompt", value="")
                    neg_guidance = gr.Slider(1.0, 10.0, 3.5, step=0.1, label="Neg Guidance")
                    true_cfg = gr.Slider(1, 5, 1, step=0.1, label="true cfg")
                    cfg_start_step = gr.Slider(0, 30, 0, step=1, label="cfg start step")
                    cfg_end_step = gr.Slider(0, 30, 0, step=1, label="cfg end step")
                    first_step_guidance = gr.Slider(0, 10, 0, step=0.1, label="first step guidance")
                generate_btn = gr.Button("Generate")
                gr.Markdown(_CITE_)

            with gr.Column():
                output_image = gr.Image(label="Generated Image", format='png')
                # Shows the intermediate preprocessing results (face crop,
                # background removal) so users can debug their inputs.
                debug_image = gr.Gallery(
                    label="Preprocessing output (including possible face crop and background remove)",
                    elem_id="gallery",
                )
                seed_output = gr.Textbox(label="Used Seed")

        with gr.Row(), gr.Column():
            gr.Markdown("## Examples")
            # Each example row: [ref_image1, ref_image2, ref_task1, ref_task2,
            # prompt, seed]; the long prompt below is a verbatim structured
            # integration brief for the room+bed example.
            example_inps = [

                [
                    'example_inputs/room.jpg',
                    'example_inputs/bed.jpg',
                    'ip',
                    'ip',
                    '''
**Primary Goal:** Integrate the modern brown platform bed from [IMG_BED_REF] into the empty room scene depicted in [IMG_ROOM_REF]. The final composition should be photorealistic, with the bed appearing as a natural and central piece of furniture within the room.

**Scene Context ([IMG_ROOM_REF] - Image 1):**
The room in [IMG_ROOM_REF] is an empty space characterized by:
*   **Walls:** Light blue painted walls.
*   **Flooring:** Beige carpet, with visible sunlight patterns indicating strong light from the left.
*   **Lighting:** Bright natural daylight streaming in from large sliding glass doors on the left, casting defined shadows across the floor.
*   **Existing Elements:** Baseboard heaters along the walls, a doorway on the right-side wall.
*   **Overall Ambiance:** Bright, airy, and unfurnished, with a clear light source direction.

**Subject to Integrate ([IMG_BED_REF] - Image 2):**
The bed in [IMG_BED_REF] is a:
*   **Type:** Modern platform bed.
*   **Style:** Minimalist with clean lines and a low, wide headboard.
*   **Material & Color:** Brown frame (appears to be leather or faux leather). Bedding includes white sheets, beige and brown blankets/throws, and a textured grey round accent pillow.
*   **Key Visual Features:** Low profile, integrated headboard sections.

**Integration Directives:**
1.  **Placement:** Position the modern brown platform bed from [IMG_BED_REF] on the beige carpet in [IMG_ROOM_REF]. The headboard should be placed against the far wall (the wall directly opposite the large sliding glass doors). Center the bed along this wall, leaving some space on either side if appropriate for a bedroom layout.
2.  **Lighting and Shadows:**
    *   The bed, especially the side facing the sliding glass doors (left side of the bed from a viewer's perspective facing the headboard), should be strongly illuminated by the natural daylight, consistent with the existing light patterns in [IMG_ROOM_REF].
    *   Render realistic, defined shadows cast by the bed onto the beige carpet. These shadows should extend towards the right and slightly towards the viewer, aligning with the direction of the existing sunlight and shadows in [IMG_ROOM_REF]. Pay attention to how light interacts with the different textures of the bed frame and bedding.
3.  **Scale and Perspective:**
    *   Ensure the bed is scaled appropriately for a master or main bedroom, fitting naturally within the visible dimensions of the room in [IMG_ROOM_REF].
    *   The perspective of the inserted bed must align accurately with the camera angle and perspective of the room in [IMG_ROOM_REF].
4.  **Color and Material Harmony:**
    *   The brown color of the bed frame and the colors of the bedding should appear natural and correctly lit under the bright daylight conditions of [IMG_ROOM_REF]. The highlights and shaded areas on the bed should reflect the strong directional lighting.
5.  **Occlusion:** The bed should correctly occlude the portion of the far wall and the baseboard heater behind it.
6.  **Details Preservation and Adaptation:** Preserve the defining characteristics of the bed and its bedding from [IMG_BED_REF], ensuring it convincingly integrates into the new empty room environment.

**Output Requirements:**
*   High-fidelity, photorealistic image.
*   Seamless and natural integration of the bed.
*   Maintain the overall bright, sunlit style and mood of the room in [IMG_ROOM_REF] after the insertion.

**Reference Image Mapping (for system processing):**
*   [IMG_ROOM_REF]: Corresponds to the input image of the empty room (image 1).
*   [IMG_BED_REF]: Corresponds to the input image of the bed (image 2)
''',
                    8303780338601106219,
                ],
            ]
            # NOTE(review): the label below describes 12 example rows but only
            # one example is defined — likely stale from an upstream demo;
            # confirm whether it should be updated.
            gr.Examples(
                examples=example_inps,
                inputs=[ref_image1, ref_image2, ref_task1, ref_task2, prompt, seed],
                label='row 1-4: IP task; row 5: ID task; row 6-7: Style task. row 8-9: Try-On task; row 10-12: Multi IP',
                cache_examples='lazy',
                outputs=[output_image, debug_image, seed_output],
                fn=generate_image,
            )

        # Wire the Generate button; input order must match generate_image's
        # positional parameter order.
        generate_btn.click(
            fn=generate_image,
            inputs=[
                ref_image1,
                ref_image2,
                ref_task1,
                ref_task2,
                prompt,
                seed,
                width,
                height,
                ref_res,
                num_steps,
                guidance,
                true_cfg,
                cfg_start_step,
                cfg_end_step,
                neg_prompt,
                neg_guidance,
                first_step_guidance,
            ],
            outputs=[output_image, debug_image, seed_output],
        )

    return demo


if __name__ == '__main__':
    # Build the Gradio UI and start serving with default launch options.
    create_demo().launch()