File size: 15,757 Bytes
92c3d7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6391fa9
92c3d7f
 
 
 
 
 
 
 
 
 
 
ecd462b
92c3d7f
 
 
 
ecd462b
92c3d7f
 
 
 
ecd462b
 
 
 
92c3d7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ecd462b
92c3d7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84ade0a
33976bc
 
 
 
 
 
 
 
 
 
 
92c3d7f
 
 
 
 
 
 
 
 
 
 
ecd462b
92c3d7f
 
 
ecd462b
 
92c3d7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc04eca
 
92c3d7f
 
cc04eca
 
 
 
 
 
 
 
 
 
 
92c3d7f
 
 
 
 
25dcda0
92c3d7f
 
 
 
 
 
 
 
 
 
 
6391fa9
 
92c3d7f
 
 
 
 
 
 
 
122eb28
92c3d7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
972d7b9
92c3d7f
972d7b9
 
92c3d7f
 
972d7b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92c3d7f
 
 
 
 
 
6391fa9
92c3d7f
fde6683
 
92c3d7f
 
 
 
 
 
 
 
 
 
122eb28
92c3d7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16f8b88
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import spaces
import argparse

import os
import shutil
import cv2
import gradio as gr
import numpy as np
import torch
from facexlib.utils.face_restoration_helper import FaceRestoreHelper
import huggingface_hub
from huggingface_hub import hf_hub_download
from PIL import Image
from torchvision.transforms.functional import normalize

from dreamo.dreamo_pipeline import DreamOPipeline
from dreamo.utils import img2tensor, resize_numpy_image_area, tensor2img, resize_numpy_image_long
from tools import BEN2

# CLI configuration: --port selects the serving port; --no_turbo disables the
# FLUX-turbo LoRA and falls back to the slower full-step sampling path.
parser = argparse.ArgumentParser()
parser.add_argument('--port', type=int, default=8080)
parser.add_argument('--no_turbo', action='store_true')
args = parser.parse_args()

# Authenticate with the Hugging Face Hub so gated weights (FLUX.1-dev) can be
# downloaded. NOTE(review): if HF_TOKEN is unset this passes None to login(),
# which triggers an interactive prompt — confirm the env var is always set in
# the deployment environment.
huggingface_hub.login(os.getenv('HF_TOKEN'))

# Best-effort removal of a stale Gradio example cache from a previous run;
# a missing folder is expected on first start and is not an error.
try:
    shutil.rmtree('gradio_cached_examples')
except FileNotFoundError:
    print("cache folder not exist")

class Generator:
    """Bundles the preprocessing models and the DreamO diffusion pipeline.

    Loads onto CUDA at construction time:
      * BEN2 for reference-image background removal,
      * facexlib's FaceRestoreHelper for face detection/alignment/parsing,
      * the DreamO FLUX pipeline (with the turbo LoRA unless --no_turbo
        was passed on the command line).
    """

    def __init__(self):
        device = torch.device('cuda')
        # preprocessing models
        # background remove model: BEN2
        self.bg_rm_model = BEN2.BEN_Base().to(device).eval()
        hf_hub_download(repo_id='PramaLLC/BEN2', filename='BEN2_Base.pth', local_dir='models')
        self.bg_rm_model.loadcheckpoints('models/BEN2_Base.pth')
        # face crop and align tool: facexlib
        self.face_helper = FaceRestoreHelper(
            upscale_factor=1,
            face_size=512,
            crop_ratio=(1, 1),
            det_model='retinaface_resnet50',
            save_ext='png',
            device=device,
        )

        # load dreamo
        model_root = 'black-forest-labs/FLUX.1-dev'
        dreamo_pipeline = DreamOPipeline.from_pretrained(model_root, torch_dtype=torch.bfloat16)
        dreamo_pipeline.load_dreamo_model(device, use_turbo=not args.no_turbo)
        self.dreamo_pipeline = dreamo_pipeline.to(device)

    @torch.no_grad()
    def get_align_face(self, img):
        """Detect, align and parse the center face in an RGB numpy image.

        Returns a 512x512 RGB numpy image in which everything except the
        face features is painted white, or None when no face is detected.
        """
        # the face preprocessing code is same as PuLID
        self.face_helper.clean_all()
        image_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        self.face_helper.read_image(image_bgr)
        self.face_helper.get_face_landmarks_5(only_center_face=True)
        self.face_helper.align_warp_face()
        if len(self.face_helper.cropped_faces) == 0:
            return None
        align_face = self.face_helper.cropped_faces[0]

        # Renamed from `input` to avoid shadowing the builtin; scale to [0, 1].
        face_tensor = img2tensor(align_face, bgr2rgb=True).unsqueeze(0) / 255.0
        face_tensor = face_tensor.to(torch.device("cuda"))
        parsing_out = self.face_helper.face_parse(
            normalize(face_tensor, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        )[0]
        parsing_out = parsing_out.argmax(dim=1, keepdim=True)
        # Labels treated as non-face background — presumably 0=bg, 16=cloth,
        # 18=hat, 7/8/9=ears/earrings, 14/15=neck; TODO confirm against the
        # facexlib face-parsing label table.
        bg_label = [0, 16, 18, 7, 8, 9, 14, 15]
        bg = sum(parsing_out == i for i in bg_label).bool()
        white_image = torch.ones_like(face_tensor)
        # only keep the face features
        face_features_image = torch.where(bg, white_image, face_tensor)
        face_features_image = tensor2img(face_features_image, rgb2bgr=False)

        return face_features_image


generator = Generator()


@spaces.GPU
@torch.inference_mode()
def generate_image(
    ref_image1,
    ref_image2,
    ref_task1,
    ref_task2,
    prompt,
    seed,
    width=1024,
    height=1024,
    ref_res=512,
    num_steps=12,
    guidance=3.5,
    true_cfg=1,
    cfg_start_step=0,
    cfg_end_step=0,
    neg_prompt='',
    neg_guidance=3.5,
    first_step_guidance=0,
):
    """Preprocess the reference images and run the DreamO pipeline.

    Args:
        ref_image1, ref_image2: RGB numpy images from the UI, or None.
        ref_task1, ref_task2: one of "ip", "id", "style" per reference image.
        prompt: text prompt for generation.
        seed: random seed; -1 (or "-1") draws a fresh random seed.
        width, height: output resolution in pixels.
        ref_res: target area (ref_res**2) for non-"id" reference images.
        num_steps, guidance, true_cfg, cfg_start_step, cfg_end_step,
        neg_prompt, neg_guidance: sampler / CFG parameters forwarded to the
            pipeline.
        first_step_guidance: guidance for the first step; 0 falls back to
            `guidance`.

    Returns:
        (generated PIL image, list of preprocessed reference images, used seed)

    Raises:
        gr.Error: when an "id" reference image contains no detectable face.
    """
    print(prompt)
    ref_conds = []
    debug_images = []

    ref_images = [ref_image1, ref_image2]
    ref_tasks = [ref_task1, ref_task2]

    for idx, (ref_image, ref_task) in enumerate(zip(ref_images, ref_tasks)):
        if ref_image is not None:
            if ref_task == "id":
                ref_image = resize_numpy_image_long(ref_image, 1024)
                ref_image = generator.get_align_face(ref_image)
                # get_align_face returns None when no face is found; fail fast
                # with a clear message instead of crashing later in img2tensor.
                if ref_image is None:
                    raise gr.Error(f'No face detected in ref image {idx + 1}; the "id" task requires a visible face.')
            elif ref_task != "style":
                # Remove the background so only the item itself conditions the model.
                ref_image = generator.bg_rm_model.inference(Image.fromarray(ref_image))
            if ref_task != "id":
                # Normalize the reference to roughly ref_res**2 pixels of area.
                ref_image = resize_numpy_image_area(np.array(ref_image), ref_res * ref_res)
            debug_images.append(ref_image)
            # Scale to [-1, 1] as expected by the diffusion pipeline.
            ref_image = img2tensor(ref_image, bgr2rgb=False).unsqueeze(0) / 255.0
            ref_image = 2 * ref_image - 1.0
            ref_conds.append(
                {
                    'img': ref_image,
                    'task': ref_task,
                    'idx': idx + 1,
                }
            )

    seed = int(seed)
    if seed == -1:
        # Draw a fresh random seed so the actually-used value can be reported
        # back to the UI for reproducibility.
        seed = torch.Generator(device="cpu").seed()

    image = generator.dreamo_pipeline(
        prompt=prompt,
        width=width,
        height=height,
        num_inference_steps=num_steps,
        guidance_scale=guidance,
        ref_conds=ref_conds,
        generator=torch.Generator(device="cpu").manual_seed(seed),
        true_cfg_scale=true_cfg,
        true_cfg_start_step=cfg_start_step,
        true_cfg_end_step=cfg_end_step,
        negative_prompt=neg_prompt,
        neg_guidance_scale=neg_guidance,
        first_step_guidance_scale=first_step_guidance if first_step_guidance > 0 else guidance,
    ).images[0]

    return image, debug_images, seed


_HEADER_ = '''
<div style="text-align: center; max-width: 650px; margin: 0 auto;">
    <h1 style="font-size: 2.5rem; font-weight: 700; margin-bottom: 1rem; display: contents;">ihome AI Design</h1>
    <p style="font-size: 1rem; margin-bottom: 1.5rem;"> ihome AI Design: 家居AI图片处理</a> | </p>
</div>

核心功能:轻松“换装”您的家居场景!您可以上传一张家居单品(如沙发、灯具)的图片,通过文字描述您想要的房间风格,模型会将其无缝融入到一个全新的家居环境照片中,或者添加到您指定的现有房间照片里。
重要提示: 请务必先尝试下方的示例,这将帮助您更好地理解我们模型在家居场景生成与修改方面的能力以及目前支持的操作类型。
为每个输入选择合适的任务类型:
若为单个家居物品(如沙发、椅子、灯具等): 选择 “家居物品放置”(Item Placement) 模式。我们会自动识别并尝试移除该物品的原始背景。您只需在提示词 (prompt) 中描述希望它融入的房间环境和风格(例如:“将这个沙发放在一个现代简约风格的客厅窗边”),即可生成全新的家居场景图。
若为完整家居场景图(用于风格参考或局部修改): 选择 “场景风格编辑”(Scene Style/Edit) 模式。此模式下,原图的整体环境和风格将被保留。
如需进行风格化生成 (例如,将一个空房间照片变得充满某种特定风格并添置家具),您需要在提示词前加上:'generate a same style image.' (生成一张同样风格的图片) 来激活此任务,并描述您想添加或修改的内容。
如需在现有场景中直接添加或替换物品,请直接在提示词中描述您的具体操作(例如:“在图片中的壁炉旁添加这个上传的椅子”或“把图片中的茶几替换成一个圆形的木质茶几”)。
关键参数 - 引导强度 (Guidance Scale): 默认值为 3.5。
如果您发现家具材质显得过于光亮、不真实,或者物品与环境光照融合不自然,可以适当降低引导强度 (例如调整到 3.0)。
反之,如果家具或场景细节呈现不佳,或者物品摆放出现扭曲变形、与场景透视不符,可以尝试提高引导强度 (例如调整到 4.0)。
加速推理: 我们采用了 FLUX-turbo LoRA 技术,将采样步数从25步减少到12步(相较于FLUX-dev版本)。此外,我们还蒸馏了CFG LoRA,通过免除真正的CFG计算,实现了近两倍的步数削减,大幅提升了生成速度
'''  # noqa E501

_CITE_ = r"""
---


"""  # noqa E501


def create_demo():
    """Build and return the Gradio Blocks UI for the DreamO home-design demo."""

    with gr.Blocks() as demo:
        gr.Markdown(_HEADER_)

        with gr.Row():
            with gr.Column():
                # Up to two reference images, each with its own task type:
                #   "ip"    -> item placement (background is removed),
                #   "id"    -> identity/face (face is cropped and aligned),
                #   "style" -> style reference (image is passed through as-is).
                with gr.Row():
                    ref_image1 = gr.Image(label="ref image 1", type="numpy", height=256)
                    ref_image2 = gr.Image(label="ref image 2", type="numpy", height=256)
                with gr.Row():
                    ref_task1 = gr.Dropdown(choices=["ip", "id", "style"], value="ip", label="task for ref image 1")
                    ref_task2 = gr.Dropdown(choices=["ip", "id", "style"], value="ip", label="task for ref image 2")
                prompt = gr.Textbox(label="Prompt", value="a person playing guitar in the street")
                width = gr.Slider(768, 1024, 1024, step=16, label="Width")
                height = gr.Slider(768, 1024, 1024, step=16, label="Height")
                num_steps = gr.Slider(8, 30, 12, step=1, label="Number of steps")
                guidance = gr.Slider(1.0, 10.0, 3.5, step=0.1, label="Guidance")
                seed = gr.Textbox(label="Seed (-1 for random)", value="-1")
                # Advanced knobs are hidden (visible=False) but still wired to
                # generate_image, so their default values are passed through.
                with gr.Accordion("Advanced Options", open=False, visible=False):
                    ref_res = gr.Slider(512, 1024, 512, step=16, label="resolution for ref image")
                    neg_prompt = gr.Textbox(label="Neg Prompt", value="")
                    neg_guidance = gr.Slider(1.0, 10.0, 3.5, step=0.1, label="Neg Guidance")
                    true_cfg = gr.Slider(1, 5, 1, step=0.1, label="true cfg")
                    cfg_start_step = gr.Slider(0, 30, 0, step=1, label="cfg start step")
                    cfg_end_step = gr.Slider(0, 30, 0, step=1, label="cfg end step")
                    first_step_guidance = gr.Slider(0, 10, 0, step=0.1, label="first step guidance")
                generate_btn = gr.Button("Generate")
                gr.Markdown(_CITE_)

            with gr.Column():
                output_image = gr.Image(label="Generated Image", format='png')
                # Shows the intermediate preprocessing results (face crop,
                # background removal) so users can debug their inputs.
                debug_image = gr.Gallery(
                    label="Preprocessing output (including possible face crop and background remove)",
                    elem_id="gallery",
                )
                seed_output = gr.Textbox(label="Used Seed")

        with gr.Row(), gr.Column():
            gr.Markdown("## Examples")
            # Each example row: [ref_image1, ref_image2, ref_task1, ref_task2,
            # prompt, seed]; the long prompt below is a verbatim structured
            # integration brief for the room+bed example.
            example_inps = [

                [
                    'example_inputs/room.jpg',
                    'example_inputs/bed.jpg',
                    'ip',
                    'ip',
                    '''
**Primary Goal:** Integrate the modern brown platform bed from [IMG_BED_REF] into the empty room scene depicted in [IMG_ROOM_REF]. The final composition should be photorealistic, with the bed appearing as a natural and central piece of furniture within the room.

**Scene Context ([IMG_ROOM_REF] - Image 1):**
The room in [IMG_ROOM_REF] is an empty space characterized by:
*   **Walls:** Light blue painted walls.
*   **Flooring:** Beige carpet, with visible sunlight patterns indicating strong light from the left.
*   **Lighting:** Bright natural daylight streaming in from large sliding glass doors on the left, casting defined shadows across the floor.
*   **Existing Elements:** Baseboard heaters along the walls, a doorway on the right-side wall.
*   **Overall Ambiance:** Bright, airy, and unfurnished, with a clear light source direction.

**Subject to Integrate ([IMG_BED_REF] - Image 2):**
The bed in [IMG_BED_REF] is a:
*   **Type:** Modern platform bed.
*   **Style:** Minimalist with clean lines and a low, wide headboard.
*   **Material & Color:** Brown frame (appears to be leather or faux leather). Bedding includes white sheets, beige and brown blankets/throws, and a textured grey round accent pillow.
*   **Key Visual Features:** Low profile, integrated headboard sections.

**Integration Directives:**
1.  **Placement:** Position the modern brown platform bed from [IMG_BED_REF] on the beige carpet in [IMG_ROOM_REF]. The headboard should be placed against the far wall (the wall directly opposite the large sliding glass doors). Center the bed along this wall, leaving some space on either side if appropriate for a bedroom layout.
2.  **Lighting and Shadows:**
    *   The bed, especially the side facing the sliding glass doors (left side of the bed from a viewer's perspective facing the headboard), should be strongly illuminated by the natural daylight, consistent with the existing light patterns in [IMG_ROOM_REF].
    *   Render realistic, defined shadows cast by the bed onto the beige carpet. These shadows should extend towards the right and slightly towards the viewer, aligning with the direction of the existing sunlight and shadows in [IMG_ROOM_REF]. Pay attention to how light interacts with the different textures of the bed frame and bedding.
3.  **Scale and Perspective:**
    *   Ensure the bed is scaled appropriately for a master or main bedroom, fitting naturally within the visible dimensions of the room in [IMG_ROOM_REF].
    *   The perspective of the inserted bed must align accurately with the camera angle and perspective of the room in [IMG_ROOM_REF].
4.  **Color and Material Harmony:**
    *   The brown color of the bed frame and the colors of the bedding should appear natural and correctly lit under the bright daylight conditions of [IMG_ROOM_REF]. The highlights and shaded areas on the bed should reflect the strong directional lighting.
5.  **Occlusion:** The bed should correctly occlude the portion of the far wall and the baseboard heater behind it.
6.  **Details Preservation and Adaptation:** Preserve the defining characteristics of the bed and its bedding from [IMG_BED_REF], ensuring it convincingly integrates into the new empty room environment.

**Output Requirements:**
*   High-fidelity, photorealistic image.
*   Seamless and natural integration of the bed.
*   Maintain the overall bright, sunlit style and mood of the room in [IMG_ROOM_REF] after the insertion.

**Reference Image Mapping (for system processing):**
*   [IMG_ROOM_REF]: Corresponds to the input image of the empty room (image 1).
*   [IMG_BED_REF]: Corresponds to the input image of the bed (image 2)
''',
                    8303780338601106219,
                ],
            ]
            # NOTE(review): the label below describes 12 example rows but only
            # one example is defined — likely stale from an upstream demo;
            # confirm whether it should be updated.
            gr.Examples(
                examples=example_inps,
                inputs=[ref_image1, ref_image2, ref_task1, ref_task2, prompt, seed],
                label='row 1-4: IP task; row 5: ID task; row 6-7: Style task. row 8-9: Try-On task; row 10-12: Multi IP',
                cache_examples='lazy',
                outputs=[output_image, debug_image, seed_output],
                fn=generate_image,
            )

        # Wire the Generate button; input order must match generate_image's
        # positional parameter order.
        generate_btn.click(
            fn=generate_image,
            inputs=[
                ref_image1,
                ref_image2,
                ref_task1,
                ref_task2,
                prompt,
                seed,
                width,
                height,
                ref_res,
                num_steps,
                guidance,
                true_cfg,
                cfg_start_step,
                cfg_end_step,
                neg_prompt,
                neg_guidance,
                first_step_guidance,
            ],
            outputs=[output_image, debug_image, seed_output],
        )

    return demo


if __name__ == '__main__':
    # Build the Gradio UI and start serving with default launch options.
    create_demo().launch()