# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import spaces import argparse import os import shutil import cv2 import gradio as gr import numpy as np import torch from facexlib.utils.face_restoration_helper import FaceRestoreHelper import huggingface_hub from huggingface_hub import hf_hub_download from PIL import Image from torchvision.transforms.functional import normalize from dreamo.dreamo_pipeline import DreamOPipeline from dreamo.utils import img2tensor, resize_numpy_image_area, tensor2img, resize_numpy_image_long from tools import BEN2 parser = argparse.ArgumentParser() parser.add_argument('--port', type=int, default=8080) parser.add_argument('--no_turbo', action='store_true') args = parser.parse_args() huggingface_hub.login(os.getenv('HF_TOKEN')) try: shutil.rmtree('gradio_cached_examples') except FileNotFoundError: print("cache folder not exist") class Generator: def __init__(self): device = torch.device('cuda') # preprocessing models # background remove model: BEN2 self.bg_rm_model = BEN2.BEN_Base().to(device).eval() hf_hub_download(repo_id='PramaLLC/BEN2', filename='BEN2_Base.pth', local_dir='models') self.bg_rm_model.loadcheckpoints('models/BEN2_Base.pth') # face crop and align tool: facexlib self.face_helper = FaceRestoreHelper( upscale_factor=1, face_size=512, crop_ratio=(1, 1), det_model='retinaface_resnet50', save_ext='png', device=device, ) # load dreamo model_root = 'black-forest-labs/FLUX.1-dev' dreamo_pipeline = DreamOPipeline.from_pretrained(model_root, torch_dtype=torch.bfloat16) dreamo_pipeline.load_dreamo_model(device, use_turbo=not args.no_turbo) self.dreamo_pipeline = dreamo_pipeline.to(device) @torch.no_grad() def get_align_face(self, img): # the face preprocessing code is same as PuLID self.face_helper.clean_all() image_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) self.face_helper.read_image(image_bgr) self.face_helper.get_face_landmarks_5(only_center_face=True) self.face_helper.align_warp_face() if len(self.face_helper.cropped_faces) == 0: return None align_face = self.face_helper.cropped_faces[0] input = img2tensor(align_face, bgr2rgb=True).unsqueeze(0) / 255.0 input = input.to(torch.device("cuda")) parsing_out = self.face_helper.face_parse(normalize(input, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]))[0] parsing_out = parsing_out.argmax(dim=1, keepdim=True) bg_label = [0, 16, 18, 7, 8, 9, 14, 15] bg = sum(parsing_out == i for i in bg_label).bool() white_image = torch.ones_like(input) # only keep the face features face_features_image = torch.where(bg, white_image, input) face_features_image = tensor2img(face_features_image, rgb2bgr=False) return face_features_image generator = Generator() @spaces.GPU @torch.inference_mode() def generate_image( ref_image1, ref_image2, ref_task1, ref_task2, prompt, seed, width=1024, height=1024, ref_res=512, num_steps=12, guidance=3.5, true_cfg=1, cfg_start_step=0, cfg_end_step=0, neg_prompt='', neg_guidance=3.5, first_step_guidance=0, ): print(prompt) ref_conds = [] debug_images = [] ref_images = [ref_image1, ref_image2] ref_tasks = [ref_task1, ref_task2] for idx, (ref_image, ref_task) in enumerate(zip(ref_images, ref_tasks)): if ref_image is not None: if ref_task == "id": ref_image = resize_numpy_image_long(ref_image, 1024) ref_image = generator.get_align_face(ref_image) elif ref_task != "style": ref_image = generator.bg_rm_model.inference(Image.fromarray(ref_image)) if ref_task != "id": ref_image = resize_numpy_image_area(np.array(ref_image), ref_res * ref_res) debug_images.append(ref_image) ref_image = img2tensor(ref_image, bgr2rgb=False).unsqueeze(0) / 255.0 ref_image = 2 * ref_image - 1.0 ref_conds.append( { 'img': ref_image, 'task': ref_task, 'idx': idx + 1, } ) seed = int(seed) if seed == -1: seed = torch.Generator(device="cpu").seed() image = generator.dreamo_pipeline( prompt=prompt, width=width, height=height, num_inference_steps=num_steps, guidance_scale=guidance, ref_conds=ref_conds, generator=torch.Generator(device="cpu").manual_seed(seed), true_cfg_scale=true_cfg, true_cfg_start_step=cfg_start_step, true_cfg_end_step=cfg_end_step, negative_prompt=neg_prompt, neg_guidance_scale=neg_guidance, first_step_guidance_scale=first_step_guidance if first_step_guidance > 0 else guidance, ).images[0] return image, debug_images, seed _HEADER_ = '''