kofaceid

Running on Zero

App Files Files Community

aiqtech committed on 7 days ago

Commit

fe7b6d8

verified ·

1 Parent(s): fedbad2

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -61

app.py CHANGED Viewed

@@ -16,69 +16,67 @@ from kolors.models.unet_2d_condition import UNet2DConditionModel
 from diffusers import EulerDiscreteScheduler
 from PIL import Image
 from insightface.app import FaceAnalysis
-from insightface.data import get_image as ins_get_image
-# Hugging Face 토큰으로 로그인
 HF_TOKEN = os.getenv("HF_TOKEN")
 if HF_TOKEN:
     login(token=HF_TOKEN)
     print("Successfully logged in to Hugging Face Hub")
-# 모델 다운로드 (CPU에서)
 print("Downloading models...")
 ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors", token=HF_TOKEN)
 ckpt_dir_faceid = snapshot_download(repo_id="Kwai-Kolors/Kolors-IP-Adapter-FaceID-Plus", token=HF_TOKEN)
-# CPU에서 모델 초기화
 print("Loading models on CPU first...")
 text_encoder = ChatGLMModel.from_pretrained(
     f'{ckpt_dir}/text_encoder',
     torch_dtype=torch.float16,
-    token=HF_TOKEN,
-    trust_remote_code=True,
-    device_map=None  # CPU에서 먼저 로드
 )
 tokenizer = ChatGLMTokenizer.from_pretrained(
     f'{ckpt_dir}/text_encoder',
-    token=HF_TOKEN,
     trust_remote_code=True
 )
 vae = AutoencoderKL.from_pretrained(
     f"{ckpt_dir}/vae",
-    torch_dtype=torch.float16,
-    token=HF_TOKEN
 )
 scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
 unet = UNet2DConditionModel.from_pretrained(
     f"{ckpt_dir}/unet",
-    torch_dtype=torch.float16,
-    token=HF_TOKEN
 )
-# CLIP 모델 로딩
-try:
-    clip_image_encoder = CLIPVisionModelWithProjection.from_pretrained(
-        'openai/clip-vit-large-patch14-336',
-        torch_dtype=torch.float16,
-        ignore_mismatched_sizes=True,
-        token=HF_TOKEN,
-        use_safetensors=True
-    )
-except:
-    clip_image_encoder = CLIPVisionModelWithProjection.from_pretrained(
-        'openai/clip-vit-large-patch14-336',
-        torch_dtype=torch.float16,
-        ignore_mismatched_sizes=True,
-        token=HF_TOKEN
-    )
 clip_image_processor = CLIPImageProcessor(size=336, crop_size=336)
-# Pipeline 생성 (CPU에서)
 pipe = StableDiffusionXLPipeline(
     vae=vae,
     text_encoder=text_encoder,
@@ -90,22 +88,21 @@ pipe = StableDiffusionXLPipeline(
     force_zeros_for_empty_prompt=False,
 )
-print("Models loaded on CPU successfully!")
 class FaceInfoGenerator():
     def __init__(self, root_dir="./.insightface/"):
-        # CPU만 사용하도록 설정
         self.app = FaceAnalysis(
             name='antelopev2',
             root=root_dir,
-            providers=['CPUExecutionProvider']  # CPU만 사용
         )
         self.app.prepare(ctx_id=0, det_size=(640, 640))
     def get_faceinfo_one_img(self, face_image):
         if face_image is None:
             return None
         face_info = self.app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))
         if len(face_info) == 0:
@@ -131,8 +128,7 @@ def face_bbox_to_square(bbox):
 MAX_SEED = np.iinfo(np.int32).max
 face_info_generator = FaceInfoGenerator()
-# GPU 함수는 @spaces.GPU 데코레이터 내에서만 GPU 사용
-@spaces.GPU(duration=120)  # GPU 시간 늘림
 def infer(prompt,
           image=None,
           negative_prompt="low quality, blurry, distorted",
@@ -145,10 +141,10 @@ def infer(prompt,
         gr.Warning("Please upload an image with a face.")
         return None, 0
-    # Face detection (CPU에서)
     face_info = face_info_generator.get_faceinfo_one_img(image)
     if face_info is None:
-        raise gr.Error("No face detected in the image. Please provide an image with a clear face.")
     face_bbox_square = face_bbox_to_square(face_info["bbox"])
     crop_image = image.crop(face_bbox_square)
@@ -156,15 +152,19 @@ def infer(prompt,
     crop_image = [crop_image]
     face_embeds = torch.from_numpy(np.array([face_info["embedding"]]))
-    # GPU로 이동 (spaces.GPU 내에서만)
-    device = "cuda"
     global pipe
-    # 모델을 GPU로 이동
-    pipe = pipe.to(device)
     face_embeds = face_embeds.to(device, dtype=torch.float16)
-    # IP Adapter 로딩
     pipe.load_ip_adapter_faceid_plus(f'{ckpt_dir_faceid}/ipa-faceid-plus.bin', device=device)
     pipe.set_face_fidelity_scale(0.8)
@@ -173,9 +173,9 @@ def infer(prompt,
     generator = torch.Generator(device=device).manual_seed(seed)
-    # 이미지 생성
     with torch.no_grad():
-        with torch.autocast(device):
             result = pipe(
                 prompt=prompt,
                 negative_prompt=negative_prompt,
@@ -189,34 +189,41 @@ def infer(prompt,
                 face_insightface_embeds=face_embeds
             ).images[0]
     return result, seed
 css = """
 footer {
     visibility: hidden;
 }
-#col-left {
-    margin: 0 auto;
     max-width: 640px;
-}
-#col-right {
     margin: 0 auto;
-    max-width: 640px;
 }
 """
 with gr.Blocks(theme="soft", css=css) as Kolors:
     gr.HTML(
         """
         <div style='text-align: center;'>
             <h1>🎨 Kolors Face ID - AI Portrait Generator</h1>
-            <p>Upload a face photo and create stunning AI portraits with text prompts!</p>
             <div style='display:flex; justify-content:center; gap:12px; margin-top:20px;'>
                 <a href="https://huggingface.co/spaces/openfree/Best-AI" target="_blank">
-                    <img src="https://img.shields.io/static/v1?label=OpenFree&message=BEST%20AI%20Services&color=%230000ff&labelColor=%23000080&logo=huggingface&logoColor=%23ffa500&style=for-the-badge" alt="OpenFree badge">
                 </a>
                 <a href="https://discord.gg/openfreeai" target="_blank">
-                    <img src="https://img.shields.io/static/v1?label=Discord&message=Openfree%20AI&color=%230000ff&labelColor=%23800080&logo=discord&logoColor=white&style=for-the-badge" alt="Discord badge">
                 </a>
             </div>
         </div>
@@ -227,27 +234,26 @@ with gr.Blocks(theme="soft", css=css) as Kolors:
         with gr.Column(elem_id="col-left"):
             prompt = gr.Textbox(
                 label="Prompt",
-                placeholder="e.g., A professional portrait in business attire, studio lighting",
                 lines=3,
-                value="A professional portrait photo, high quality, detailed face"
             )
-            image = gr.Image(label="Upload Face Image", type="pil", height=400)
             with gr.Accordion("Advanced Settings", open=False):
                 negative_prompt = gr.Textbox(
                     label="Negative prompt",
-                    value="low quality, blurry, distorted, disfigured"
                 )
                 seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=66)
                 randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-                with gr.Row():
-                    guidance_scale = gr.Slider(label="Guidance scale", minimum=0.0, maximum=10.0, step=0.1, value=5.0)
-                    num_inference_steps = gr.Slider(label="Inference steps", minimum=10, maximum=50, step=1, value=25)
-            button = gr.Button("🎨 Generate Portrait", variant="primary", scale=1)
         with gr.Column(elem_id="col-right"):
-            result = gr.Image(label="Generated Portrait", show_label=True)
             seed_used = gr.Number(label="Seed Used", precision=0)
     button.click(

 from diffusers import EulerDiscreteScheduler
 from PIL import Image
 from insightface.app import FaceAnalysis
+# Login with HF token
 HF_TOKEN = os.getenv("HF_TOKEN")
 if HF_TOKEN:
     login(token=HF_TOKEN)
     print("Successfully logged in to Hugging Face Hub")
+# Download models
 print("Downloading models...")
 ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors", token=HF_TOKEN)
 ckpt_dir_faceid = snapshot_download(repo_id="Kwai-Kolors/Kolors-IP-Adapter-FaceID-Plus", token=HF_TOKEN)
 print("Loading models on CPU first...")
+# Fix for ChatGLMTokenizer - monkey patch the _pad method
+original_chatglm_pad = ChatGLMTokenizer._pad if hasattr(ChatGLMTokenizer, '_pad') else None
+def fixed_pad(self, *args, **kwargs):
+    # Remove the unexpected 'padding_side' argument if present
+    kwargs.pop('padding_side', None)
+    if original_chatglm_pad:
+        return original_chatglm_pad(self, *args, **kwargs)
+    else:
+        return super(ChatGLMTokenizer, self)._pad(*args, **kwargs)
+ChatGLMTokenizer._pad = fixed_pad
+# Load models
 text_encoder = ChatGLMModel.from_pretrained(
     f'{ckpt_dir}/text_encoder',
     torch_dtype=torch.float16,
+    trust_remote_code=True
 )
 tokenizer = ChatGLMTokenizer.from_pretrained(
     f'{ckpt_dir}/text_encoder',
     trust_remote_code=True
 )
 vae = AutoencoderKL.from_pretrained(
     f"{ckpt_dir}/vae",
+    torch_dtype=torch.float16
 )
 scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
 unet = UNet2DConditionModel.from_pretrained(
     f"{ckpt_dir}/unet",
+    torch_dtype=torch.float16
 )
+# Load CLIP
+clip_image_encoder = CLIPVisionModelWithProjection.from_pretrained(
+    'openai/clip-vit-large-patch14-336',
+    torch_dtype=torch.float16,
+    use_safetensors=True
+)
 clip_image_processor = CLIPImageProcessor(size=336, crop_size=336)
+# Create pipeline
 pipe = StableDiffusionXLPipeline(
     vae=vae,
     text_encoder=text_encoder,
     force_zeros_for_empty_prompt=False,
 )
+print("Models loaded successfully!")
 class FaceInfoGenerator():
     def __init__(self, root_dir="./.insightface/"):
         self.app = FaceAnalysis(
             name='antelopev2',
             root=root_dir,
+            providers=['CPUExecutionProvider']
         )
         self.app.prepare(ctx_id=0, det_size=(640, 640))
     def get_faceinfo_one_img(self, face_image):
         if face_image is None:
             return None
         face_info = self.app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))
         if len(face_info) == 0:
 MAX_SEED = np.iinfo(np.int32).max
 face_info_generator = FaceInfoGenerator()
+@spaces.GPU(duration=120)
 def infer(prompt,
           image=None,
           negative_prompt="low quality, blurry, distorted",
         gr.Warning("Please upload an image with a face.")
         return None, 0
+    # Face detection on CPU
     face_info = face_info_generator.get_faceinfo_one_img(image)
     if face_info is None:
+        raise gr.Error("No face detected. Please upload an image with a clear face.")
     face_bbox_square = face_bbox_to_square(face_info["bbox"])
     crop_image = image.crop(face_bbox_square)
     crop_image = [crop_image]
     face_embeds = torch.from_numpy(np.array([face_info["embedding"]]))
+    # Move to GPU
+    device = torch.device("cuda")
     global pipe
+    # Move models to GPU
+    pipe.vae = pipe.vae.to(device)
+    pipe.text_encoder = pipe.text_encoder.to(device)
+    pipe.unet = pipe.unet.to(device)
+    pipe.face_clip_encoder = pipe.face_clip_encoder.to(device)
     face_embeds = face_embeds.to(device, dtype=torch.float16)
+    # Load IP adapter
     pipe.load_ip_adapter_faceid_plus(f'{ckpt_dir_faceid}/ipa-faceid-plus.bin', device=device)
     pipe.set_face_fidelity_scale(0.8)
     generator = torch.Generator(device=device).manual_seed(seed)
+    # Generate image
     with torch.no_grad():
+        with torch.autocast(device_type="cuda", dtype=torch.float16):
             result = pipe(
                 prompt=prompt,
                 negative_prompt=negative_prompt,
                 face_insightface_embeds=face_embeds
             ).images[0]
+    # Move models back to CPU to free GPU memory
+    pipe.vae = pipe.vae.to("cpu")
+    pipe.text_encoder = pipe.text_encoder.to("cpu")
+    pipe.unet = pipe.unet.to("cpu")
+    pipe.face_clip_encoder = pipe.face_clip_encoder.to("cpu")
+    torch.cuda.empty_cache()
     return result, seed
 css = """
 footer {
     visibility: hidden;
 }
+#col-left, #col-right {
     max-width: 640px;
     margin: 0 auto;
+}
+.gr-button {
+    max-width: 100%;
 }
 """
+# Gradio interface
 with gr.Blocks(theme="soft", css=css) as Kolors:
     gr.HTML(
         """
         <div style='text-align: center;'>
             <h1>🎨 Kolors Face ID - AI Portrait Generator</h1>
+            <p>Upload a face photo and create stunning AI portraits!</p>
             <div style='display:flex; justify-content:center; gap:12px; margin-top:20px;'>
                 <a href="https://huggingface.co/spaces/openfree/Best-AI" target="_blank">
+                    <img src="https://img.shields.io/badge/OpenFree-BEST%20AI-blue?style=for-the-badge" alt="OpenFree">
                 </a>
                 <a href="https://discord.gg/openfreeai" target="_blank">
+                    <img src="https://img.shields.io/badge/Discord-OpenFree%20AI-purple?style=for-the-badge&logo=discord" alt="Discord">
                 </a>
             </div>
         </div>
         with gr.Column(elem_id="col-left"):
             prompt = gr.Textbox(
                 label="Prompt",
+                placeholder="Describe the portrait style you want...",
                 lines=3,
+                value="A professional portrait photo, high quality"
             )
+            image = gr.Image(label="Upload Face Image", type="pil", height=300)
             with gr.Accordion("Advanced Settings", open=False):
                 negative_prompt = gr.Textbox(
                     label="Negative prompt",
+                    value="low quality, blurry, distorted"
                 )
                 seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=66)
                 randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+                guidance_scale = gr.Slider(label="Guidance", minimum=1, maximum=10, step=0.5, value=5)
+                num_inference_steps = gr.Slider(label="Steps", minimum=10, maximum=50, step=5, value=25)
+            button = gr.Button("🎨 Generate Portrait", variant="primary")
         with gr.Column(elem_id="col-right"):
+            result = gr.Image(label="Generated Portrait")
             seed_used = gr.Number(label="Seed Used", precision=0)
     button.click(