Spaces:

HorizonRobotics
/

EmbodiedGen-Image-to-3D

Running on Zero

App Files Files Community

xinjie.wang committed 16 days ago

Commit

ee63191

1 Parent(s): c1dc250

update

Browse files

Files changed (3) hide show

common.py +0 -32
embodied_gen/models/text_model.py +38 -1
embodied_gen/models/texture_model.py +5 -0

common.py CHANGED Viewed

@@ -136,32 +136,6 @@ def patched_setup_functions(self):
 Gaussian.setup_functions = patched_setup_functions
-def download_kolors_weights() -> None:
-    logger.info(f"Download kolors weights from huggingface...")
-    subprocess.run(
-        [
-            "huggingface-cli",
-            "download",
-            "--resume-download",
-            "Kwai-Kolors/Kolors",
-            "--local-dir",
-            "weights/Kolors",
-        ],
-        check=True,
-    )
-    subprocess.run(
-        [
-            "huggingface-cli",
-            "download",
-            "--resume-download",
-            "Kwai-Kolors/Kolors-IP-Adapter-Plus",
-            "--local-dir",
-            "weights/Kolors-IP-Adapter-Plus",
-        ],
-        check=True,
-    )
 if os.getenv("GRADIO_APP") == "imageto3d":
     RBG_REMOVER = RembgRemover()
     RBG14_REMOVER = BMGG14Remover()
@@ -185,9 +159,6 @@ elif os.getenv("GRADIO_APP") == "textto3d":
     )
     # PIPELINE.cuda()
     text_model_dir = "weights/Kolors"
-    if not os.path.exists(text_model_dir):
-        download_kolors_weights()
     PIPELINE_IMG_IP = build_text2img_ip_pipeline(text_model_dir, ref_scale=0.3)
     PIPELINE_IMG = build_text2img_pipeline(text_model_dir)
     SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
@@ -198,9 +169,6 @@ elif os.getenv("GRADIO_APP") == "textto3d":
         os.path.dirname(os.path.abspath(__file__)), "sessions/textto3d"
     )
 elif os.getenv("GRADIO_APP") == "texture_edit":
-    if not os.path.exists("weights/Kolors"):
-        download_kolors_weights()
     PIPELINE_IP = build_texture_gen_pipe(
         base_ckpt_dir="./weights",
         ip_adapt_scale=0.7,

 Gaussian.setup_functions = patched_setup_functions
 if os.getenv("GRADIO_APP") == "imageto3d":
     RBG_REMOVER = RembgRemover()
     RBG14_REMOVER = BMGG14Remover()
     )
     # PIPELINE.cuda()
     text_model_dir = "weights/Kolors"
     PIPELINE_IMG_IP = build_text2img_ip_pipeline(text_model_dir, ref_scale=0.3)
     PIPELINE_IMG = build_text2img_pipeline(text_model_dir)
     SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
         os.path.dirname(os.path.abspath(__file__)), "sessions/textto3d"
     )
 elif os.getenv("GRADIO_APP") == "texture_edit":
     PIPELINE_IP = build_texture_gen_pipe(
         base_ckpt_dir="./weights",
         ip_adapt_scale=0.7,

embodied_gen/models/text_model.py CHANGED Viewed

@@ -16,8 +16,9 @@
 import logging
 import random
 import numpy as np
 import torch
 from diffusers import (
@@ -47,14 +48,47 @@ __all__ = [
     "build_text2img_ip_pipeline",
     "build_text2img_pipeline",
     "text2img_gen",
 ]
 def build_text2img_ip_pipeline(
     ckpt_dir: str,
     ref_scale: float,
     device: str = "cuda",
 ) -> StableDiffusionXLPipelineIP:
     text_encoder = ChatGLMModel.from_pretrained(
         f"{ckpt_dir}/text_encoder", torch_dtype=torch.float16
     ).half()
@@ -106,6 +140,9 @@ def build_text2img_pipeline(
     ckpt_dir: str,
     device: str = "cuda",
 ) -> StableDiffusionXLPipeline:
     text_encoder = ChatGLMModel.from_pretrained(
         f"{ckpt_dir}/text_encoder", torch_dtype=torch.float16
     ).half()

 import logging
+import os
 import random
+import subprocess
 import numpy as np
 import torch
 from diffusers import (
     "build_text2img_ip_pipeline",
     "build_text2img_pipeline",
     "text2img_gen",
+    "download_kolors_weights",
 ]
+def download_kolors_weights(local_dir: str = "weights/Kolors") -> None:
+    """Download the Kolors and Kolors-IP-Adapter-Plus weights.
+
+    Both repositories are fetched with ``huggingface-cli download``. The
+    base model goes into ``local_dir``; the IP-Adapter goes into a
+    ``Kolors-IP-Adapter-Plus`` directory next to it.
+
+    Args:
+        local_dir: Destination directory for the ``Kwai-Kolors/Kolors``
+            repository. It is created if it does not exist.
+
+    Raises:
+        subprocess.CalledProcessError: If either download command exits
+            with a non-zero status (``check=True``).
+    """
+    logger.info("Download kolors weights from huggingface...")
+    os.makedirs(local_dir, exist_ok=True)
+    # The IP-Adapter is stored as a sibling of ``local_dir``; the parent
+    # reference is resolved by the filesystem, as in the original path.
+    ip_adapter_path = os.path.join(
+        local_dir, os.pardir, "Kolors-IP-Adapter-Plus"
+    )
+    downloads = (
+        ("Kwai-Kolors/Kolors", local_dir),
+        ("Kwai-Kolors/Kolors-IP-Adapter-Plus", ip_adapter_path),
+    )
+    # Run sequentially: a failed base download aborts before the adapter.
+    for repo_id, target_dir in downloads:
+        subprocess.run(
+            [
+                "huggingface-cli",
+                "download",
+                "--resume-download",
+                repo_id,
+                "--local-dir",
+                target_dir,
+            ],
+            check=True,
+        )
 def build_text2img_ip_pipeline(
     ckpt_dir: str,
     ref_scale: float,
     device: str = "cuda",
 ) -> StableDiffusionXLPipelineIP:
+    if not os.path.exists(ckpt_dir):
+        download_kolors_weights(ckpt_dir)
     text_encoder = ChatGLMModel.from_pretrained(
         f"{ckpt_dir}/text_encoder", torch_dtype=torch.float16
     ).half()
     ckpt_dir: str,
     device: str = "cuda",
 ) -> StableDiffusionXLPipeline:
+    if not os.path.exists(ckpt_dir):
+        download_kolors_weights(ckpt_dir)
     text_encoder = ChatGLMModel.from_pretrained(
         f"{ckpt_dir}/text_encoder", torch_dtype=torch.float16
     ).half()

embodied_gen/models/texture_model.py CHANGED Viewed

@@ -28,6 +28,8 @@ from kolors.pipelines.pipeline_controlnet_xl_kolors_img2img import (
     StableDiffusionXLControlNetImg2ImgPipeline,
 )
 from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
 __all__ = [
     "build_texture_gen_pipe",
@@ -40,6 +42,9 @@ def build_texture_gen_pipe(
     ip_adapt_scale: float = 0,
     device: str = "cuda",
 ) -> DiffusionPipeline:
     tokenizer = ChatGLMTokenizer.from_pretrained(
         f"{base_ckpt_dir}/Kolors/text_encoder"
     )

     StableDiffusionXLControlNetImg2ImgPipeline,
 )
 from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
+from embodied_gen.models.text_model import download_kolors_weights
 __all__ = [
     "build_texture_gen_pipe",
     ip_adapt_scale: float = 0,
     device: str = "cuda",
 ) -> DiffusionPipeline:
+    if not os.path.exists(f"{base_ckpt_dir}/Kolors"):
+        download_kolors_weights(f"{base_ckpt_dir}/Kolors")
     tokenizer = ChatGLMTokenizer.from_pretrained(
         f"{base_ckpt_dir}/Kolors/text_encoder"
     )