Spaces:

HorizonRobotics
/

EmbodiedGen-Text-to-3D

Running on Zero

App Files Community

xinjie.wang committed on Jun 10

Commit

6d38e38

1 Parent(s): 1043d26

update

Browse files

Files changed (4) hide show

app.py +39 -18
common.py +11 -1
embodied_gen/models/text_model.py +4 -3
embodied_gen/scripts/imageto3d.py +3 -1

app.py CHANGED Viewed

@@ -25,6 +25,7 @@ from common import (
     MAX_SEED,
     VERSION,
     active_btn_by_text_content,
     end_session,
     extract_3d_representations_v2,
     extract_urdf,
@@ -37,17 +38,33 @@ from common import (
     start_session,
     text2image_fn,
 )
-from gradio.themes import Default
-from gradio.themes.utils.colors import slate
-with gr.Blocks(
-    delete_cache=(43200, 43200), theme=Default(primary_hue=slate)
-) as demo:
     gr.Markdown(
-        f"""
-        ## ***EmbodiedGen***: Text-to-3D Asset \n
-        version: {VERSION} \n
-    """
     )
     gr.HTML(image_css)
     gr.HTML(lighting_css)
@@ -107,7 +124,7 @@ with gr.Blocks(
                 )
             generate_img_btn = gr.Button(
-                "Generate Images(~1min)",
                 variant="primary",
                 interactive=False,
             )
@@ -163,12 +180,14 @@ with gr.Blocks(
                     )
             generate_btn = gr.Button(
-                "Generate 3D(~0.5 mins)", variant="primary", interactive=False
             )
             model_output_obj = gr.Textbox(label="raw mesh .obj", visible=False)
             with gr.Row():
                 extract_rep3d_btn = gr.Button(
-                    "Extract 3D Representation(~1 mins)",
                     variant="primary",
                     interactive=False,
                 )
@@ -189,13 +208,15 @@ with gr.Blocks(
                 )
             with gr.Row():
                 extract_urdf_btn = gr.Button(
-                    "Extract URDF with physics(~1 mins)",
                     variant="primary",
                     interactive=False,
                 )
             with gr.Row():
                 download_urdf = gr.DownloadButton(
-                    label="Download URDF", variant="primary", interactive=False
                 )
         with gr.Column(scale=3):
@@ -286,12 +307,12 @@ with gr.Blocks(
                 est_mu_text = gr.Textbox(
                     label="Friction coefficient", interactive=False
                 )
             prompt_examples = [
-                "satin gold tea cup with saucer",
-                "small brown leather bag",
                 "Miniature cup with floral design",
-                "带木质底座, 具有经纬线的地球仪",
                 "橙色电动手钻, 有磨损细节",
                 "手工制作的皮革笔记本",
                 "写实风格机甲3D全身模型, 主体色调为深灰色和荧光黄",

     MAX_SEED,
     VERSION,
     active_btn_by_text_content,
+    custom_theme,
     end_session,
     extract_3d_representations_v2,
     extract_urdf,
     start_session,
     text2image_fn,
 )
+with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
     gr.Markdown(
+        """
+        ## ***EmbodiedGen***: Text-to-3D Asset
+        **🔖 Version**: {VERSION}
+        <p style="display: flex; gap: 10px; flex-wrap: nowrap;">
+            <a href="https://horizonrobotics.github.io/robot_lab/embodied_gen/index.html">
+                <img alt="🌐 Project Page" src="https://img.shields.io/badge/🌐-Project_Page-blue">
+            </a>
+            <a href="https://arxiv.org/abs/xxxx.xxxxx">
+                <img alt="📄 arXiv" src="https://img.shields.io/badge/📄-arXiv-b31b1b">
+            </a>
+            <a href="https://github.com/horizon-research/EmbodiedGen">
+                <img alt="💻 GitHub" src="https://img.shields.io/badge/GitHub-000000?logo=github">
+            </a>
+            <a href="https://www.youtube.com/watch?v=SnHhzHeb_aI">
+                <img alt="🎥 Video" src="https://img.shields.io/badge/🎥-Video-red">
+            </a>
+        </p>
+        📝 Create 3D assets from text descriptions for a wide range of geometry and styles.
+        """.format(
+            VERSION=VERSION
+        ),
+        elem_classes=["header"],
     )
     gr.HTML(image_css)
     gr.HTML(lighting_css)
                 )
             generate_img_btn = gr.Button(
+                "🎨 1. Generate Images(~1min)",
                 variant="primary",
                 interactive=False,
             )
                     )
             generate_btn = gr.Button(
+                "🚀 2. Generate 3D(~0.5 mins)",
+                variant="primary",
+                interactive=False,
             )
             model_output_obj = gr.Textbox(label="raw mesh .obj", visible=False)
             with gr.Row():
                 extract_rep3d_btn = gr.Button(
+                    "🔍 3. Extract 3D Representation(~1 mins)",
                     variant="primary",
                     interactive=False,
                 )
                 )
             with gr.Row():
                 extract_urdf_btn = gr.Button(
+                    "🧩 4. Extract URDF with physics(~1 mins)",
                     variant="primary",
                     interactive=False,
                 )
             with gr.Row():
                 download_urdf = gr.DownloadButton(
+                    label="⬇️ 5. Download URDF",
+                    variant="primary",
+                    interactive=False,
                 )
         with gr.Column(scale=3):
                 est_mu_text = gr.Textbox(
                     label="Friction coefficient", interactive=False
                 )
             prompt_examples = [
+                "satin gold tea cup with saucer",
+                "brown leather bag",
                 "Miniature cup with floral design",
+                "带木质底座, 具有经纬线的地球仪",
                 "橙色电动手钻, 有磨损细节",
                 "手工制作的皮革笔记本",
                 "写实风格机甲3D全身模型, 主体色调为深灰色和荧光黄",

common.py CHANGED Viewed

@@ -30,6 +30,8 @@ import torch
 import torch.nn.functional as F
 import trimesh
 from easydict import EasyDict as edict
 from PIL import Image
 from embodied_gen.data.backproject_v2 import entrypoint as backproject_api
 from embodied_gen.data.differentiable_render import entrypoint as render_api
@@ -233,6 +235,14 @@ height: 100% !important;
 </style>
 """
 def start_session(req: gr.Request) -> None:
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
@@ -670,8 +680,8 @@ def text2image_fn(
     ip_adapt_scale: float = 0.3,
     image_wh: int | tuple[int, int] = [1024, 1024],
     rmbg_tag: str = "rembg",
-    n_sample: int = 3,
     seed: int = None,
     req: gr.Request = None,
 ):
     if isinstance(image_wh, int):

 import torch.nn.functional as F
 import trimesh
 from easydict import EasyDict as edict
+from gradio.themes import Soft
+from gradio.themes.utils.colors import gray, neutral, slate, stone, teal, zinc
 from PIL import Image
 from embodied_gen.data.backproject_v2 import entrypoint as backproject_api
 from embodied_gen.data.differentiable_render import entrypoint as render_api
 </style>
 """
+custom_theme = Soft(
+    primary_hue=stone,
+    secondary_hue=gray,
+    radius_size="md",
+    text_size="sm",
+    spacing_size="sm",
+)
 def start_session(req: gr.Request) -> None:
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     ip_adapt_scale: float = 0.3,
     image_wh: int | tuple[int, int] = [1024, 1024],
     rmbg_tag: str = "rembg",
     seed: int = None,
+    n_sample: int = 3,
     req: gr.Request = None,
 ):
     if isinstance(image_wh, int):

embodied_gen/models/text_model.py CHANGED Viewed

@@ -16,10 +16,10 @@
 import logging
-import torch
 import numpy as np
-import random
 from diffusers import (
     AutoencoderKL,
     EulerDiscreteScheduler,
@@ -143,9 +143,10 @@ def text2img_gen(
     seed: int = None,
 ) -> list[Image.Image]:
     prompt = "Single " + prompt + ", in the center of the image"
-    prompt += ", high quality, high resolution, best quality, white background, 3D style,"  # noqa
     logger.info(f"Processing prompt: {prompt}")
     if seed is not None:
         generator = torch.Generator(pipeline.device).manual_seed(seed)
         torch.manual_seed(seed)

 import logging
+import random
 import numpy as np
+import torch
 from diffusers import (
     AutoencoderKL,
     EulerDiscreteScheduler,
     seed: int = None,
 ) -> list[Image.Image]:
     prompt = "Single " + prompt + ", in the center of the image"
+    prompt += ", high quality, high resolution, best quality, white background, 3D style"  # noqa
     logger.info(f"Processing prompt: {prompt}")
+    generator = None
     if seed is not None:
         generator = torch.Generator(pipeline.device).manual_seed(seed)
         torch.manual_seed(seed)

embodied_gen/scripts/imageto3d.py CHANGED Viewed

@@ -70,7 +70,9 @@ IMAGESR_MODEL = ImageRealESRGAN(outscale=4)
 RBG_REMOVER = RembgRemover()
 RBG14_REMOVER = BMGG14Remover()
 SAM_PREDICTOR = SAMPredictor(model_type="vit_h", device="cpu")
-PIPELINE = TrellisImageTo3DPipeline.from_pretrained("microsoft/TRELLIS-image-large")
 PIPELINE.cuda()
 SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
 GEO_CHECKER = MeshGeoChecker(GPT_CLIENT)

 RBG_REMOVER = RembgRemover()
 RBG14_REMOVER = BMGG14Remover()
 SAM_PREDICTOR = SAMPredictor(model_type="vit_h", device="cpu")
+PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
+    "microsoft/TRELLIS-image-large"
+)
 PIPELINE.cuda()
 SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
 GEO_CHECKER = MeshGeoChecker(GPT_CLIENT)