# Project EmbodiedGen # # Copyright (c) 2025 Horizon Robotics. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. See the License for the specific language governing # permissions and limitations under the License. import os os.environ["GRADIO_APP"] = "imageto3d" from glob import glob import gradio as gr from common import ( MAX_SEED, VERSION, active_btn_by_content, custom_theme, end_session, extract_3d_representations_v2, extract_urdf, get_seed, image_css, image_to_3d, lighting_css, preprocess_image_fn, preprocess_sam_image_fn, select_point, start_session, ) with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo: gr.HTML(image_css, visible=False) # gr.HTML(lighting_css, visible=False) gr.Markdown( """ ## ***EmbodiedGen***: Image-to-3D Asset **🔖 Version**: {VERSION}
🖼️ Generate physically plausible 3D asset from single input image. """.format( VERSION=VERSION ), elem_classes=["header"], ) with gr.Row(): with gr.Column(scale=2): with gr.Tabs() as input_tabs: with gr.Tab( label="Image(auto seg)", id=0 ) as single_image_input_tab: raw_image_cache = gr.Image( format="png", image_mode="RGB", type="pil", visible=False, ) image_prompt = gr.Image( label="Input Image", format="png", image_mode="RGBA", type="pil", height=400, elem_classes=["image_fit"], ) gr.Markdown( """ If you are not satisfied with the auto segmentation result, please switch to the `Image(SAM seg)` tab.""" ) with gr.Tab( label="Image(SAM seg)", id=1 ) as samimage_input_tab: with gr.Row(): with gr.Column(scale=1): image_prompt_sam = gr.Image( label="Input Image", type="numpy", height=400, elem_classes=["image_fit"], ) image_seg_sam = gr.Image( label="SAM Seg Image", image_mode="RGBA", type="pil", height=400, visible=False, ) with gr.Column(scale=1): image_mask_sam = gr.AnnotatedImage( elem_classes=["image_fit"] ) fg_bg_radio = gr.Radio( ["foreground_point", "background_point"], label="Select foreground(green) or background(red) points, by default foreground", # noqa value="foreground_point", ) gr.Markdown( """ Click the `Input Image` to select SAM points, after get the satisified segmentation, click `Generate` button to generate the 3D asset. \n Note: If the segmented foreground is too small relative to the entire image area, the generation will fail. """ ) with gr.Accordion(label="Generation Settings", open=False): with gr.Row(): seed = gr.Slider( 0, MAX_SEED, label="Seed", value=0, step=1 ) texture_size = gr.Slider( 1024, 4096, label="UV texture size", value=2048, step=256, ) rmbg_tag = gr.Radio( choices=["rembg", "rmbg14"], value="rembg", label="Background Removal Model", ) with gr.Row(): randomize_seed = gr.Checkbox( label="Randomize Seed", value=False ) project_delight = gr.Checkbox( label="Back-project Delight", value=True, ) gr.Markdown("Geo Structure Generation") with gr.Row(): ss_guidance_strength = gr.Slider( 0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1, ) ss_sampling_steps = gr.Slider( 1, 50, label="Sampling Steps", value=12, step=1 ) gr.Markdown("Visual Appearance Generation") with gr.Row(): slat_guidance_strength = gr.Slider( 0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1, ) slat_sampling_steps = gr.Slider( 1, 50, label="Sampling Steps", value=12, step=1 ) generate_btn = gr.Button( "🚀 1. Generate(~0.5 mins)", variant="primary", interactive=False, ) model_output_obj = gr.Textbox(label="raw mesh .obj", visible=False) with gr.Row(): extract_rep3d_btn = gr.Button( "🔍 2. Extract 3D Representation(~2 mins)", variant="primary", interactive=False, ) with gr.Accordion( label="Enter Asset Attributes(optional)", open=False ): asset_cat_text = gr.Textbox( label="Enter Asset Category (e.g., chair)" ) height_range_text = gr.Textbox( label="Enter **Height Range** in meter (e.g., 0.5-0.6)" ) mass_range_text = gr.Textbox( label="Enter **Mass Range** in kg (e.g., 1.1-1.2)" ) asset_version_text = gr.Textbox( label=f"Enter version (e.g., {VERSION})" ) with gr.Row(): extract_urdf_btn = gr.Button( "🧩 3. Extract URDF with physics(~1 mins)", variant="primary", interactive=False, ) with gr.Row(): gr.Markdown( "#### Estimated Asset 3D Attributes(No input required)" ) with gr.Row(): est_type_text = gr.Textbox( label="Asset category", interactive=False ) est_height_text = gr.Textbox( label="Real height(.m)", interactive=False ) est_mass_text = gr.Textbox( label="Mass(.kg)", interactive=False ) est_mu_text = gr.Textbox( label="Friction coefficient", interactive=False ) with gr.Row(): download_urdf = gr.DownloadButton( label="⬇️ 4. Download URDF", variant="primary", interactive=False, ) gr.Markdown( """ NOTE: If `Asset Attributes` are provided, it will guide GPT to perform physical attributes restoration. \n The `Download URDF` file is restored to the real scale and has quality inspection, open with an editor to view details. """ ) with gr.Row() as single_image_example: examples = gr.Examples( label="Image Gallery", examples=[ [image_path] for image_path in sorted( glob("assets/example_image/*") ) ], inputs=[image_prompt, rmbg_tag], fn=preprocess_image_fn, outputs=[image_prompt, raw_image_cache], run_on_click=True, examples_per_page=10, ) with gr.Row(visible=False) as single_sam_image_example: examples = gr.Examples( label="Image Gallery", examples=[ [image_path] for image_path in sorted( glob("assets/example_image/*") ) ], inputs=[image_prompt_sam], fn=preprocess_sam_image_fn, outputs=[image_prompt_sam, raw_image_cache], run_on_click=True, examples_per_page=10, ) with gr.Column(scale=1): gr.Markdown("