Spaces:
Running
on
Zero
Running
on
Zero
# Project EmbodiedGen | |
# | |
# Copyright (c) 2025 Horizon Robotics. All Rights Reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |
# implied. See the License for the specific language governing | |
# permissions and limitations under the License. | |
import json | |
import os | |
from collections import defaultdict | |
from dataclasses import dataclass, field | |
from typing import Literal | |
import imageio | |
import numpy as np | |
import torch | |
import tyro | |
from tqdm import tqdm | |
from embodied_gen.models.gs_model import GaussianOperator | |
from embodied_gen.utils.enum import LayoutInfo, Scene3DItemEnum | |
from embodied_gen.utils.geometry import quaternion_multiply | |
from embodied_gen.utils.log import logger | |
from embodied_gen.utils.process_media import alpha_blend_rgba | |
from embodied_gen.utils.simulation import ( | |
SIM_COORD_ALIGN, | |
FrankaPandaGrasper, | |
SapienSceneManager, | |
load_assets_from_layout_file, | |
load_mani_skill_robot, | |
render_images, | |
) | |
class SapienSimConfig: | |
# Simulation settings. | |
layout_path: str | |
output_dir: str | |
sim_freq: int = 200 | |
sim_step: int = 400 | |
z_offset: float = 0.004 | |
init_quat: list[float] = field( | |
default_factory=lambda: [0.7071, 0, 0, 0.7071] | |
) # xyzw | |
device: str = "cuda" | |
control_freq: int = 50 | |
insert_robot: bool = False | |
# Camera settings. | |
render_interval: int = 10 | |
num_cameras: int = 3 | |
camera_radius: float = 0.9 | |
camera_height: float = 1.1 | |
image_hw: tuple[int, int] = (512, 512) | |
ray_tracing: bool = True | |
fovy_deg: float = 75.0 | |
camera_target_pt: list[float] = field( | |
default_factory=lambda: [0.0, 0.0, 0.9] | |
) | |
render_keys: list[ | |
Literal[ | |
"Color", "Foreground", "Segmentation", "Normal", "Mask", "Depth" | |
] | |
] = field(default_factory=lambda: ["Foreground"]) | |
def entrypoint(**kwargs): | |
if kwargs is None or len(kwargs) == 0: | |
cfg = tyro.cli(SapienSimConfig) | |
else: | |
cfg = SapienSimConfig(**kwargs) | |
scene_manager = SapienSceneManager( | |
cfg.sim_freq, ray_tracing=cfg.ray_tracing | |
) | |
_ = scene_manager.initialize_circular_cameras( | |
num_cameras=cfg.num_cameras, | |
radius=cfg.camera_radius, | |
height=cfg.camera_height, | |
target_pt=cfg.camera_target_pt, | |
image_hw=cfg.image_hw, | |
fovy_deg=cfg.fovy_deg, | |
) | |
with open(cfg.layout_path, "r") as f: | |
layout_data = json.load(f) | |
layout_data: LayoutInfo = LayoutInfo.from_dict(layout_data) | |
actors = load_assets_from_layout_file( | |
scene_manager.scene, | |
layout_data, | |
cfg.z_offset, | |
cfg.init_quat, | |
) | |
agent = load_mani_skill_robot( | |
scene_manager.scene, layout_data, cfg.control_freq | |
) | |
frames = defaultdict(list) | |
image_cnt = 0 | |
for step in tqdm(range(cfg.sim_step), desc="Simulation"): | |
scene_manager.scene.step() | |
agent.reset(agent.init_qpos) | |
if step % cfg.render_interval != 0: | |
continue | |
scene_manager.scene.update_render() | |
image_cnt += 1 | |
for camera in scene_manager.cameras: | |
camera.take_picture() | |
images = render_images(camera, cfg.render_keys) | |
frames[camera.name].append(images) | |
actions = dict() | |
if cfg.insert_robot: | |
grasper = FrankaPandaGrasper( | |
agent, | |
cfg.control_freq, | |
) | |
for node in layout_data.relation[ | |
Scene3DItemEnum.MANIPULATED_OBJS.value | |
]: | |
actions[node] = grasper.compute_grasp_action( | |
actor=actors[node], reach_target_only=True | |
) | |
if "Foreground" not in cfg.render_keys: | |
return | |
bg_node = layout_data.relation[Scene3DItemEnum.BACKGROUND.value] | |
gs_path = f"{layout_data.assets[bg_node]}/gs_model.ply" | |
gs_model: GaussianOperator = GaussianOperator.load_from_ply(gs_path) | |
x, y, z, qx, qy, qz, qw = layout_data.position[bg_node] | |
qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], cfg.init_quat) | |
init_pose = torch.tensor([x, y, z, qx, qy, qz, qw]) | |
gs_model = gs_model.get_gaussians(instance_pose=init_pose) | |
bg_images = dict() | |
for camera in scene_manager.cameras: | |
Ks = camera.get_intrinsic_matrix() | |
c2w = camera.get_model_matrix() | |
c2w = c2w @ SIM_COORD_ALIGN | |
result = gs_model.render( | |
torch.tensor(c2w, dtype=torch.float32).to(cfg.device), | |
torch.tensor(Ks, dtype=torch.float32).to(cfg.device), | |
image_width=cfg.image_hw[1], | |
image_height=cfg.image_hw[0], | |
) | |
bg_images[camera.name] = result.rgb[..., ::-1] | |
video_frames = [] | |
for idx, camera in enumerate(scene_manager.cameras): | |
# Scene rendering | |
if idx == 0: | |
for step in range(image_cnt): | |
rgba = alpha_blend_rgba( | |
frames[camera.name][step]["Foreground"], | |
bg_images[camera.name], | |
) | |
video_frames.append(np.array(rgba)) | |
# Grasp rendering | |
for node in actions: | |
if actions[node] is None: | |
continue | |
for action in tqdm(actions[node]): | |
grasp_frames = scene_manager.step_action( | |
agent, | |
torch.Tensor(action[None, ...]), | |
scene_manager.cameras, | |
cfg.render_keys, | |
sim_steps_per_control=cfg.sim_freq // cfg.control_freq, | |
) | |
rgba = alpha_blend_rgba( | |
grasp_frames[camera.name][0]["Foreground"], | |
bg_images[camera.name], | |
) | |
video_frames.append(np.array(rgba)) | |
agent.reset(agent.init_qpos) | |
os.makedirs(cfg.output_dir, exist_ok=True) | |
video_path = f"{cfg.output_dir}/Iscene.mp4" | |
imageio.mimsave(video_path, video_frames, fps=30) | |
logger.info(f"Interative 3D Scene Visualization saved in {video_path}") | |
if __name__ == "__main__": | |
entrypoint() | |