Spaces:
Running
on
Zero
Running
on
Zero
File size: 7,891 Bytes
575f14d be0ecc3 575f14d be0ecc3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 |
# Project EmbodiedGen
#
# Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License.
import os
import sys
import zipfile
import numpy as np
import torch
from huggingface_hub import hf_hub_download
from omegaconf import OmegaConf
from PIL import Image
from torchvision import transforms
def monkey_patch_pano2room():
    """Replace the constructors of several Pano2Room classes in place.

    The following ``__init__`` methods are overwritten:

    * ``OmnidataPredictor`` / ``OmnidataNormalPredictor``: load the
      ``depth_dpt_hybrid_384`` / ``surface_normal_dpt_hybrid_384`` entries
      of ``alexsax/omnidata_models`` through ``torch.hub``.
    * ``PanoJointPredictor``: builds the two predictors above and stores
      ``save_path``.
    * ``LamaInpainter``: downloads ``big-lama.zip`` from the
      ``smartywu/big-lama`` Hugging Face repo, extracts it next to the
      downloaded file and loads the checkpoint on CPU.
    * ``SDFTInpainter``: builds a float16
      ``stabilityai/stable-diffusion-2-inpainting`` pipeline.

    Side effects: two directories are appended to ``sys.path`` (once) and
    the ``NODE_RANK`` environment variable is set to ``"0"``.
    """
    current_file_path = os.path.abspath(__file__)
    current_dir = os.path.dirname(current_file_path)
    # The imports below rely on these two directories being importable.
    # Guard against duplicates so calling this function repeatedly does not
    # keep growing ``sys.path``.
    for extra_path in (
        os.path.join(current_dir, "../.."),
        os.path.join(current_dir, "../../thirdparty/pano2room"),
    ):
        if extra_path not in sys.path:
            sys.path.append(extra_path)

    from thirdparty.pano2room.modules.geo_predictors.omnidata.omnidata_normal_predictor import (
        OmnidataNormalPredictor,
    )
    from thirdparty.pano2room.modules.geo_predictors.omnidata.omnidata_predictor import (
        OmnidataPredictor,
    )

    def _setup_omnidata(predictor, hub_entry):
        """Populate ``predictor`` with the ``hub_entry`` Omnidata model."""
        predictor.img_size = 384
        predictor.model = torch.hub.load('alexsax/omnidata_models', hub_entry)
        predictor.model.eval()
        predictor.trans_totensor = transforms.Compose(
            [
                transforms.Resize(
                    predictor.img_size, interpolation=Image.BILINEAR
                ),
                transforms.CenterCrop(predictor.img_size),
                transforms.Normalize(mean=0.5, std=0.5),
            ]
        )

    def patched_omni_depth_init(self):
        _setup_omnidata(self, 'depth_dpt_hybrid_384')

    OmnidataPredictor.__init__ = patched_omni_depth_init

    def patched_omni_normal_init(self):
        _setup_omnidata(self, 'surface_normal_dpt_hybrid_384')

    OmnidataNormalPredictor.__init__ = patched_omni_normal_init

    def patched_panojoint_init(self, save_path=None):
        self.depth_predictor = OmnidataPredictor()
        self.normal_predictor = OmnidataNormalPredictor()
        self.save_path = save_path

    from modules.geo_predictors import PanoJointPredictor

    PanoJointPredictor.__init__ = patched_panojoint_init

    # NOTE: We use gsplat instead, so
    # `depth_diff_gaussian_rasterization_min.GaussianRasterizationSettings`
    # is intentionally not patched here.

    # disable get_has_ddp_rank print in `BaseInpaintingTrainingModule`
    # (must be set before the LaMa trainer modules are imported below).
    os.environ["NODE_RANK"] = "0"

    from thirdparty.pano2room.modules.inpainters.lama.saicinpainting.training.trainers import (
        load_checkpoint,
    )
    from thirdparty.pano2room.modules.inpainters.lama_inpainter import (
        LamaInpainter,
    )

    def patched_lama_inpaint_init(self):
        zip_path = hf_hub_download(
            repo_id="smartywu/big-lama",
            filename="big-lama.zip",
            repo_type="model",
        )
        extract_dir = os.path.splitext(zip_path)[0]
        config_path = os.path.join(extract_dir, 'big-lama', 'config.yaml')
        checkpoint_path = os.path.join(
            extract_dir, 'big-lama/models/best.ckpt'
        )

        # Decide on the files that are actually needed rather than on the
        # mere existence of the directory: an interrupted extraction would
        # otherwise leave a directory behind and never be repaired.
        already_extracted = os.path.isfile(config_path) and os.path.isfile(
            checkpoint_path
        )
        if not already_extracted:
            os.makedirs(extract_dir, exist_ok=True)
            with zipfile.ZipFile(zip_path, "r") as zip_ref:
                zip_ref.extractall(extract_dir)

        train_config = OmegaConf.load(config_path)
        train_config.training_model.predict_only = True
        train_config.visualizer.kind = 'noop'

        self.model = load_checkpoint(
            train_config, checkpoint_path, strict=False, map_location='cpu'
        )
        self.model.freeze()

    LamaInpainter.__init__ = patched_lama_inpaint_init

    from diffusers import StableDiffusionInpaintPipeline
    from thirdparty.pano2room.modules.inpainters.SDFT_inpainter import (
        SDFTInpainter,
    )

    def patched_sd_inpaint_init(self, subset_name=None):
        # ``subset_name`` is accepted but not used by this replacement.
        super(SDFTInpainter, self).__init__()
        pipe = StableDiffusionInpaintPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-inpainting",
            torch_dtype=torch.float16,
        ).to("cuda")
        pipe.enable_model_cpu_offload()
        self.inpaint_pipe = pipe

    SDFTInpainter.__init__ = patched_sd_inpaint_init
def monkey_patch_maniskill():
    """Override two image-retrieval methods on ``ManiSkillScene``.

    ``ManiSkillScene.get_sensor_images`` and
    ``ManiSkillScene.get_human_render_camera_images`` are replaced by the
    functions defined below; the latter gains a ``return_alpha`` option.
    """
    from typing import Any, Optional

    from mani_skill.envs.scene import ManiSkillScene

    def get_sensor_images(
        self, obs: dict[str, Any]
    ) -> dict[str, dict[str, torch.Tensor]]:
        """Return ``sensor.get_images(obs[name])`` for each scene sensor.

        Args:
            obs: Mapping indexed by sensor name; it must contain an entry
                for every key of ``self.sensors``.

        Returns:
            Mapping from sensor name to that sensor's ``get_images`` result.
        """
        return {
            name: sensor.get_images(obs[name])
            for name, sensor in self.sensors.items()
        }

    def get_human_render_camera_images(
        self, camera_name: Optional[str] = None, return_alpha: bool = False
    ) -> dict[str, torch.Tensor]:
        """Capture images from the scene's human render cameras.

        Args:
            camera_name: When given, only the camera with this name is
                captured. Not applied when ``self.gpu_sim_enabled`` and
                ``self.parallel_in_single_scene`` are both true.
            return_alpha: When true, a mask channel (255 where the
                segmentation label is > 0, otherwise 0) is concatenated to
                the colour image along the last dimension.

        Returns:
            Mapping from camera name to the captured image tensor.
        """

        def get_rgba_tensor(camera, return_alpha):
            color = camera.get_obs(
                rgb=True, depth=False, segmentation=False, position=False
            )["rgb"]
            if not return_alpha:
                return color

            seg_labels = camera.get_obs(
                rgb=False, depth=False, segmentation=True, position=False
            )["segmentation"]
            # Build the 0/255 uint8 mask directly in torch, avoiding a
            # device -> CPU -> NumPy -> device round trip.
            masks = (seg_labels > 0).to(
                device=color.device, dtype=torch.uint8
            ) * 255
            return torch.concat([color, masks], dim=-1)

        gpu_sim = self.gpu_sim_enabled
        single_scene = gpu_sim and self.parallel_in_single_scene

        image_data = {}
        for name, camera in self.human_render_cameras.items():
            if single_scene:
                # NOTE(review): this mode has never honoured `camera_name`
                # and captures every camera; kept as-is - confirm intent.
                camera.camera._render_cameras[0].take_picture()
            else:
                if camera_name is not None and name != camera_name:
                    continue
                if gpu_sim:
                    assert camera.config.shader_config.shader_pack not in [
                        "rt",
                        "rt-fast",
                        "rt-med",
                    ], "ray tracing shaders do not work with parallel rendering"
                camera.capture()
            image_data[name] = get_rgba_tensor(camera, return_alpha)

        return image_data

    ManiSkillScene.get_sensor_images = get_sensor_images
    ManiSkillScene.get_human_render_camera_images = (
        get_human_render_camera_images
    )
|