Spaces: Running on Zero

xinjie.wang committed · Commit 3075458 · 1 Parent(s): 9e9a83d

update
Browse files
- common.py +14 -2
- embodied_gen/data/backproject_v2.py +17 -10
- embodied_gen/data/differentiable_render.py +2 -2
- embodied_gen/data/utils.py +51 -10
- embodied_gen/models/delight_model.py +2 -0
- embodied_gen/models/texture_model.py +2 -1
- embodied_gen/scripts/gen_texture.py +123 -0
- embodied_gen/scripts/imageto3d.py +18 -3
- embodied_gen/scripts/render_gs.py +12 -23
- embodied_gen/scripts/texture_gen.sh +1 -0
- embodied_gen/utils/process_media.py +19 -2
- embodied_gen/validators/urdf_convertor.py +1 -1
common.py
CHANGED
@@ -503,7 +503,17 @@ def extract_3d_representations_v2(
         device="cpu",
     )
     color_path = os.path.join(user_dir, "color.png")
-    render_gs_api(
+    render_gs_api(
+        input_gs=aligned_gs_path,
+        output_path=color_path,
+        elevation=[20, -10],
+    )
+    color_path2 = os.path.join(user_dir, "color2.png")
+    render_gs_api(
+        input_gs=aligned_gs_path,
+        output_path=color_path2,
+        elevation=[60, -50],
+    )
 
     mesh = trimesh.Trimesh(
         vertices=mesh_model.vertices.cpu().numpy(),
@@ -518,12 +528,14 @@ def extract_3d_representations_v2(
     mesh = backproject_api(
         delight_model=DELIGHT,
         imagesr_model=IMAGESR_MODEL,
-        color_path=color_path,
+        color_path=[color_path, color_path2],
         mesh_path=mesh_obj_path,
         output_path=mesh_obj_path,
         skip_fix_mesh=False,
         delight=enable_delight,
         texture_wh=[texture_size, texture_size],
+        elevation=[20, -10, 60, -50],
+        num_images=12,
     )
 
     mesh_glb_path = os.path.join(user_dir, f"{filename}.glb")
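For orientation: the two grids rendered at elevation=[20, -10] and [60, -50] are consumed by backproject_api with elevation=[20, -10, 60, -50] and num_images=12, i.e. three azimuth samples per elevation ring. A minimal sketch of that arithmetic, mirroring the num_view/view_interval logic of _compute_az_el_by_camera_params in embodied_gen/data/utils.py (also touched by this commit); the per-ring azimuth stagger is left out here:

import numpy as np

# How 12 requested views divide across the 4 elevation rings.
elevations = [20, -10, 60, -50]
num_images = 12

num_view = num_images // len(elevations)  # 3 azimuths per ring
view_interval = 2 * np.pi / num_view / 2  # half of the 120-degree azimuth step

print(num_view, np.degrees(view_interval))  # -> 3 60.0 (up to float rounding)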
embodied_gen/data/backproject_v2.py
CHANGED
@@ -33,6 +33,7 @@ from embodied_gen.data.mesh_operator import MeshFixer
 from embodied_gen.data.utils import (
     CameraSetting,
     DiffrastRender,
+    as_list,
     get_images_from_grid,
     init_kal_camera,
     normalize_vertices_array,
@@ -41,6 +42,7 @@ from embodied_gen.data.utils import (
 )
 from embodied_gen.models.delight_model import DelightingModel
 from embodied_gen.models.sr_model import ImageRealESRGAN
+from embodied_gen.utils.process_media import vcat_pil_images
 
 logging.basicConfig(
     format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
@@ -541,8 +543,9 @@ def parse_args():
     parser = argparse.ArgumentParser(description="Backproject texture")
     parser.add_argument(
         "--color_path",
+        nargs="+",
         type=str,
-        help="Multiview color image in 6x512x512 file path",
+        help="Multiview color image in 6x512x512 file paths",
     )
     parser.add_argument(
         "--mesh_path",
@@ -559,7 +562,7 @@ def parse_args():
     )
     parser.add_argument(
         "--elevation",
-        nargs=
+        nargs="+",
         type=float,
         default=[20.0, -10.0],
         help="Elevation angles for the camera (default: [20.0, -10.0])",
@@ -647,19 +650,23 @@ def entrypoint(
         fov=math.radians(args.fov),
         device=args.device,
     )
-    view_weights = [1, 0.1, 0.02, 0.1, 1, 0.02]
 
-    color_grid = Image.open(args.color_path)
+    args.color_path = as_list(args.color_path)
+    if args.delight and delight_model is None:
+        delight_model = DelightingModel()
+
+    color_grid = [Image.open(color_path) for color_path in args.color_path]
+    color_grid = vcat_pil_images(color_grid, image_mode="RGBA")
     if args.delight:
-        if delight_model is None:
-            delight_model = DelightingModel()
-        save_dir = os.path.dirname(args.output_path)
-        os.makedirs(save_dir, exist_ok=True)
         color_grid = delight_model(color_grid)
         if not args.no_save_delight_img:
-            color_grid.save(f"{save_dir}/color_delight.png")
+            save_dir = os.path.dirname(args.output_path)
+            os.makedirs(save_dir, exist_ok=True)
+            color_grid.save(f"{save_dir}/color_delight.png")
 
     multiviews = get_images_from_grid(color_grid, img_size=512)
+    view_weights = [1, 0.1, 0.02, 0.1, 1, 0.02]
+    view_weights += [0.01] * (len(multiviews) - len(view_weights))
 
     # Use RealESRGAN_x4plus for x4 (512->2048) image super resolution.
     if imagesr_model is None:
@@ -688,7 +695,7 @@ def entrypoint(
     texture_backer = TextureBacker(
         camera_params=camera_params,
         view_weights=view_weights,
-        render_wh=
+        render_wh=args.resolution_hw,
         texture_wh=args.texture_wh,
         smooth_texture=not args.no_smooth_texture,
     )
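A hedged call sketch of the updated entrypoint, using only keyword arguments that appear in this commit (paths are placeholders; the callers in common.py and gen_texture.py rely on the same keyword-forwarding behavior):

from embodied_gen.data.backproject_v2 import entrypoint as backproject_api

# Two 6-view grids are stacked vertically by vcat_pil_images inside entrypoint;
# elevation/num_images must match how the grids were rendered upstream.
mesh = backproject_api(
    color_path=["color.png", "color2.png"],
    mesh_path="mesh.obj",
    output_path="mesh_textured.obj",
    skip_fix_mesh=False,
    delight=True,
    texture_wh=[2048, 2048],
    elevation=[20, -10, 60, -50],
    num_images=12,
)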
embodied_gen/data/differentiable_render.py
CHANGED
@@ -503,7 +503,7 @@ def parse_args():
         help="Whether to generate global normal .mp4 rendering file.",
     )
     parser.add_argument(
-        "--prompts",
+        "--video_prompts",
         type=str,
         nargs="+",
         default=None,
@@ -579,7 +579,7 @@ def entrypoint(**kwargs) -> None:
         mesh_path=args.mesh_path,
         output_root=args.output_root,
         uuid=args.uuid,
-        prompts=args.prompts,
+        prompts=args.video_prompts,
     )
 
     return
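The flag rename only touches the CLI surface; the downstream keyword stays prompts. A sketch of the call path, assuming entrypoint(**kwargs) overrides parsed args the same way the other scripts in this commit do (mesh path and prompt are placeholders):

from embodied_gen.data.differentiable_render import entrypoint as drender_api

drender_api(
    mesh_path="mesh.obj",
    output_root="outputs/condition",
    uuid="mesh",
    video_prompts=["a ceramic mug, studio lighting"],  # formerly --prompts
)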
embodied_gen/data/utils.py
CHANGED
@@ -28,7 +28,7 @@ import numpy as np
 import nvdiffrast.torch as dr
 import torch
 import torch.nn.functional as F
-from PIL import Image
+from PIL import Image, ImageEnhance
 
 try:
     from kolors.models.modeling_chatglm import ChatGLMModel
@@ -698,6 +698,8 @@ def as_list(obj):
         return obj
     elif isinstance(obj, set):
         return list(obj)
+    elif obj is None:
+        return obj
     else:
         return [obj]
 
@@ -742,6 +744,8 @@ def _compute_az_el_by_camera_params(
 ):
     num_view = camera_params.num_images // len(camera_params.elevation)
     view_interval = 2 * np.pi / num_view / 2
+    if num_view == 1:
+        view_interval = np.pi / 2
     azimuths = []
     elevations = []
     for idx, el in enumerate(camera_params.elevation):
@@ -758,8 +762,13 @@ def _compute_az_el_by_camera_params(
     return azimuths, elevations
 
 
-def init_kal_camera(camera_params: CameraSetting) -> Camera:
-    azimuths, elevations = _compute_az_el_by_camera_params(camera_params)
+def init_kal_camera(
+    camera_params: CameraSetting,
+    flip_az: bool = False,
+) -> Camera:
+    azimuths, elevations = _compute_az_el_by_camera_params(
+        camera_params, flip_az
+    )
     cam_pts = _compute_cam_pts_by_az_el(
         azimuths, elevations, camera_params.distance
     )
@@ -856,13 +865,38 @@ def get_images_from_grid(
         image = Image.open(image)
 
     view_images = np.array(image)
-    …
+    height, width, _ = view_images.shape
+    rows = height // img_size
+    cols = width // img_size
+    blocks = []
+    for i in range(rows):
+        for j in range(cols):
+            block = view_images[
+                i * img_size : (i + 1) * img_size,
+                j * img_size : (j + 1) * img_size,
+                :,
+            ]
+            blocks.append(Image.fromarray(block))
 
-    …
+    return blocks
 
 
+def enhance_image(
+    image: Image.Image,
+    contrast_factor: float = 1.3,
+    color_factor: float = 1.2,
+    brightness_factor: float = 0.95,
+) -> Image.Image:
+    enhancer_contrast = ImageEnhance.Contrast(image)
+    img_contrasted = enhancer_contrast.enhance(contrast_factor)
+
+    enhancer_color = ImageEnhance.Color(img_contrasted)
+    img_colored = enhancer_color.enhance(color_factor)
+
+    enhancer_brightness = ImageEnhance.Brightness(img_colored)
+    enhanced_image = enhancer_brightness.enhance(brightness_factor)
+
+    return enhanced_image
+
+
 def post_process_texture(texture: np.ndarray, iter: int = 1) -> np.ndarray:
@@ -872,7 +906,14 @@ def post_process_texture(texture: np.ndarray, iter: int = 1) -> np.ndarray:
         texture, d=5, sigmaColor=20, sigmaSpace=20
     )
 
-    return texture
+    texture = enhance_image(
+        image=Image.fromarray(texture),
+        contrast_factor=1.3,
+        color_factor=1.2,
+        brightness_factor=0.95,
+    )
+
+    return np.array(texture)
 
 
 def quat_mult(q1, q2):
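A quick sketch of the two new helpers, under the assumption that get_images_from_grid accepts an in-memory PIL image, as it does when called from backproject_v2.entrypoint:

import numpy as np
from PIL import Image

from embodied_gen.data.utils import enhance_image, get_images_from_grid

# A 3x2 arrangement of 512-pixel tiles splits row-major into 6 images.
grid = Image.fromarray(np.zeros((2 * 512, 3 * 512, 4), dtype=np.uint8))
tiles = get_images_from_grid(grid, img_size=512)
print(len(tiles), tiles[0].size)  # 6 (512, 512)

# The same enhancement chain post_process_texture now applies after its
# bilateral filter: +30% contrast, +20% color, -5% brightness.
enhanced = enhance_image(tiles[0].convert("RGB"))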
embodied_gen/models/delight_model.py
CHANGED
@@ -29,6 +29,7 @@ from diffusers import (
 from huggingface_hub import snapshot_download
 from PIL import Image
 from embodied_gen.models.segment_model import RembgRemover
+from embodied_gen.utils.log import logger
 
 __all__ = [
     "DelightingModel",
@@ -84,6 +85,7 @@ class DelightingModel(object):
 
     def _lazy_init_pipeline(self):
         if self.pipeline is None:
+            logger.info("Loading Delighting Model...")
            pipeline = StableDiffusionInstructPix2PixPipeline.from_pretrained(
                 self.model_path,
                 torch_dtype=torch.float16,
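The log line lands inside a lazy-initialization guard: the heavyweight pipeline is only built on first use, so the message doubles as a first-call marker. A toy reduction of the pattern (not the repo's actual class):

class LazyModel:
    def __init__(self):
        self.pipeline = None  # nothing loaded at construction time

    def _lazy_init_pipeline(self):
        if self.pipeline is None:
            print("Loading model...")    # fires exactly once, on first call
            self.pipeline = lambda x: x  # stand-in for the real pipeline load

    def __call__(self, x):
        self._lazy_init_pipeline()
        return self.pipeline(x)

m = LazyModel()
m("first call loads")    # prints "Loading model..."
m("second call reuses")  # silent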
embodied_gen/models/texture_model.py
CHANGED
@@ -29,6 +29,7 @@ from kolors.pipelines.pipeline_controlnet_xl_kolors_img2img import (
 )
 from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
 from embodied_gen.models.text_model import download_kolors_weights
+from embodied_gen.utils.log import logger
 
 __all__ = [
     "build_texture_gen_pipe",
@@ -42,7 +43,7 @@ def build_texture_gen_pipe(
     device: str = "cuda",
 ) -> DiffusionPipeline:
     download_kolors_weights(f"{base_ckpt_dir}/Kolors")
-
+    logger.info(f"Load Kolors weights...")
     tokenizer = ChatGLMTokenizer.from_pretrained(
         f"{base_ckpt_dir}/Kolors/text_encoder"
     )
embodied_gen/scripts/gen_texture.py
ADDED
@@ -0,0 +1,123 @@
+import os
+import shutil
+from dataclasses import dataclass
+
+import tyro
+from embodied_gen.data.backproject_v2 import entrypoint as backproject_api
+from embodied_gen.data.differentiable_render import entrypoint as drender_api
+from embodied_gen.data.utils import as_list
+from embodied_gen.models.delight_model import DelightingModel
+from embodied_gen.models.sr_model import ImageRealESRGAN
+from embodied_gen.scripts.render_mv import (
+    build_texture_gen_pipe,
+)
+from embodied_gen.scripts.render_mv import infer_pipe as render_mv_api
+from embodied_gen.utils.log import logger
+
+
+@dataclass
+class TextureGenConfig:
+    mesh_path: str | list[str]
+    prompt: str | list[str]
+    output_root: str
+    controlnet_cond_scale: float = 0.7
+    guidance_scale: float = 9
+    strength: float = 0.9
+    num_inference_steps: int = 40
+    delight: bool = True
+    seed: int = 0
+    base_ckpt_dir: str = "./weights"
+    texture_size: int = 2048
+    ip_adapt_scale: float = 0.0
+    ip_img_path: str | list[str] | None = None
+
+
+def entrypoint() -> None:
+    cfg = tyro.cli(TextureGenConfig)
+    cfg.mesh_path = as_list(cfg.mesh_path)
+    cfg.prompt = as_list(cfg.prompt)
+    cfg.ip_img_path = as_list(cfg.ip_img_path)
+    assert len(cfg.mesh_path) == len(cfg.prompt)
+
+    # Pre-load models.
+    if cfg.ip_adapt_scale > 0:
+        PIPELINE = build_texture_gen_pipe(
+            base_ckpt_dir="./weights",
+            ip_adapt_scale=cfg.ip_adapt_scale,
+            device="cuda",
+        )
+    else:
+        PIPELINE = build_texture_gen_pipe(
+            base_ckpt_dir="./weights",
+            ip_adapt_scale=0,
+            device="cuda",
+        )
+    DELIGHT = None
+    if cfg.delight:
+        DELIGHT = DelightingModel()
+    IMAGESR_MODEL = ImageRealESRGAN(outscale=4)
+
+    for idx in range(len(cfg.mesh_path)):
+        mesh_path = cfg.mesh_path[idx]
+        prompt = cfg.prompt[idx]
+        uuid = os.path.splitext(os.path.basename(mesh_path))[0]
+        output_root = os.path.join(cfg.output_root, uuid)
+        drender_api(
+            mesh_path=mesh_path,
+            output_root=f"{output_root}/condition",
+            uuid=uuid,
+        )
+        render_mv_api(
+            index_file=f"{output_root}/condition/index.json",
+            controlnet_cond_scale=cfg.controlnet_cond_scale,
+            guidance_scale=cfg.guidance_scale,
+            strength=cfg.strength,
+            num_inference_steps=cfg.num_inference_steps,
+            ip_adapt_scale=cfg.ip_adapt_scale,
+            ip_img_path=(
+                None if cfg.ip_img_path is None else cfg.ip_img_path[idx]
+            ),
+            prompt=prompt,
+            save_dir=f"{output_root}/multi_view",
+            sub_idxs=[[0, 1, 2], [3, 4, 5]],
+            pipeline=PIPELINE,
+            seed=cfg.seed,
+        )
+        textured_mesh = backproject_api(
+            delight_model=DELIGHT,
+            imagesr_model=IMAGESR_MODEL,
+            mesh_path=mesh_path,
+            color_path=f"{output_root}/multi_view/color_sample0.png",
+            output_path=f"{output_root}/texture_mesh/{uuid}.obj",
+            save_glb_path=f"{output_root}/texture_mesh/{uuid}.glb",
+            skip_fix_mesh=True,
+            delight=cfg.delight,
+            no_save_delight_img=True,
+            texture_wh=[cfg.texture_size, cfg.texture_size],
+        )
+        drender_api(
+            mesh_path=f"{output_root}/texture_mesh/{uuid}.obj",
+            output_root=f"{output_root}/texture_mesh",
+            uuid=uuid,
+            num_images=90,
+            elevation=[20],
+            with_mtl=True,
+            gen_color_mp4=True,
+            pbr_light_factor=1.2,
+        )
+
+        # Re-organize folders
+        shutil.rmtree(f"{output_root}/condition")
+        shutil.copy(
+            f"{output_root}/texture_mesh/{uuid}/color.mp4",
+            f"{output_root}/color.mp4",
+        )
+        shutil.rmtree(f"{output_root}/texture_mesh/{uuid}")
+
+        logger.info(
+            f"Successfully generate textured mesh in {output_root}/texture_mesh"
+        )
+
+
+if __name__ == "__main__":
+    entrypoint()
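The new script drives the whole render/texture/backproject chain from a tyro CLI over TextureGenConfig; this is presumably the 'texture-cli' that texture_gen.sh now recommends. A standalone sketch of how tyro maps dataclass fields to flags (toy config, not the repo's; tyro renders kebab-case flags by default, so treat the exact spellings as an assumption):

from dataclasses import dataclass

import tyro


@dataclass
class Config:
    mesh_path: list[str]      # -> --mesh-path a.obj b.obj
    prompt: list[str]         # -> --prompt "mug" "vase"
    output_root: str          # -> --output-root outputs/
    texture_size: int = 2048  # optional, defaulted


if __name__ == "__main__":
    cfg = tyro.cli(Config)
    print(cfg)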
embodied_gen/scripts/imageto3d.py
CHANGED
@@ -108,6 +108,9 @@ def parse_args():
         default=2,
     )
     parser.add_argument("--disable_decompose_convex", action="store_true")
+    parser.add_argument(
+        "--texture_wh", type=int, nargs=2, default=[2048, 2048]
+    )
     args, unknown = parser.parse_known_args()
 
     return args
@@ -209,7 +212,17 @@ def entrypoint(**kwargs):
         device="cpu",
     )
     color_path = os.path.join(output_root, "color.png")
-    render_gs_api(
+    render_gs_api(
+        input_gs=aligned_gs_path,
+        output_path=color_path,
+        elevation=[20, -10],
+    )
+    color_path2 = os.path.join(output_root, "color2.png")
+    render_gs_api(
+        input_gs=aligned_gs_path,
+        output_path=color_path2,
+        elevation=[60, -50],
+    )
 
     geo_flag, geo_result = GEO_CHECKER(
         [color_path], text=asset_node
@@ -241,12 +254,14 @@ def entrypoint(**kwargs):
     mesh = backproject_api(
         delight_model=DELIGHT,
         imagesr_model=IMAGESR_MODEL,
-        color_path=color_path,
+        color_path=[color_path, color_path2],
         mesh_path=mesh_obj_path,
         output_path=mesh_obj_path,
         skip_fix_mesh=False,
         delight=True,
-        texture_wh=[2048, 2048],
+        texture_wh=args.texture_wh,
+        elevation=[20, -10, 60, -50],
+        num_images=12,
     )
 
     mesh_glb_path = os.path.join(output_root, f"{filename}.glb")
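For reference, the new flag takes exactly two integers and falls back to 2048x2048; a minimal sketch of the parse behavior:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--texture_wh", type=int, nargs=2, default=[2048, 2048])

print(parser.parse_args([]).texture_wh)                                # [2048, 2048]
print(parser.parse_args(["--texture_wh", "1024", "1024"]).texture_wh)  # [1024, 1024]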
embodied_gen/scripts/render_gs.py
CHANGED
@@ -18,12 +18,11 @@
 import argparse
 import logging
 import math
-import os
 
 import cv2
-import numpy as np
 import spaces
 import torch
+from PIL import Image
 from tqdm import tqdm
 from embodied_gen.data.utils import (
     CameraSetting,
@@ -31,6 +30,7 @@ from embodied_gen.data.utils import (
     normalize_vertices_array,
 )
 from embodied_gen.models.gs_model import GaussianOperator
+from embodied_gen.utils.process_media import combine_images_to_grid
 
 logging.basicConfig(
     format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
@@ -113,12 +113,11 @@ def load_gs_model(
 
 
 @spaces.GPU
-def entrypoint(input_gs: str = None, output_path: str = None) -> None:
+def entrypoint(**kwargs) -> None:
     args = parse_args()
-
-    args.input_gs = input_gs
-
-    args.output_path = output_path
+    for k, v in kwargs.items():
+        if hasattr(args, k) and v is not None:
+            setattr(args, k, v)
 
     # Setup camera parameters
     camera_params = CameraSetting(
@@ -129,7 +128,7 @@ def entrypoint(input_gs: str = None, output_path: str = None) -> None:
         fov=math.radians(args.fov),
         device=args.device,
     )
-    camera = init_kal_camera(camera_params)
+    camera = init_kal_camera(camera_params, flip_az=True)
     matrix_mv = camera.view_matrix()  # (n_cam 4 4) world2cam
     matrix_mv[:, :3, 3] = -matrix_mv[:, :3, 3]
     w2cs = matrix_mv.to(camera_params.device)
@@ -153,21 +152,11 @@ def entrypoint(input_gs: str = None, output_path: str = None) -> None:
             (args.image_size, args.image_size),
             interpolation=cv2.INTER_AREA,
         )
-        …
-    for row_idxs in select_idxs:
-        row_image = []
-        for row_idx in row_idxs:
-            row_image.append(images[row_idx])
-        row_image = np.concatenate(row_image, axis=1)
-        grid_image.append(row_image)
-
-    grid_image = np.concatenate(grid_image, axis=0)
-    os.makedirs(os.path.dirname(args.output_path), exist_ok=True)
-    cv2.imwrite(args.output_path, grid_image)
+        color = cv2.cvtColor(color, cv2.COLOR_BGRA2RGBA)
+        images.append(Image.fromarray(color))
+
+    combine_images_to_grid(images, image_mode="RGBA")[0].save(args.output_path)
+
     logger.info(f"Saved grid image to {args.output_path}")
 
 
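The signature change generalizes the old two-parameter override into a blanket kwargs-over-argparse pattern, which is what lets common.py and imageto3d.py pass elevation through. A toy reduction:

import argparse


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_gs", type=str, default=None)
    parser.add_argument("--output_path", type=str, default="color.png")
    parser.add_argument("--elevation", type=float, nargs="+", default=[20.0, -10.0])
    return parser.parse_args([])


def entrypoint(**kwargs) -> None:
    args = parse_args()
    for k, v in kwargs.items():
        # Only known arg names are overridden; None leaves the parsed value alone.
        if hasattr(args, k) and v is not None:
            setattr(args, k, v)
    print(args)


entrypoint(input_gs="gs.ply", elevation=[60, -50])
# Namespace(input_gs='gs.ply', output_path='color.png', elevation=[60, -50])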
embodied_gen/scripts/texture_gen.sh
CHANGED
@@ -28,6 +28,7 @@ if [[ -z "$mesh_path" || -z "$prompt" || -z "$output_root" ]]; then
     exit 1
 fi
 
+echo "Will be deprecated, recommended to use 'texture-cli' instead."
 uuid=$(basename "$output_root")
 # Step 1: drender-cli for condition rendering
 drender-cli --mesh_path ${mesh_path} \
embodied_gen/utils/process_media.py
CHANGED
@@ -49,6 +49,7 @@ __all__ = [
     "is_image_file",
     "parse_text_prompts",
     "check_object_edge_truncated",
+    "vcat_pil_images",
 ]
 
 
@@ -166,6 +167,7 @@ def combine_images_to_grid(
     images: list[str | Image.Image],
     cat_row_col: tuple[int, int] = None,
     target_wh: tuple[int, int] = (512, 512),
+    image_mode: str = "RGB",
 ) -> list[Image.Image]:
     n_images = len(images)
     if n_images == 1:
@@ -178,13 +180,13 @@ def combine_images_to_grid(
         n_row, n_col = cat_row_col
 
     images = [
-        Image.open(p).convert("RGB") if isinstance(p, str) else p
+        Image.open(p).convert(image_mode) if isinstance(p, str) else p
        for p in images
     ]
     images = [img.resize(target_wh) for img in images]
 
     grid_w, grid_h = n_col * target_wh[0], n_row * target_wh[1]
-    grid = Image.new("RGB", (grid_w, grid_h), (0, 0, 0))
+    grid = Image.new(image_mode, (grid_w, grid_h), (0, 0, 0))
 
     for idx, img in enumerate(images):
         row, col = divmod(idx, n_col)
@@ -435,6 +437,21 @@ def check_object_edge_truncated(
     return not (top or bottom or left or right)
 
 
+def vcat_pil_images(
+    images: list[Image.Image], image_mode: str = "RGB"
+) -> Image.Image:
+    widths, heights = zip(*(img.size for img in images))
+    total_height = sum(heights)
+    max_width = max(widths)
+    new_image = Image.new(image_mode, (max_width, total_height))
+    y_offset = 0
+    for image in images:
+        new_image.paste(image, (0, y_offset))
+        y_offset += image.size[1]
+
+    return new_image
+
+
 if __name__ == "__main__":
     image_paths = [
         "outputs/layouts_sim/task_0000/images/pen.png",
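A short sketch of the new helper, which backproject_v2 uses to stack multiple view grids before delighting (sizes here are arbitrary):

from PIL import Image

from embodied_gen.utils.process_media import vcat_pil_images

a = Image.new("RGBA", (1536, 1024), (255, 0, 0, 255))
b = Image.new("RGBA", (1536, 1024), (0, 255, 0, 255))

stacked = vcat_pil_images([a, b], image_mode="RGBA")
print(stacked.size)  # (1536, 2048): max width, summed height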
embodied_gen/validators/urdf_convertor.py
CHANGED
@@ -266,7 +266,7 @@ class URDFGenerator(object):
         if self.decompose_convex:
             try:
                 d_params = dict(
-                    threshold=0.05, max_convex_hull=
+                    threshold=0.05, max_convex_hull=100, verbose=False
                 )
                 filename = f"{os.path.splitext(obj_name)[0]}_collision.ply"
                 output_path = os.path.join(mesh_folder, filename)
|