Spaces: mashroo / Running on Zero

YoussefAnso committed
Commit 1fa688f · 1 Parent(s): 37c1f6f

Refactor app.py and inference.py to streamline image generation and mesh export. Remove unnecessary CPU transfers and temporary-file handling, returning the generated GLB path directly. Update mesh export in the CRM model to support vertex colors and improve texture-mapping efficiency.

Files changed (3):
  1. app.py +4 -33
  2. inference.py +63 -69
  3. model/crm/model.py +3 -3
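In outline, the refactor makes generate3d responsible for producing a finished .glb and hands its path straight to the UI. A condensed sketch of the new export path, assembled from the diffs below (not a literal excerpt; export_glb is an illustrative helper name):

    import tempfile
    import trimesh

    def export_glb(model, data_config, triplane_feature2):
        # CRM.export_mesh now writes "<stem>.obj" with per-vertex colors.
        stem = tempfile.NamedTemporaryFile(suffix="", delete=False).name
        model.export_mesh(data_config, stem, tri_fea_2=triplane_feature2)
        # Load without processing so vertex order and colors survive,
        # then re-export as binary glTF for the viewer.
        mesh = trimesh.load(stem + ".obj", process=False)
        mesh.export(stem + ".glb")
        return stem + ".glb"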
app.py CHANGED
@@ -6,7 +6,6 @@ from omegaconf import OmegaConf
 import torch
 from PIL import Image
 import PIL
-import base64
 from pipelines import TwoStagePipeline
 from huggingface_hub import hf_hub_download
 import os
@@ -94,9 +93,7 @@ def preprocess_image(image, background_choice, foreground_ratio, backgroud_color):
     image = add_background(image, backgroud_color)
     return image.convert("RGB")

-
 @spaces.GPU
-
 def gen_image(input_image, seed, scale, step):
     global pipeline, model, args
     pipeline.set_seed(seed)
@@ -105,26 +102,12 @@ def gen_image(input_image, seed, scale, step):
     stage2_images = rt_dict["stage2_images"]
     np_imgs = np.concatenate(stage1_images, 1)
     np_xyzs = np.concatenate(stage2_images, 1)
-
+
     glb_path = generate3d(model, np_imgs, np_xyzs, args.device)
-
-    # Create a temporary file with a proper name for the GLB data
-    import tempfile
-    import shutil
-
-    # Create a temporary file with a proper extension
-    temp_glb = tempfile.NamedTemporaryFile(suffix='.glb', delete=False)
-    temp_glb.close()
-
-    # Copy the generated GLB file to our temporary file
-    shutil.copy2(glb_path, temp_glb.name)
-
-    # Return images and the path to the temporary GLB file
-    return Image.fromarray(np_imgs), Image.fromarray(np_xyzs), temp_glb.name
+    return Image.fromarray(np_imgs), Image.fromarray(np_xyzs), glb_path#, obj_path


 parser = argparse.ArgumentParser()
-
 parser.add_argument(
     "--stage1_config",
     type=str,
@@ -137,6 +120,7 @@ parser.add_argument(
     default="configs/stage2-v2-snr.yaml",
     help="config for stage2",
 )
+
 parser.add_argument("--device", type=str, default="cuda")
 args = parser.parse_args()

@@ -146,19 +130,6 @@ model = CRM(specs)
 model.load_state_dict(torch.load(crm_path, map_location="cpu"), strict=False)
 model = model.to(args.device)

-# After loading or instantiating the model, ensure everything is on CPU
-model = model.cpu()
-if hasattr(model, 'rgbMlp'):
-    model.rgbMlp = model.rgbMlp.cpu()
-if hasattr(model, 'decoder'):
-    model.decoder = model.decoder.cpu()
-if hasattr(model, 'unet2'):
-    model.unet2 = model.unet2.cpu()
-    if hasattr(model.unet2, 'unet'):
-        model.unet2.unet = model.unet2.unet.cpu()
-if hasattr(model, 'lora'):
-    model.lora = model.lora.cpu()
-
 stage1_config = OmegaConf.load(args.stage1_config).config
 stage2_config = OmegaConf.load(args.stage2_config).config
 stage2_sampler_config = stage2_config.sampler
@@ -262,4 +233,4 @@ with gr.Blocks() as demo:
         inputs=inputs,
         outputs=outputs,
     )
-demo.queue().launch()
+demo.queue().launch()
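Downstream, the third value returned by gen_image can feed a gr.Model3D component directly, since that component accepts a filesystem path to a .glb. A minimal wiring sketch, assuming gen_image as defined above (the component names here are illustrative, not taken from this diff):

    import gradio as gr

    with gr.Blocks() as demo:
        input_image = gr.Image(type="pil", label="input")
        seed = gr.Number(value=1234, label="seed", precision=0)
        scale = gr.Number(value=5.5, label="guidance scale")
        step = gr.Number(value=30, label="steps", precision=0)
        img_out = gr.Image(label="stage-1 views")
        xyz_out = gr.Image(label="stage-2 CCMs")
        glb_out = gr.Model3D(label="output GLB")
        gr.Button("Generate").click(
            gen_image,  # defined in app.py above
            inputs=[input_image, seed, scale, step],
            outputs=[img_out, xyz_out, glb_out],
        )
    demo.queue().launch()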
inference.py CHANGED
@@ -1,96 +1,90 @@
 import numpy as np
 import torch
 import time
-import tempfile
-import zipfile
 import nvdiffrast.torch as dr
-import xatlas
-import cv2
-import trimesh
-
 from util.utils import get_tri
+import tempfile
 from mesh import Mesh
+import zipfile
 from util.renderer import Renderer
-from kiui.mesh_utils import clean_mesh
-
+import trimesh

 def generate3d(model, rgb, ccm, device):
+
     model.renderer = Renderer(tet_grid_size=model.tet_grid_size, camera_angle_num=model.camera_angle_num,
-                              scale=model.input.scale, geo_type=model.geo_type)
+                              scale=model.input.scale, geo_type = model.geo_type)

-    color_tri = torch.from_numpy(rgb) / 255
-    xyz_tri = torch.from_numpy(ccm[:, :, (2, 1, 0)]) / 255
-    color = color_tri.permute(2, 0, 1)
-    xyz = xyz_tri.permute(2, 0, 1)
+    color_tri = torch.from_numpy(rgb)/255
+    xyz_tri = torch.from_numpy(ccm[:,:,(2,1,0)])/255
+    color = color_tri.permute(2,0,1)
+    xyz = xyz_tri.permute(2,0,1)

     def get_imgs(color):
-        return torch.stack([color[:, :, 256 * i:256 * (i + 1)] for i in [5, 0, 1, 2, 3, 4]], dim=0)
+        # color : [C, H, W*6]
+        color_list = []
+        color_list.append(color[:,:,256*5:256*(1+5)])
+        for i in range(0,5):
+            color_list.append(color[:,:,256*i:256*(1+i)])
+        return torch.stack(color_list, dim=0)# [6, C, H, W]

-    triplane_color = get_imgs(color).permute(0, 2, 3, 1).unsqueeze(0).to(device)
+    triplane_color = get_imgs(color).permute(0,2,3,1).unsqueeze(0).to(device)# [1, 6, H, W, C]
+
     color = get_imgs(color)
     xyz = get_imgs(xyz)

-    color = get_tri(color, dim=0, blender=True, scale=1).unsqueeze(0)
-    xyz = get_tri(xyz, dim=0, blender=True, scale=1, fix=True).unsqueeze(0)
-    triplane = torch.cat([color, xyz], dim=1).to(device)
+    color = get_tri(color, dim=0, blender= True, scale = 1).unsqueeze(0)
+    xyz = get_tri(xyz, dim=0, blender= True, scale = 1, fix= True).unsqueeze(0)

+    triplane = torch.cat([color,xyz],dim=1).to(device)
+    # 3D visualize
     model.eval()
-    if model.denoising:
-        tnew = torch.randint(20, 21, [triplane.shape[0]], dtype=torch.long, device=triplane.device)
-        noise_new = torch.randn_like(triplane) * 0.5 + 0.5
-        triplane = model.scheduler.add_noise(triplane, noise_new, tnew)
+
+    if model.denoising == True:
+        tnew = 20
+        tnew = torch.randint(tnew, tnew+1, [triplane.shape[0]], dtype=torch.long, device=triplane.device)
+        noise_new = torch.randn_like(triplane) *0.5+0.5
+        triplane = model.scheduler.add_noise(triplane, noise_new, tnew)
+        start_time = time.time()
         with torch.no_grad():
-            triplane_feature2 = model.unet2(triplane, tnew)
+            triplane_feature2 = model.unet2(triplane,tnew)
+        end_time = time.time()
+        elapsed_time = end_time - start_time
+        print(f"unet takes {elapsed_time}s")
     else:
-        with torch.no_grad():
-            triplane_feature2 = model.unet2(triplane)
-
-    data_config = {
-        'resolution': [1024, 1024],
-        "triview_color": triplane_color.to(device),
-    }
+        triplane_feature2 = model.unet2(triplane)

     with torch.no_grad():
+        data_config = {
+            'resolution': [1024, 1024],
+            "triview_color": triplane_color.to(device),
+        }
+
         verts, faces = model.decode(data_config, triplane_feature2)
+
         data_config['verts'] = verts[0]
         data_config['faces'] = faces

-    verts, faces = clean_mesh(
-        data_config['verts'].squeeze().cpu().numpy().astype(np.float32),
-        data_config['faces'].squeeze().cpu().numpy().astype(np.int32),
-        repair=False, remesh=True, remesh_size=0.005, remesh_iters=1
-    )
-    data_config['verts'] = torch.from_numpy(verts).contiguous()
-    data_config['faces'] = torch.from_numpy(faces).contiguous()
-
-    # CPU-only UV unwrapping with xatlas
-    mesh_v = data_config['verts'].cpu().numpy()
-    mesh_f = data_config['faces'].cpu().numpy()
-    vmapping, ft, vt = xatlas.parametrize(mesh_v, mesh_f)
-
-    # Use per-vertex colors if available, else fallback to white
-    vertex_colors = np.ones((mesh_v.shape[0], 3), dtype=np.float32)  # fallback: white
-    # If you have per-vertex color, you can assign here, e.g.:
-    # vertex_colors = ...
-
-    # Bake vertex colors to texture in UV space
-    tex_res = (1024, 1024)
-    texture = np.zeros((tex_res[1], tex_res[0], 3), dtype=np.float32)
-    vt_img = (vt * np.array(tex_res)).astype(np.int32)
-    for face, uv_idx in zip(mesh_f, ft):
-        pts = vt_img[uv_idx]
-        color = vertex_colors[face].mean(axis=0)
-        cv2.fillPoly(texture, [pts], color.tolist())
-    texture = np.clip(texture, 0, 1)
-
-    # Create Mesh and export .glb
-    mesh = Mesh(
-        v=torch.from_numpy(mesh_v).float(),
-        f=torch.from_numpy(mesh_f).int(),
-        vt=torch.from_numpy(vt).float(),
-        ft=torch.from_numpy(ft).int(),
-        albedo=torch.from_numpy(texture).float()
-    )
-    temp_path = tempfile.NamedTemporaryFile(suffix=".glb", delete=False).name
-    mesh.write(temp_path)
-    return temp_path
+
+    from kiui.mesh_utils import clean_mesh
+    verts, faces = clean_mesh(data_config['verts'].squeeze().cpu().numpy().astype(np.float32), data_config['faces'].squeeze().cpu().numpy().astype(np.int32), repair = False, remesh=True, remesh_size=0.005, remesh_iters=1)
+    data_config['verts'] = torch.from_numpy(verts).cuda().contiguous()
+    data_config['faces'] = torch.from_numpy(faces).cuda().contiguous()
+
+    start_time = time.time()
+    with torch.no_grad():
+        mesh_path_glb = tempfile.NamedTemporaryFile(suffix=f"", delete=False).name
+        model.export_mesh(data_config, mesh_path_glb, tri_fea_2 = triplane_feature2)
+
+    end_time = time.time()
+    elapsed_time = end_time - start_time
+    print(f"uv takes {elapsed_time}s")
+
+    # Convert .obj (with vertex colors) to .glb
+    obj_path = mesh_path_glb + ".obj"
+    glb_path = mesh_path_glb + ".glb"
+    mesh = trimesh.load(obj_path, process=False)
+    mesh.export(glb_path)
+    return glb_path
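The final .obj-to-.glb conversion can be exercised in isolation. A self-contained sketch, assuming a vertex-colored sample_mesh.obj on disk (the filename is hypothetical):

    import trimesh

    # process=False keeps vertex order and per-vertex colors intact.
    mesh = trimesh.load("sample_mesh.obj", process=False)
    assert isinstance(mesh, trimesh.Trimesh), "expected a single mesh, not a Scene"
    mesh.export("sample_mesh.glb")  # output format inferred from the extension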
model/crm/model.py CHANGED
@@ -107,8 +107,8 @@ class CRM(nn.Module):

         # export the final mesh
         with torch.no_grad():
-            mesh = trimesh.Trimesh(vertices=verts, faces=faces, process=False) # important, process=True leads to seg fault...
-            mesh.export(f"{out_dir}.glb", file_type="glb")
+            mesh = trimesh.Trimesh(verts, faces, vertex_colors=colors, process=False) # important, process=True leads to seg fault...
+            mesh.export(f'{out_dir}.obj')

     def export_mesh_wt_uv(self, ctx, data, out_dir, ind, device, res, tri_fea_2=None):

@@ -214,4 +214,4 @@ class CRM(nn.Module):
         img = img.clip(0, 255).astype(np.uint8)

         cv2.imwrite(f'{out_dir}.png', img[..., [2, 1, 0]])
-        # cv2.imwrite(f'{out_dir}/{ind}.png', img[..., [2, 1, 0]])
+        # cv2.imwrite(f'{out_dir}/{ind}.png', img[..., [2, 1, 0]])
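For reference, trimesh's vertex-colored export as used above, demonstrated on toy data (the tetrahedron is illustrative, not model output):

    import numpy as np
    import trimesh

    # One RGB color per vertex of a tetrahedron.
    verts = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float32)
    faces = np.array([[0, 2, 1], [0, 1, 3], [0, 3, 2], [1, 2, 3]], dtype=np.int64)
    colors = np.array([[255, 0, 0], [0, 255, 0], [0, 0, 255], [255, 255, 0]], dtype=np.uint8)

    # process=False avoids merging/reordering vertices, which would break the
    # vertex-to-color correspondence (and, per the comment above, can segfault).
    mesh = trimesh.Trimesh(verts, faces, vertex_colors=colors, process=False)
    mesh.export("tetra.obj")  # OBJ vertex lines carry colors: "v x y z r g b"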