"""Helpers for mask conversion, point/mask overlays and nested state updates."""

import gradio as gr
import numpy as np
from PIL import Image, ImageDraw


def get_valid_mask(mask: np.ndarray):
    """Convert a mask from gr.Image (0 to 255, RGBA) to a 0-to-1 mask.

    Args:
        mask: Array holding the mask. A 3-dimensional array is first reduced
            to a single channel through PIL's ``'L'`` (greyscale) conversion.

    Returns:
        The single-channel mask. It is divided by 255 only when its maximum
        is exactly 255; otherwise it is returned with its values untouched.
    """
    if mask.ndim == 3:
        mask = np.array(Image.fromarray(mask).convert('L'))
    if mask.max() == 255:
        mask = mask / 255
    return mask


def _unproject_to_plane(pixel, intr, extr, z):
    """Lift a pixel to the 3D point on its viewing ray whose third coordinate is ``z``."""
    extr_inv = np.linalg.inv(extr)
    ray = np.dot(np.linalg.inv(intr), np.array([pixel[0], pixel[1], 1]))
    on_ray = np.dot(extr_inv, np.concatenate([ray, [1]]))
    camera_t = extr_inv[:3, 3]
    # Slide along the ray (camera centre -> on_ray) until the third
    # coordinate equals z.
    scale = (z - camera_t[2]) / (on_ray[2] - camera_t[2])
    return (on_ray[:3] - camera_t) * scale + camera_t


def _project_to_pixel(point_3d, intr, extr):
    """Project a 3D point through ``extr`` then ``intr`` to integer pixel coordinates."""
    in_camera = np.dot(extr, np.concatenate([point_3d, [1]]))[:3]
    projected = np.dot(intr, in_camera)
    pixel = (projected[:2] / projected[2]).astype(int)
    return int(pixel[0]), int(pixel[1])


def _arrow_head_2d(start, target, tip):
    """Return the two barb end points of a 2D arrow head, or ``[]`` for a zero-length drag."""
    delta = (target[0] - start[0], target[1] - start[1])
    length = np.linalg.norm(delta)
    if length == 0:
        # No direction to orient the head along; dividing by the length
        # would produce NaN coordinates.
        return []
    unit = (delta[0] / length, delta[1] / length)
    tang = (unit[1], -unit[0])
    # Each barb steps back along the arrow and sideways, both by 10% of the
    # arrow length.
    barb_a = (tip[0] + tang[0] * 0.1 * length - unit[0] * 0.1 * length,
              tip[1] + tang[1] * 0.1 * length - unit[1] * 0.1 * length)
    barb_b = (tip[0] - tang[0] * 0.1 * length - unit[0] * 0.1 * length,
              tip[1] - tang[1] * 0.1 * length - unit[1] * 0.1 * length)
    return [
        (int(barb_a[0]), int(barb_a[1])),
        (int(barb_b[0]), int(barb_b[1])),
    ]


def _arrow_head_3d(start_3d, target_3d, intr, extr):
    """Return the projected barb end points of a 3D arrow head, or ``[]`` for a zero-length drag."""
    delta = target_3d - start_3d
    length = np.linalg.norm(delta)
    if length == 0:
        # Same reasoning as the 2D case: the direction is undefined.
        return []
    unit = delta / length
    # Perpendicular to the drag direction, with a zero third component.
    tang = np.array([unit[1], -unit[0], 0])
    # Barbs have a fixed size of 0.02 in 3D units, independent of drag length.
    barbs = (
        target_3d + tang * 0.02 - unit * 0.02,
        target_3d - tang * 0.02 - unit * 0.02,
    )
    return [_project_to_pixel(barb, intr, extr) for barb in barbs]


def draw_points_on_image(image,
                         points,
                         intr=None,
                         extr=None,
                         z=None,
                         radius_scale=0.006,
                         intr_orig=None,
                         extr_orig=None):
    """Draw drag points (start circle plus arrow to the target) over an image.

    Three modes are selected by which camera arguments are supplied:

    * 2D (``intr`` and ``extr`` both ``None``): everything is drawn directly
      in pixel coordinates.
    * 3D, same camera (``intr``/``extr`` given, no ``*_orig``): start and
      target are drawn at their pixel coordinates, but the arrow head is
      built in 3D on the plane whose third coordinate is ``z`` and projected
      back.
    * 3D, different camera (``intr_orig``/``extr_orig`` given): points are
      lifted with the original camera onto that plane and re-projected with
      ``intr``/``extr`` before drawing.

    Args:
        image: PIL image to draw on; it is not modified.
        points: Mapping whose values are dicts with ``"start"`` and
            ``"target"`` entries. Each entry is ``None`` or an indexable
            ``(x, y)`` pixel position. A target without a start is rejected.
        intr: 3x3 matrix applied after ``extr`` when projecting to pixels.
        extr: 4x4 matrix applied to homogeneous 3D points before ``intr``.
        z: Third coordinate of the plane that pixels are lifted onto.
            Required whenever ``intr``/``extr`` are given.
        radius_scale: Start-circle radius as a fraction of the image width
            (2 pixels are added on top).
        intr_orig: Matrix of the same kind as ``intr`` for the camera the
            points were picked in; defaults to ``intr``.
        extr_orig: Matrix of the same kind as ``extr`` for the camera the
            points were picked in; defaults to ``extr``.

    Returns:
        A new RGB image with the overlay composited on top.

    Raises:
        AssertionError: If the camera arguments are inconsistent, or a point
            has a target but no start.
    """
    use_2d = intr is None and extr is None
    same_cam = intr_orig is None and extr_orig is None
    if use_2d:
        assert same_cam
    else:
        assert intr is not None
        assert extr is not None
        assert z is not None
        if intr_orig is None:
            assert extr_orig is None
            intr_orig = intr
            extr_orig = extr

    overlay_rgba = Image.new("RGBA", image.size, 0)
    overlay_draw = ImageDraw.Draw(overlay_rgba)

    t_color = (255, 100, 100)
    o_color = (255, 50, 50)
    rad_draw = int(image.size[0] * radius_scale) + 2

    for point in points.values():
        p_start = point["start"]
        p_target = point["target"]

        if p_target is not None:
            assert p_start is not None
        if p_start is None:
            continue

        # Only lift into 3D when the result is used: for the arrow head in
        # 3D mode, or for re-projection into a different camera.
        needs_3d = not use_2d and (p_target is not None or not same_cam)
        p_start_3d = (_unproject_to_plane(p_start, intr_orig, extr_orig, z)
                      if needs_3d else None)

        if same_cam:
            p_draw = int(p_start[0]), int(p_start[1])
        else:
            p_draw = _project_to_pixel(p_start_3d, intr, extr)

        if p_target is not None:
            if use_2d:
                t_draw = int(p_target[0]), int(p_target[1])
                barbs = _arrow_head_2d(p_start, p_target, t_draw)
            else:
                p_target_3d = _unproject_to_plane(p_target, intr_orig,
                                                  extr_orig, z)
                barbs = _arrow_head_3d(p_start_3d, p_target_3d, intr, extr)
                if same_cam:
                    t_draw = int(p_target[0]), int(p_target[1])
                else:
                    t_draw = _project_to_pixel(p_target_3d, intr, extr)

            # Shaft first, then the (possibly absent) head.
            overlay_draw.line(
                (p_draw[0], p_draw[1], t_draw[0], t_draw[1]),
                fill=o_color,
                width=4,
            )
            for barb in barbs:
                overlay_draw.line(
                    (t_draw[0], t_draw[1], barb[0], barb[1]),
                    fill=o_color,
                    width=4,
                )

        # The start marker is drawn last so it sits on top of the shaft.
        overlay_draw.ellipse(
            (
                p_draw[0] - rad_draw,
                p_draw[1] - rad_draw,
                p_draw[0] + rad_draw,
                p_draw[1] + rad_draw,
            ),
            fill=t_color,
            outline=o_color,
            width=2,
        )

    return Image.alpha_composite(image.convert("RGBA"),
                                 overlay_rgba).convert("RGB")


def draw_raw_points_on_image(image, points, radius_scale=0.002):
    """Draw every row of ``points`` as a small filled circle over an image.

    Args:
        image: PIL image to draw on; it is not modified.
        points: Sequence of points; the first two entries of each are used
            as the ``(x, y)`` pixel position.
        radius_scale: Circle radius as a fraction of the image width.

    Returns:
        A new RGB image with the circles composited on top.
    """
    overlay_rgba = Image.new("RGBA", image.size, 0)
    overlay_draw = ImageDraw.Draw(overlay_rgba)

    t_color = (150, 150, 255)
    o_color = (50, 50, 255)
    rad_draw = int(image.size[0] * radius_scale)

    for point in points:
        x, y = int(point[0]), int(point[1])
        overlay_draw.ellipse(
            (x - rad_draw, y - rad_draw, x + rad_draw, y + rad_draw),
            fill=t_color,
            outline=o_color,
        )

    return Image.alpha_composite(image.convert("RGBA"),
                                 overlay_rgba).convert("RGB")


def draw_mask_on_image(image, mask):
    """Overlay a mask on an image as a faint grey-to-white layer.

    Args:
        image: PIL image to draw on; it is not modified.
        mask: 2D array. It is scaled by 255 and cast to ``uint8``, so values
            are presumably expected in ``[0, 1]``; confirm against callers.

    Returns:
        A new RGB image with the mask composited at a constant alpha of 45
        (out of 255).
    """
    im_mask = np.uint8(mask * 255)
    height, width = im_mask.shape[0], im_mask.shape[1]
    im_mask_rgba = np.concatenate(
        (
            # Same intensity in R, G and B.
            np.tile(im_mask[..., None], [1, 1, 3]),
            # Constant alpha plane.
            np.full((height, width, 1), 45, dtype=np.uint8),
        ),
        axis=-1,
    )
    im_mask_rgba = Image.fromarray(im_mask_rgba).convert("RGBA")
    return Image.alpha_composite(image.convert("RGBA"),
                                 im_mask_rgba).convert("RGB")


def on_change_single_global_state(keys,
                                  value,
                                  global_state,
                                  map_transform=None):
    """Set one (possibly nested) entry of ``global_state`` in place.

    Args:
        keys: Either a single string key, or a sequence of keys forming the
            path to the entry. All but the last key must already exist.
        value: The new value.
        global_state: The nested mapping to update.
        map_transform: Optional callable applied to ``value`` before it is
            stored.

    Returns:
        The same ``global_state`` object, after the update.
    """
    if map_transform is not None:
        value = map_transform(value)

    curr_state = global_state
    if isinstance(keys, str):
        last_key = keys
    else:
        for k in keys[:-1]:
            curr_state = curr_state[k]
        last_key = keys[-1]

    curr_state[last_key] = value
    return global_state


def get_latest_points_pair(points_dict):
    """Return the largest key of ``points_dict``, or ``None`` if it is empty."""
    if not points_dict:
        return None
    return max(points_dict)