# Spaces: Running on Zero
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import torch | |
import numpy as np | |
def get_3d_sincos_pos_embed(embed_dim, grid_size, cls_token=False, extra_tokens=0):
    """
    Build a regular (row, column) index grid and hand it to
    get_3d_sincos_pos_embed_from_grid.

    embed_dim: number of embedding channels requested from the helper
    grid_size: int (square grid) or (height, width) tuple
    cls_token, extra_tokens: when cls_token is truthy and extra_tokens > 0,
        extra_tokens all-zero rows are concatenated in front of the embedding
    return:
    whatever get_3d_sincos_pos_embed_from_grid returns, optionally prefixed
    with the zero rows described above

    NOTE(review): the grid built here is a NumPy array of shape
    [2, 1, height, width], while get_3d_sincos_pos_embed_from_grid (as defined
    in this file) calls torch tensor methods on its input and reads three
    coordinate channels from the last axis. This call path looks like it
    cannot succeed as written -- confirm the intended behaviour before use.
    """
    height, width = grid_size if isinstance(grid_size, tuple) else (grid_size, grid_size)
    row_index = np.arange(height, dtype=np.float32)
    col_index = np.arange(width, dtype=np.float32)
    # meshgrid receives the width axis first, so channel 0 holds column
    # indices and channel 1 holds row indices.
    coords = np.stack(np.meshgrid(col_index, row_index), axis=0)
    coords = coords.reshape([2, 1, height, width])
    embedding = get_3d_sincos_pos_embed_from_grid(embed_dim, coords)
    if not (cls_token and extra_tokens > 0):
        return embedding
    zero_rows = np.zeros([extra_tokens, embed_dim])
    return np.concatenate([zero_rows, embedding], axis=0)
def get_3d_sincos_pos_embed_from_grid(embed_dim, grid):
    """
    Encode 3-D coordinates with concatenated 1-D sin/cos embeddings.

    embed_dim: total number of output channels; must be divisible by 3
        (asserted). Each coordinate axis receives embed_dim // 3 channels,
        and get_1d_sincos_pos_embed_from_grid additionally asserts that this
        per-axis width is even.
    grid: torch tensor with four axes (B, S, N, K); entries 0, 1 and 2 of the
        last axis are used as the three coordinates.
    return:
    tensor of shape (B, S, N, embed_dim) placed on grid.device. The values
    come from NumPy, so the dtype is whatever the 1-D helper produces
    (presumably float64 -- confirm before mixing with float32 modules).
    """
    assert embed_dim % 3 == 0
    B, S, N, _ = grid.shape
    per_axis_dim = embed_dim // 3

    axis_embeddings = []
    for axis in range(3):
        # reshape (rather than view) also accepts non-contiguous inputs such
        # as permuted tensors, for which view raises a RuntimeError.
        coords = grid[..., axis].reshape(-1).detach().cpu().numpy()
        axis_embeddings.append(
            get_1d_sincos_pos_embed_from_grid(per_axis_dim, coords)
        )  # (B*S*N, D/3)

    emb = np.concatenate(axis_embeddings, axis=1)  # (B*S*N, D)
    emb = torch.from_numpy(emb).to(grid.device)
    return emb.view(B, S, N, embed_dim)
def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False, extra_tokens=0):
    """
    Build a fixed 2-D sin/cos positional embedding for a regular grid.

    embed_dim: number of embedding channels per grid cell
    grid_size: int (square grid) or (height, width) tuple
    cls_token, extra_tokens: when cls_token is truthy and extra_tokens > 0,
        extra_tokens all-zero rows are concatenated in front of the embedding
    return:
    pos_embed: [height*width, embed_dim], or
        [extra_tokens + height*width, embed_dim] when the zero rows are added
    """
    height, width = grid_size if isinstance(grid_size, tuple) else (grid_size, grid_size)
    row_index = np.arange(height, dtype=np.float32)
    col_index = np.arange(width, dtype=np.float32)
    # meshgrid receives the width axis first, so channel 0 holds column
    # indices and channel 1 holds row indices.
    coords = np.stack(np.meshgrid(col_index, row_index), axis=0)
    coords = coords.reshape([2, 1, height, width])
    embedding = get_2d_sincos_pos_embed_from_grid(embed_dim, coords)
    if not (cls_token and extra_tokens > 0):
        return embedding
    zero_rows = np.zeros([extra_tokens, embed_dim])
    return np.concatenate([zero_rows, embedding], axis=0)
def get_2d_sincos_pos_embed_from_grid(embed_dim, grid):
    """
    Encode a two-channel coordinate grid with 1-D sin/cos embeddings.

    embed_dim: total number of output channels; must be even (asserted)
    grid: indexable with grid[0] and grid[1], one coordinate array per axis
    return:
    array of shape (M, embed_dim), where M is the number of entries in each
    coordinate array; the first embed_dim/2 columns encode grid[0] and the
    remaining columns encode grid[1]
    """
    assert embed_dim % 2 == 0
    half_dim = embed_dim // 2
    # Each coordinate channel is encoded independently with half the channels.
    per_axis = [
        get_1d_sincos_pos_embed_from_grid(half_dim, grid[axis]) for axis in (0, 1)
    ]  # two arrays of shape (M, D/2)
    return np.concatenate(per_axis, axis=1)  # (M, D)
def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
    """
    Sin/cos embedding of scalar positions.

    embed_dim: output dimension for each position; must be even (asserted)
    pos: NumPy array of positions to encode; flattened to size (M,)
    out: (M, D) array whose first D/2 columns are sines and last D/2 columns
        are cosines of position * frequency, with frequencies
        1 / 10000**(k / (D/2)) for k = 0 .. D/2 - 1
    """
    assert embed_dim % 2 == 0
    half_dim = embed_dim // 2
    exponents = np.arange(half_dim, dtype=np.float64) / (embed_dim / 2.0)
    inv_freq = 1.0 / 10000 ** exponents  # (D/2,)

    positions = pos.reshape(-1)  # (M,)
    # Outer product of positions and frequencies via broadcasting.
    angles = positions[:, np.newaxis] * inv_freq[np.newaxis, :]  # (M, D/2)

    return np.concatenate([np.sin(angles), np.cos(angles)], axis=1)  # (M, D)
def get_2d_embedding(xy, C, cat_coords=True):
    """
    Sinusoidal embedding of 2-D points.

    xy: tensor of shape (B, N, 2)
    C: number of channels per coordinate; the frequency table is reshaped to
        int(C / 2) entries, so C is expected to be even
    cat_coords: if True, the raw coordinates are concatenated in FRONT of the
        embedding
    return:
    float32-buffer embedding of shape (B, N, 2*C), or (B, N, 2 + 2*C) with
    the coordinates prepended. Within each coordinate's C channels, even
    slots hold sines and odd slots hold cosines.
    """
    B, N, D = xy.shape
    assert D == 2

    freqs = (
        torch.arange(0, C, 2, device=xy.device, dtype=torch.float32) * (1000.0 / C)
    ).reshape(1, 1, int(C / 2))

    encoded = []
    for axis in range(2):
        angles = xy[:, :, axis:axis + 1] * freqs
        channel = torch.zeros(B, N, C, device=xy.device, dtype=torch.float32)
        channel[:, :, 0::2] = torch.sin(angles)
        channel[:, :, 1::2] = torch.cos(angles)
        encoded.append(channel)

    pe = torch.cat(encoded, dim=2)  # B, N, C*2
    if not cat_coords:
        return pe
    return torch.cat([xy, pe], dim=2)  # B, N, C*2+2
def get_3d_embedding(xyz, C, cat_coords=True):
    """
    Sinusoidal embedding of 3-D points.

    xyz: tensor of shape (B, N, 3)
    C: number of channels per coordinate; the frequency table is reshaped to
        int(C / 2) entries, so C is expected to be even
    cat_coords: if True, the raw coordinates are concatenated AFTER the
        embedding
    return:
    float32-buffer embedding of shape (B, N, 3*C), or (B, N, 3*C + 3) with
    the coordinates appended. Within each coordinate's C channels, even
    slots hold sines and odd slots hold cosines.
    """
    B, N, D = xyz.shape
    assert D == 3

    freqs = (
        torch.arange(0, C, 2, device=xyz.device, dtype=torch.float32) * (1000.0 / C)
    ).reshape(1, 1, int(C / 2))

    encoded = []
    for axis in range(3):
        angles = xyz[:, :, axis:axis + 1] * freqs
        channel = torch.zeros(B, N, C, device=xyz.device, dtype=torch.float32)
        channel[:, :, 0::2] = torch.sin(angles)
        channel[:, :, 1::2] = torch.cos(angles)
        encoded.append(channel)

    pe = torch.cat(encoded, dim=2)  # B, N, C*3
    if not cat_coords:
        return pe
    return torch.cat([pe, xyz], dim=2)  # B, N, C*3+3
def get_4d_embedding(xyzw, C, cat_coords=True):
    """
    Sinusoidal embedding of 4-D points.

    xyzw: tensor of shape (B, N, 4)
    C: number of channels per coordinate; the frequency table is reshaped to
        int(C / 2) entries, so C is expected to be even
    cat_coords: if True, the raw coordinates are concatenated AFTER the
        embedding
    return:
    float32-buffer embedding of shape (B, N, 4*C), or (B, N, 4*C + 4) with
    the coordinates appended. Within each coordinate's C channels, even
    slots hold sines and odd slots hold cosines.
    """
    B, N, D = xyzw.shape
    assert D == 4

    freqs = (
        torch.arange(0, C, 2, device=xyzw.device, dtype=torch.float32) * (1000.0 / C)
    ).reshape(1, 1, int(C / 2))

    encoded = []
    for axis in range(4):
        angles = xyzw[:, :, axis:axis + 1] * freqs
        channel = torch.zeros(B, N, C, device=xyzw.device, dtype=torch.float32)
        channel[:, :, 0::2] = torch.sin(angles)
        channel[:, :, 1::2] = torch.cos(angles)
        encoded.append(channel)

    pe = torch.cat(encoded, dim=2)  # B, N, C*4
    if not cat_coords:
        return pe
    return torch.cat([pe, xyzw], dim=2)  # B, N, C*4+4
import torch.nn as nn | |
class Embedder_Fourier(nn.Module):
    '''
    Fourier-feature embedding: applies each periodic function to the input
    scaled by each frequency band and concatenates the results along the
    last axis, optionally preceded by the (rescaled) raw input.
    '''

    def __init__(self, input_dim, max_freq_log2, N_freqs,
                 log_sampling=True, include_input=True,
                 periodic_fns=(torch.sin, torch.cos)):
        '''
        :param input_dim: dimension of input to be embedded
        :param max_freq_log2: log2 of max freq; min freq is 1 by default
        :param N_freqs: number of frequency bands
        :param log_sampling: if True, frequency bands are linearly sampled in
            log-space; otherwise they are linearly sampled between
            2**0 and 2**max_freq_log2
        :param include_input: if True, raw input is included in the embedding
        :param periodic_fns: periodic functions used to embed input
        '''
        super().__init__()

        self.input_dim = input_dim
        self.include_input = include_input
        self.periodic_fns = periodic_fns

        # One block of input_dim channels per (frequency, function) pair,
        # plus one more block when the raw input is passed through.
        passthrough_dim = self.input_dim if self.include_input else 0
        self.out_dim = passthrough_dim + self.input_dim * N_freqs * len(self.periodic_fns)

        if log_sampling:
            bands = 2. ** torch.linspace(0., max_freq_log2, N_freqs)
        else:
            bands = torch.linspace(2. ** 0., 2. ** max_freq_log2, N_freqs)

        # Stored as a plain list of Python floats.
        self.freq_bands = bands.numpy().tolist()

    def forward(self,
                input: torch.Tensor,
                rescale: float = 1.0):
        '''
        :param input: tensor of shape [..., self.input_dim]
        :param rescale: divisor applied only to the raw-input block; the
            periodic terms are computed from the unscaled input
        :return: tensor of shape [..., self.out_dim]
        '''
        assert (input.shape[-1] == self.input_dim)

        features = [input / rescale] if self.include_input else []
        # Frequency-major ordering: every periodic function for the first
        # band, then every function for the next band, and so on.
        features.extend(
            fn(input * freq)
            for freq in self.freq_bands
            for fn in self.periodic_fns
        )

        out = torch.cat(features, dim=-1)
        assert (out.shape[-1] == self.out_dim)
        return out