|
|
|
|
|
import numpy as np
|
|
import torch
|
|
import torch.nn as nn
|
|
from einops import rearrange
|
|
from PIL import Image
|
|
|
|
|
|
# Normalization layer class shared by the network definitions in this file
# (ResidualBlock and ContourInference instantiate it as ``norm_layer(channels)``).
norm_layer = nn.InstanceNorm2d
|
|
|
|
def convert_to_torch(image):
    """Return ``image`` as a new ``torch.Tensor`` that does not alias the input.

    Args:
        image: A ``torch.Tensor``, a ``np.ndarray`` or a ``PIL.Image.Image``.

    Returns:
        torch.Tensor: For tensor input, a clone (dtype is left unchanged).
        For ndarray and PIL input, a float32 tensor built from a copy of the
        pixel data.

    Raises:
        TypeError: If ``image`` is none of the supported types.
    """
    if isinstance(image, torch.Tensor):
        return image.clone()
    if isinstance(image, np.ndarray):
        # Copy first so the returned tensor never shares memory with the caller's array.
        return torch.from_numpy(image.copy()).float()
    if isinstance(image, Image.Image):
        return torch.from_numpy(np.array(image)).float()
    # Raising a bare string is itself a TypeError in Python 3 and hides the
    # message; raise a real exception so the caller sees what went wrong.
    raise TypeError(
        f'Unsupported datatype {type(image)}, only np.ndarray, torch.Tensor '
        f'and Pillow Image are supported.')
|
|
|
|
class ResidualBlock(nn.Module):
    """Residual unit: two reflection-padded 3x3 conv stages plus a skip connection.

    The channel count is the same on input and output (``in_features``).
    """

    def __init__(self, in_features):
        super().__init__()

        def padded_conv_norm():
            # One "pad -> 3x3 conv -> norm" stage; padding of 1 offsets the
            # 3x3 kernel so the spatial size is unchanged.
            return [
                nn.ReflectionPad2d(1),
                nn.Conv2d(in_features, in_features, 3),
                norm_layer(in_features),
            ]

        # Stages are created in forward order so sub-module indices (and the
        # resulting state_dict keys) stay 0..6.
        layers = padded_conv_norm()
        layers.append(nn.ReLU(inplace=True))
        layers.extend(padded_conv_norm())

        self.conv_block = nn.Sequential(*layers)

    def forward(self, x):
        """Return ``x`` plus the output of the convolutional branch."""
        residual = self.conv_block(x)
        return x + residual
|
|
|
|
|
|
class ContourInference(nn.Module):
    """Convolutional encoder / residual / decoder network.

    Pipeline (attribute names are kept as ``model0`` .. ``model4``):
        model0: 7x7 conv stem, ``input_nc`` -> 64 channels.
        model1: two stride-2 convs, 64 -> 128 -> 256 channels.
        model2: ``n_residual_blocks`` ResidualBlock modules at 256 channels.
        model3: two stride-2 transposed convs, 256 -> 128 -> 64 channels.
        model4: 7x7 conv head, 64 -> ``output_nc`` channels, followed by a
            Sigmoid when ``sigmoid`` is truthy.

    Args:
        input_nc: Number of input channels.
        output_nc: Number of output channels.
        n_residual_blocks: How many residual blocks to stack in ``model2``.
        sigmoid: Whether to append ``nn.Sigmoid`` to the output head.
    """

    def __init__(self, input_nc, output_nc, n_residual_blocks=9, sigmoid=True):
        super().__init__()

        # Stem: reflection padding of 3 offsets the 7x7 kernel.
        self.model0 = nn.Sequential(
            nn.ReflectionPad2d(3),
            nn.Conv2d(input_nc, 64, 7),
            norm_layer(64),
            nn.ReLU(inplace=True),
        )

        # Encoder: each step doubles the channel width with a stride-2 conv.
        width = 64
        encoder = []
        for _ in range(2):
            doubled = width * 2
            encoder.extend((
                nn.Conv2d(width, doubled, 3, stride=2, padding=1),
                norm_layer(doubled),
                nn.ReLU(inplace=True),
            ))
            width = doubled
        self.model1 = nn.Sequential(*encoder)

        # Bottleneck: residual blocks at the encoder's final width.
        bottleneck = [ResidualBlock(width) for _ in range(n_residual_blocks)]
        self.model2 = nn.Sequential(*bottleneck)

        # Decoder: each step halves the channel width with a stride-2
        # transposed conv.
        decoder = []
        for _ in range(2):
            halved = width // 2
            decoder.extend((
                nn.ConvTranspose2d(width, halved, 3, stride=2, padding=1,
                                   output_padding=1),
                norm_layer(halved),
                nn.ReLU(inplace=True),
            ))
            width = halved
        self.model3 = nn.Sequential(*decoder)

        # Output head.
        head = [nn.ReflectionPad2d(3), nn.Conv2d(64, output_nc, 7)]
        if sigmoid:
            head.append(nn.Sigmoid())
        self.model4 = nn.Sequential(*head)

    def forward(self, x, cond=None):
        """Run ``x`` through all five stages in order.

        ``cond`` is accepted but not used by this implementation.
        """
        stages = (self.model0, self.model1, self.model2, self.model3,
                  self.model4)
        out = x
        for stage in stages:
            out = stage(out)
        return out
|
|
|
|
|
|
class ScribbleAnnotator:
    """Wraps a pretrained ``ContourInference`` network to produce contour maps.

    Args:
        cfg: Mapping with the required key ``'PRETRAINED_MODEL'`` (checkpoint
            path passed to ``torch.load``) and the optional keys
            ``'INPUT_NC'`` (default 3), ``'OUTPUT_NC'`` (default 1),
            ``'N_RESIDUAL_BLOCKS'`` (default 3) and ``'SIGMOID'``
            (default True).
        device: Device to run on. When ``None``, CUDA is used if available,
            otherwise CPU.
    """

    def __init__(self, cfg, device=None):
        input_nc = cfg.get('INPUT_NC', 3)
        output_nc = cfg.get('OUTPUT_NC', 1)
        n_residual_blocks = cfg.get('N_RESIDUAL_BLOCKS', 3)
        sigmoid = cfg.get('SIGMOID', True)
        pretrained_model = cfg['PRETRAINED_MODEL']

        if device is None:
            device = torch.device(
                "cuda" if torch.cuda.is_available() else "cpu")
        self.device = device

        self.model = ContourInference(input_nc, output_nc, n_residual_blocks,
                                      sigmoid)
        # Load onto CPU first: a checkpoint saved from a CUDA device would
        # otherwise fail to deserialize on a CPU-only host. The model is moved
        # to the target device right below.
        state_dict = torch.load(pretrained_model, map_location="cpu",
                                weights_only=True)
        self.model.load_state_dict(state_dict)
        self.model = self.model.eval().requires_grad_(False).to(self.device)

    @torch.no_grad()
    @torch.inference_mode()
    @torch.autocast('cuda', enabled=False)
    def forward(self, image):
        """Compute a contour map for one image or a batch.

        Args:
            image: ``np.ndarray``, ``torch.Tensor`` or ``PIL.Image.Image``.
                A 3-D input is treated as a single ``(h, w, c)`` image. Any
                other rank is treated as a batch and fed to the network
                without re-ordering axes (presumably ``(b, c, h, w)`` --
                confirm against callers). Values are divided by 255 before
                inference.

        Returns:
            np.ndarray: ``uint8`` array with a trailing axis of size 3 (the
            map repeated three times). For a single image, all singleton
            axes are squeezed out of the result.
        """
        # Convert before inspecting the rank: PIL images expose no ``.shape``,
        # so checking it on the raw input would raise AttributeError.
        image = convert_to_torch(image)
        is_batch = image.ndim != 3
        if not is_batch:
            image = rearrange(image, 'h w c -> 1 c h w')
        image = image.float().div(255).to(self.device)
        contour_map = self.model(image)
        # Drop the channel axis (only when it has size 1) and rescale to 0..255.
        contour_map = (contour_map.squeeze(dim=1) * 255.0).clip(
            0, 255).cpu().numpy().astype(np.uint8)
        # Replicate the map along a new last axis of size 3.
        contour_map = contour_map[..., None].repeat(3, -1)
        if not is_batch:
            contour_map = contour_map.squeeze()
        return contour_map
|
|
|
|
|
|
class ScribbleVideoAnnotator(ScribbleAnnotator):
    """Applies the single-image annotator to every frame of a sequence."""

    def forward(self, frames):
        """Annotate each frame independently.

        Args:
            frames: Iterable of frames; each one is passed through
                ``np.array`` before being handed to the parent ``forward``.

        Returns:
            list: One annotation per frame, in input order.
        """
        # Bind the parent method up front: zero-argument ``super()`` is not
        # usable inside a comprehension on Python versions before 3.12.
        annotate_image = super().forward
        return [annotate_image(np.array(frame)) for frame in frames]