Commit 62a7498 · Abubakar Abid committed
1 Parent(s): 8677654

all files

Files changed:
- .gitignore +4 -0
- LICENSE +21 -0
- download.png +0 -0
- download_from_google.py +30 -0
- images/1.png +0 -0
- images/2.png +0 -0
- images/3.png +0 -0
- images/4.png +0 -0
- images/5.png +0 -0
- images/blm_fist.png +0 -0
- images/screenshot.png +0 -0
- model.py +204 -0
- requirements.txt +1 -0
- run-gradio.py +43 -0
- simple_example.ipynb +0 -0
- utils_model.py +174 -0
- utils_nms.py +152 -0
.gitignore
ADDED
@@ -0,0 +1,4 @@
__pycache__/
.DS_Store
*.pyc
weights/*
LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2019 Video Analytics Lab -- IISc

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
download.png
ADDED
download_from_google.py
ADDED
@@ -0,0 +1,30 @@
import requests

def download_file_from_google_drive(id, destination):
    URL = "https://docs.google.com/uc?export=download"

    session = requests.Session()

    response = session.get(URL, params={'id': id}, stream=True)
    token = get_confirm_token(response)

    if token:
        params = {'id': id, 'confirm': token}
        response = session.get(URL, params=params, stream=True)

    save_response_content(response, destination)

def get_confirm_token(response):
    for key, value in response.cookies.items():
        if key.startswith('download_warning'):
            return value

    return None

def save_response_content(response, destination):
    CHUNK_SIZE = 32768

    with open(destination, "wb") as f:
        for chunk in response.iter_content(CHUNK_SIZE):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
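For reference, a minimal usage sketch of the helper above; the Drive file id below is a placeholder, and the destination path mirrors the one used later in run-gradio.py:

import os
from download_from_google import download_file_from_google_drive

os.makedirs("weights", exist_ok=True)
# "SOME_DRIVE_FILE_ID" is a placeholder, not a real id from this commit.
download_file_from_google_drive("SOME_DRIVE_FILE_ID", "weights/weights.pth")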
images/1.png
ADDED
images/2.png
ADDED
images/3.png
ADDED
images/4.png
ADDED
images/5.png
ADDED
images/blm_fist.png
ADDED
images/screenshot.png
ADDED
model.py
ADDED
@@ -0,0 +1,204 @@
import cv2
import torch
import torch.nn as nn
import numpy as np
import logging
from utils_model import compute_boxes_and_sizes, get_upsample_output, get_box_and_dot_maps, get_boxed_img
from time import time

class LSCCNN(nn.Module):
    def __init__(self, name='scale_4', checkpoint_path=None, output_downscale=2,
                 PRED_DOWNSCALE_FACTORS=(8, 4, 2, 1), GAMMA=(1, 1, 2, 4), NUM_BOXES_PER_SCALE=3):

        super(LSCCNN, self).__init__()
        self.name = name
        if torch.cuda.is_available():
            self.rgb_means = torch.cuda.FloatTensor([104.008, 116.669, 122.675])
        else:
            self.rgb_means = torch.FloatTensor([104.008, 116.669, 122.675])
        self.rgb_means = torch.autograd.Variable(self.rgb_means,
                                                 requires_grad=False).unsqueeze(0).unsqueeze(2).unsqueeze(3)

        self.BOXES, self.BOX_SIZE_BINS = compute_boxes_and_sizes(PRED_DOWNSCALE_FACTORS, GAMMA, NUM_BOXES_PER_SCALE)
        self.output_downscale = output_downscale

        in_channels = 3
        self.relu = nn.ReLU(inplace=True)
        self.conv1_1 = nn.Conv2d(in_channels, 64, kernel_size=3, padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)

        self.convA_1 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.convA_2 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
        self.convA_3 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
        self.convA_4 = nn.Conv2d(64, 32, kernel_size=3, padding=1)
        self.convA_5 = nn.Conv2d(32, 4, kernel_size=3, padding=1)

        self.convB_1 = nn.Conv2d(512, 256, kernel_size=3, padding=1)
        self.convB_2 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
        self.convB_3 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
        self.convB_4 = nn.Conv2d(64, 32, kernel_size=3, padding=1)
        self.convB_5 = nn.Conv2d(32, 4, kernel_size=3, padding=1)

        self.convC_1 = nn.Conv2d(384, 256, kernel_size=3, padding=1)
        self.convC_2 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
        self.convC_3 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
        self.convC_4 = nn.Conv2d(64, 32, kernel_size=3, padding=1)
        self.convC_5 = nn.Conv2d(32, 4, kernel_size=3, padding=1)

        self.convD_1 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.convD_2 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
        self.convD_3 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
        self.convD_4 = nn.Conv2d(64, 32, kernel_size=3, padding=1)
        self.convD_5 = nn.Conv2d(32, 4, kernel_size=3, padding=1)

        self.conv_before_transpose_1 = nn.Conv2d(512, 256, kernel_size=3, padding=1)
        self.transpose_1 = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.conv_after_transpose_1_1 = nn.Conv2d(256, 256, kernel_size=3, padding=1)

        self.transpose_2 = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.conv_after_transpose_2_1 = nn.Conv2d(256, 128, kernel_size=3, padding=1)

        self.transpose_3 = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=4, padding=0, output_padding=1)
        self.conv_after_transpose_3_1 = nn.Conv2d(256, 128, kernel_size=3, padding=1)

        self.transpose_4_1_a = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=4, padding=0, output_padding=1)
        self.transpose_4_1_b = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.conv_after_transpose_4_1 = nn.Conv2d(256, 64, kernel_size=3, padding=1)

        self.transpose_4_2 = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=4, padding=0, output_padding=1)
        self.conv_after_transpose_4_2 = nn.Conv2d(256, 64, kernel_size=3, padding=1)

        self.transpose_4_3 = nn.ConvTranspose2d(128, 128, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.conv_after_transpose_4_3 = nn.Conv2d(128, 64, kernel_size=3, padding=1)

        self.conv_middle_1 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.conv_middle_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv_middle_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv_mid_4 = nn.Conv2d(512, 256, kernel_size=3, padding=1)

        self.conv_lowest_1 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv_lowest_2 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv_lowest_3 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv_lowest_4 = nn.Conv2d(256, 128, kernel_size=3, padding=1)

        self.conv_scale1_1 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv_scale1_2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.conv_scale1_3 = nn.Conv2d(128, 64, kernel_size=3, padding=1)

        if checkpoint_path is not None:
            self.load_state_dict(torch.load(checkpoint_path))

    def forward(self, x):
        mean_sub_input = x
        mean_sub_input -= self.rgb_means

        #################### Stage 1 ##########################

        main_out_block1 = self.relu(self.conv1_2(self.relu(self.conv1_1(mean_sub_input))))
        main_out_pool1 = self.pool1(main_out_block1)

        main_out_block2 = self.relu(self.conv2_2(self.relu(self.conv2_1(main_out_pool1))))
        main_out_pool2 = self.pool2(main_out_block2)

        main_out_block3 = self.relu(self.conv3_3(self.relu(self.conv3_2(self.relu(self.conv3_1(main_out_pool2))))))
        main_out_pool3 = self.pool3(main_out_block3)

        main_out_block4 = self.relu(self.conv4_3(self.relu(self.conv4_2(self.relu(self.conv4_1(main_out_pool3))))))
        main_out_pool4 = self.pool3(main_out_block4)

        main_out_block5 = self.relu(self.conv_before_transpose_1(
            self.relu(self.conv5_3(self.relu(self.conv5_2(self.relu(self.conv5_1(main_out_pool4))))))))

        main_out_rest = self.convA_5(self.relu(
            self.convA_4(self.relu(self.convA_3(self.relu(self.convA_2(self.relu(self.convA_1(main_out_block5)))))))))
        if self.name == "scale_1":
            return main_out_rest
        ################## Stage 2 ############################

        sub1_out_conv1 = self.relu(self.conv_mid_4(self.relu(
            self.conv_middle_3(self.relu(self.conv_middle_2(self.relu(self.conv_middle_1(main_out_pool3))))))))
        sub1_transpose = self.relu(self.transpose_1(main_out_block5))
        sub1_after_transpose_1 = self.relu(self.conv_after_transpose_1_1(sub1_transpose))

        sub1_concat = torch.cat((sub1_out_conv1, sub1_after_transpose_1), dim=1)

        sub1_out_rest = self.convB_5(self.relu(
            self.convB_4(self.relu(self.convB_3(self.relu(self.convB_2(self.relu(self.convB_1(sub1_concat)))))))))
        if self.name == "scale_2":
            return main_out_rest, sub1_out_rest
        ################# Stage 3 ############################

        sub2_out_conv1 = self.relu(self.conv_lowest_4(self.relu(
            self.conv_lowest_3(self.relu(self.conv_lowest_2(self.relu(self.conv_lowest_1(main_out_pool2))))))))
        sub2_transpose = self.relu(self.transpose_2(sub1_out_conv1))
        sub2_after_transpose_1 = self.relu(self.conv_after_transpose_2_1(sub2_transpose))

        sub3_transpose = self.relu(self.transpose_3(main_out_block5))
        sub3_after_transpose_1 = self.relu(self.conv_after_transpose_3_1(sub3_transpose))

        sub2_concat = torch.cat((sub2_out_conv1, sub2_after_transpose_1, sub3_after_transpose_1), dim=1)

        sub2_out_rest = self.convC_5(self.relu(
            self.convC_4(self.relu(self.convC_3(self.relu(self.convC_2(self.relu(self.convC_1(sub2_concat)))))))))

        if self.name == "scale_3":
            return main_out_rest, sub1_out_rest, sub2_out_rest
        ################# Stage 4 ############################
        sub4_out_conv1 = self.relu(
            self.conv_scale1_3(self.relu(self.conv_scale1_2(self.relu(self.conv_scale1_1(main_out_pool1))))))

        # TDF 1
        tdf_4_1_a = self.relu(self.transpose_4_1_a(main_out_block5))
        tdf_4_1_b = self.relu(self.transpose_4_1_b(tdf_4_1_a))
        after_tdf_4_1 = self.relu(self.conv_after_transpose_4_1(tdf_4_1_b))

        # TDF 2
        tdf_4_2 = self.relu(self.transpose_4_2(sub1_out_conv1))
        after_tdf_4_2 = self.relu(self.conv_after_transpose_4_2(tdf_4_2))

        # TDF 3
        tdf_4_3 = self.relu(self.transpose_4_3(sub2_out_conv1))
        after_tdf_4_3 = self.relu(self.conv_after_transpose_4_3(tdf_4_3))

        sub4_concat = torch.cat((sub4_out_conv1, after_tdf_4_1, after_tdf_4_2, after_tdf_4_3), dim=1)
        sub4_out_rest = self.convD_5(self.relu(
            self.convD_4(self.relu(self.convD_3(self.relu(self.convD_2(self.relu(self.convD_1(sub4_concat)))))))))

        logging.info("Forward Finished")
        if self.name == "scale_4":
            return main_out_rest, sub1_out_rest, sub2_out_rest, sub4_out_rest

    def predict_single_image(self, image, emoji, nms_thresh=0.25, thickness=2, multi_colours=True):
        if image.shape[0] % 16 or image.shape[1] % 16:
            image = cv2.resize(image, (image.shape[1]//16*16, image.shape[0]//16*16))
        img_tensor = torch.from_numpy(image.transpose((2, 0, 1)).astype(np.float32)).unsqueeze(0)
        with torch.no_grad():
            out = self.forward(img_tensor.cuda())
            # out = self.forward(img_tensor)
        out = get_upsample_output(out, self.output_downscale)
        pred_dot_map, pred_box_map = get_box_and_dot_maps(out, nms_thresh, self.BOXES)
        img_out = get_boxed_img(image, emoji, pred_box_map, pred_box_map, pred_dot_map, self.output_downscale,
                                self.BOXES, self.BOX_SIZE_BINS, thickness=thickness, multi_colours=multi_colours)
        return pred_dot_map, pred_box_map, img_out
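A hedged sketch of exercising the network definition above on its own, assuming a CPU-only machine (as committed, predict_single_image() moves tensors to CUDA unconditionally, so this sketch only runs the raw forward pass with random weights and no checkpoint):

import torch
from model import LSCCNN

net = LSCCNN()                       # default name='scale_4', random weights (no checkpoint given)
net.eval()

dummy = torch.zeros(1, 3, 224, 224)  # H and W must be multiples of 16
with torch.no_grad():
    outs = net(dummy)

# 'scale_4' returns four 4-channel prediction maps at 1/16, 1/8, 1/4 and 1/2 of the input resolution.
for o in outs:
    print(o.shape)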
requirements.txt
ADDED
@@ -0,0 +1 @@
opencv-python==4.4.0.40
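requirements.txt pins only opencv-python; the other packages imported in this commit (torch, numpy, requests, gradio, Pillow) are presumably provided by the Spaces runtime. For a local run, a fuller list might look like the sketch below; everything beyond the opencv pin is an assumption, not part of the commit:

opencv-python==4.4.0.40
torch
numpy
requests
gradio
Pillow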
run-gradio.py
ADDED
@@ -0,0 +1,43 @@
import cv2
from model import LSCCNN
from download_from_google import download_file_from_google_drive
import os
import gradio as gr
from time import time

if not os.path.exists("weights/weights.pth"):
    os.mkdir("weights")
    output = 'weights/weights.pth'
    file_id = '1QbPwRXcrONMuBL_39gvnvGGsFCnNyjEm'
    download_file_from_google_drive(file_id, output)


checkpoint_path = './weights/weights.pth'
network = LSCCNN(checkpoint_path=checkpoint_path)
network.cuda()
network.eval()
emoji = cv2.imread("images/blm_fist.png", -1)


def predict(img):
    if img.shape[2] > 3:
        img = img[:, :, :3]
    pred_dot_map, pred_box_map, img_out = \
        network.predict_single_image(img, emoji, nms_thresh=0.25)
    return img_out

thumbnail = "https://i.ibb.co/bzwSBzw/Screen-Shot-2020-08-24-at-7-05-36-AM.png"
examples = [
    ["images/1.png"],
    ["images/2.png"],
    ["images/3.png"],
    ["images/4.png"],
    ["images/5.png"]
]

gr.Interface(predict, "image", "image", title="BLM Photo Anonymization",
             description="Anonymize photos to protect BLM protesters. Faces will be covered with the "
                         "black fist emoji. Model developed by the Stanford ML Group and LSC-CNN.",
             examples=examples, thumbnail=thumbnail).launch()
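Note that the script above calls network.cuda(), and predict_single_image() moves its input to CUDA unconditionally, so as committed it needs a GPU. A hedged sketch of a device guard one might add for CPU-only hardware (not part of this commit, and it assumes the .cuda() calls in model.py and utils_model.py are adapted the same way):

import torch

# Only move the model to the GPU when one is actually available.
if torch.cuda.is_available():
    network.cuda()
network.eval()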
simple_example.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
utils_model.py
ADDED
@@ -0,0 +1,174 @@
from utils_nms import apply_nms
from PIL import Image
import cv2
import torch
import numpy as np
import copy
import logging


def compute_boxes_and_sizes(PRED_DOWNSCALE_FACTORS, GAMMA, NUM_BOXES_PER_SCALE):
    BOX_SIZE_BINS = [1]
    g_idx = 0
    while len(BOX_SIZE_BINS) < NUM_BOXES_PER_SCALE * len(PRED_DOWNSCALE_FACTORS):
        gamma_idx = len(BOX_SIZE_BINS) // (len(GAMMA) - 1)
        box_size = BOX_SIZE_BINS[g_idx] + GAMMA[gamma_idx]
        BOX_SIZE_BINS.append(box_size)
        g_idx += 1

    BOX_SIZE_BINS_NPY = np.array(BOX_SIZE_BINS)
    BOXES = np.reshape(BOX_SIZE_BINS_NPY, (4, 3))
    BOXES = BOXES[::-1]

    return BOXES, BOX_SIZE_BINS


def upsample_single(input_, factor=2):
    channels = input_.size(1)
    indices = torch.nonzero(input_)
    indices_up = indices.clone()
    # Corner case!
    if indices_up.size(0) == 0:
        return torch.zeros(input_.size(0), input_.size(1), input_.size(2)*factor, input_.size(3)*factor).cuda()
        # return torch.zeros(input_.size(0), input_.size(1),
        #                    input_.size(2) * factor, input_.size(3) * factor)
    indices_up[:, 2] *= factor
    indices_up[:, 3] *= factor

    output = torch.zeros(input_.size(0), input_.size(1), input_.size(2)*factor, input_.size(3)*factor).cuda()
    # output = torch.zeros(input_.size(0), input_.size(1),
    #                      input_.size(2) * factor, input_.size(3) * factor)
    output[indices_up[:, 0], indices_up[:, 1], indices_up[:, 2], indices_up[:, 3]] = \
        input_[indices[:, 0], indices[:, 1], indices[:, 2], indices[:, 3]]

    output[indices_up[:, 0], channels - 1, indices_up[:, 2] + 1, indices_up[:, 3]] = 1.0
    output[indices_up[:, 0], channels - 1, indices_up[:, 2], indices_up[:, 3] + 1] = 1.0
    output[indices_up[:, 0], channels - 1, indices_up[:, 2] + 1, indices_up[:, 3] + 1] = 1.0

    # output_check = nn.functional.max_pool2d(output, kernel_size=2)

    return output


def get_upsample_output(model_output, output_downscale):
    upsample_max = int(np.log2(16 // output_downscale))
    upsample_pred = []
    for idx, out in enumerate(model_output):
        out = torch.nn.functional.softmax(out, dim=1)
        upsample_out = out
        for n in range(upsample_max - idx):
            upsample_out = upsample_single(upsample_out, factor=2)
        upsample_pred.append(upsample_out.cpu().data.numpy().squeeze(0))
    return upsample_pred


def box_NMS(predictions, nms_thresh, BOXES):
    Scores = []
    Boxes = []
    for k in range(len(BOXES)):
        scores = np.max(predictions[k], axis=0)
        boxes = np.argmax(predictions[k], axis=0)
        # index the boxes with BOXES to get h_map and w_map (both are the same for us)
        mask = (boxes < 3)  # removing Z
        boxes = (boxes + 1) * mask
        scores = (scores * mask)  # + 100 # added 100 since we take logsoftmax and it's negative!!

        boxes = (boxes == 1) * BOXES[k][0] + (boxes == 2) * BOXES[k][1] + (boxes == 3) * BOXES[k][2]
        Scores.append(scores)
        Boxes.append(boxes)

    x, y, h, w, scores = apply_nms(Scores, Boxes, Boxes, 0.5, thresh=nms_thresh)

    nms_out = np.zeros((predictions[0].shape[1], predictions[0].shape[2]))  # since predictions[0] is of size 4 x H x W
    box_out = np.zeros((predictions[0].shape[1], predictions[0].shape[2]))  # since predictions[0] is of size 4 x H x W
    for (xx, yy, hh) in zip(x, y, h):
        nms_out[yy, xx] = 1
        box_out[yy, xx] = hh

    assert (np.count_nonzero(nms_out) == len(x))

    return nms_out, box_out


def get_box_and_dot_maps(pred, nms_thresh, BOXES):
    assert (len(pred) == 4)
    # NMS on the multi-scale outputs
    nms_out, h = box_NMS(pred, nms_thresh, BOXES)
    return nms_out, h


def get_boxed_img(image, original_emoji, h_map, w_map, gt_pred_map,
                  prediction_downscale, BOXES, BOX_SIZE_BINS,
                  thickness=1, multi_colours=False):
    if image.shape[2] != 3:
        boxed_img = image.astype(np.uint8).transpose((1, 2, 0)).copy()
    else:
        boxed_img = image.astype(np.uint8).copy()
    head_idx = np.where(gt_pred_map > 0)

    H, W = boxed_img.shape[:2]

    Y, X = head_idx[-2], head_idx[-1]

    # scale to image
    enlarge_factor = max(((H * W) / (48 ** 2)) // 300, 1)

    for i, (y, x) in enumerate(zip(Y, X)):

        h, w = h_map[y, x] * prediction_downscale, w_map[y, x] * prediction_downscale
        scale = ((BOX_SIZE_BINS.index(h // prediction_downscale)) // 3) + 1

        if enlarge_factor > 1:
            h *= enlarge_factor / 2
            w *= enlarge_factor / 2

        expand_w = (0.2 * scale * w) // 2
        expand_h = (0.2 * scale * h) // 2

        y2 = min(int((prediction_downscale * x + w / 2) + expand_w), W)
        y1 = max(int((prediction_downscale * x - w / 2) - expand_w), 0)
        x2 = min(int((prediction_downscale * y + h / 2) + expand_h), H)
        x1 = max(int((prediction_downscale * y - h / 2) - expand_h), 0)

        emoji = copy.deepcopy(original_emoji)
        # emoji = original_emoji.copy()
        width = x2 - x1
        height = y2 - y1
        emoji = cv2.resize(emoji, (height, width))
        # emoji = emoji.resize((width, height))
        # emoji = np.array(emoji)

        # https://gist.github.com/clungzta/b4bbb3e2aa0490b0cfcbc042184b0b4e

        # Extract the alpha mask of the RGBA image, convert to RGB
        r, g, b, a = cv2.split(emoji)
        overlay_color = cv2.merge((b, g, r))

        # Apply some simple filtering to remove edge noise
        mask = cv2.medianBlur(a, 5)
        mask[mask != 255] = 0
        roi = boxed_img[x1:x2, y1:y2]

        # Black-out the area behind the emoji in our original ROI
        img1_bg = cv2.bitwise_and(roi.copy(), roi.copy(), mask=cv2.bitwise_not(mask))

        # Mask out the emoji from the emoji image.
        img2_fg = cv2.bitwise_and(overlay_color, overlay_color, mask=mask)

        # Update the original image with our new ROI
        boxed_img[x1:x2, y1:y2] = cv2.add(img1_bg, img2_fg)

    return boxed_img
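As a worked example, compute_boxes_and_sizes() with the defaults from model.py (PRED_DOWNSCALE_FACTORS=(8, 4, 2, 1), GAMMA=(1, 1, 2, 4), NUM_BOXES_PER_SCALE=3) grows the bins by the GAMMA step of each scale; a quick check:

from utils_model import compute_boxes_and_sizes

BOXES, BOX_SIZE_BINS = compute_boxes_and_sizes((8, 4, 2, 1), (1, 1, 2, 4), 3)
print(BOX_SIZE_BINS)  # [1, 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24]
print(BOXES)          # largest boxes first, one row of 3 box sizes per prediction scale:
                      # [[16 20 24]
                      #  [ 8 10 12]
                      #  [ 4  5  6]
                      #  [ 1  2  3]]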
utils_nms.py
ADDED
@@ -0,0 +1,152 @@
"""
apply_nms.py: Wrapper for nms.py
Authors : svp
"""

import numpy as np

'''
nms.py: CPU implementation of non-maximal suppression modified from Ross's code.
Authors : svp

Modified from https://github.com/rbgirshick/fast-rcnn/blob/master/lib/utils/nms.py
to accommodate a corner case which handles one box lying completely inside another.
'''


def nms(dets, thresh):
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)

    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        remove_index_1 = np.where(areas[i] == inter)
        remove_index_2 = np.where(areas[order[1:]] == inter)
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        ovr[remove_index_1] = 1.0
        ovr[remove_index_2] = 1.0
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep


'''
Extracts confidence map and box map from N (N=4 here)
channel input.

Parameters:
-----------
confidence_map - (list) list of confidences for N channels
hmap - (list) list of box values for N channels

Returns
-------
nms_conf_map - (HxW) single channel confidence score map
nms_conf_box - (HxW) single channel box map.
'''


def extract_conf_points(confidence_map, hmap):
    nms_conf_map = np.zeros_like(confidence_map[0])
    nms_conf_box = np.zeros_like(confidence_map[0])

    idx_1 = np.where(np.logical_and(confidence_map[0] > 0, confidence_map[1] <= 0))
    idx_2 = np.where(np.logical_and(confidence_map[0] <= 0, confidence_map[1] > 0))
    idx_common = np.where(np.logical_and(confidence_map[0] > 0, confidence_map[1] > 0))

    nms_conf_map[idx_1] = confidence_map[0][idx_1]
    nms_conf_map[idx_2] = confidence_map[1][idx_2]

    nms_conf_box[idx_1] = hmap[0][idx_1]
    nms_conf_box[idx_2] = hmap[1][idx_2]

    for ii in range(len(idx_common[0])):
        x, y = idx_common[0][ii], idx_common[1][ii]
        if confidence_map[0][x, y] > confidence_map[1][x, y]:
            nms_conf_map[x, y] = confidence_map[0][x, y]
            nms_conf_box[x, y] = hmap[0][x, y]
        else:
            nms_conf_map[x, y] = confidence_map[1][x, y]
            nms_conf_box[x, y] = hmap[1][x, y]

    assert (np.sum(nms_conf_map > 0) == len(idx_1[0]) + len(idx_2[0]) + len(idx_common[0]))

    return nms_conf_map, nms_conf_box


'''
Wrapper function to perform NMS

Parameters:
-----------
confidence_map - (list) list of confidences for N channels
hmap - (list) list of box values for N channels
wmap - (list) list of box values for N channels
dotmap_pred_downscale - (int) prediction scale
thresh - (float) Threshold for NMS.

Returns
-------
x, y - (list) list of x-coordinates and y-coordinates to keep
       after NMS.
h, w - (list) list of height and width of the corresponding x, y
       points.
scores - (list) list of confidence for h and w at (x, y) point.

'''


def apply_nms(confidence_map, hmap, wmap, dotmap_pred_downscale=2, thresh=0.3):
    nms_conf_map, nms_conf_box = extract_conf_points([confidence_map[0], confidence_map[1]], [hmap[0], hmap[1]])
    nms_conf_map, nms_conf_box = extract_conf_points([confidence_map[2], nms_conf_map], [hmap[2], nms_conf_box])
    nms_conf_map, nms_conf_box = extract_conf_points([confidence_map[3], nms_conf_map], [hmap[3], nms_conf_box])

    confidence_map = nms_conf_map
    hmap = nms_conf_box
    wmap = nms_conf_box

    confidence_map = np.squeeze(confidence_map)
    hmap = np.squeeze(hmap)
    wmap = np.squeeze(wmap)

    dets_idx = np.where(confidence_map > 0)

    y, x = dets_idx[-2], dets_idx[-1]
    h, w = hmap[dets_idx], wmap[dets_idx]
    x1 = x - w / 2
    x2 = x + w / 2
    y1 = y - h / 2
    y2 = y + h / 2
    scores = confidence_map[dets_idx]

    dets = np.stack([np.array(x1), np.array(y1), np.array(x2), np.array(y2), np.array(scores)], axis=1)
    # List of indices to keep
    keep = nms(dets, thresh)

    y, x = dets_idx[-2], dets_idx[-1]
    h, w = hmap[dets_idx], wmap[dets_idx]
    x = x[keep]
    y = y[keep]
    h = h[keep]
    w = w[keep]

    scores = scores[keep]
    return x, y, h, w, scores
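For reference, a small self-contained sketch of nms() above on synthetic detections in the [x1, y1, x2, y2, score] layout it expects (values chosen only to show one heavily overlapping box being suppressed):

import numpy as np
from utils_nms import nms

dets = np.array([
    [0.0,  0.0, 10.0, 10.0, 0.9],   # highest score, kept
    [1.0,  1.0, 11.0, 11.0, 0.8],   # IoU ~0.70 with the first box, suppressed at thresh=0.3
    [50.0, 50.0, 60.0, 60.0, 0.7],  # no overlap, kept
])
print(nms(dets, thresh=0.3))        # keeps indices 0 and 2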