Abubakar Abid committed
Commit 62a7498 · 1 Parent(s): 8677654
.gitignore ADDED
@@ -0,0 +1,4 @@
+ __pycache__/
+ .DS_Store
+ *.pyc
+ weights/*
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2019 Video Analytics Lab -- IISc
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
download.png ADDED
download_from_google.py ADDED
@@ -0,0 +1,30 @@
+ import requests
+
+ def download_file_from_google_drive(id, destination):
+     URL = "https://docs.google.com/uc?export=download"
+
+     session = requests.Session()
+
+     response = session.get(URL, params={'id': id}, stream=True)
+     token = get_confirm_token(response)
+
+     if token:
+         params = {'id': id, 'confirm': token}
+         response = session.get(URL, params=params, stream=True)
+
+     save_response_content(response, destination)
+
+ def get_confirm_token(response):
+     # Large files trigger Google Drive's "can't scan for viruses" page;
+     # the confirmation token is carried in a cookie.
+     for key, value in response.cookies.items():
+         if key.startswith('download_warning'):
+             return value
+
+     return None
+
+ def save_response_content(response, destination):
+     CHUNK_SIZE = 32768
+
+     with open(destination, "wb") as f:
+         for chunk in response.iter_content(CHUNK_SIZE):
+             if chunk:  # filter out keep-alive new chunks
+                 f.write(chunk)
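
For reference, a minimal usage sketch of this helper. The file ID and destination below are the ones hard-coded in run-gradio.py later in this commit:

    import os
    from download_from_google import download_file_from_google_drive

    # Fetch the pretrained weights used by run-gradio.py.
    os.makedirs('weights', exist_ok=True)
    download_file_from_google_drive('1QbPwRXcrONMuBL_39gvnvGGsFCnNyjEm',
                                    'weights/weights.pth')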
images/1.png ADDED
images/2.png ADDED
images/3.png ADDED
images/4.png ADDED
images/5.png ADDED
images/blm_fist.png ADDED
images/screenshot.png ADDED
model.py ADDED
@@ -0,0 +1,204 @@
+ import cv2
+ import torch
+ import torch.nn as nn
+ import numpy as np
+ import logging
+ from utils_model import compute_boxes_and_sizes, get_upsample_output, get_box_and_dot_maps, get_boxed_img
+ from time import time
+
+
+ class LSCCNN(nn.Module):
+     def __init__(self, name='scale_4', checkpoint_path=None, output_downscale=2,
+                  PRED_DOWNSCALE_FACTORS=(8, 4, 2, 1), GAMMA=(1, 1, 2, 4), NUM_BOXES_PER_SCALE=3):
+
+         super(LSCCNN, self).__init__()
+         self.name = name
+         if torch.cuda.is_available():
+             self.rgb_means = torch.cuda.FloatTensor([104.008, 116.669, 122.675])
+         else:
+             self.rgb_means = torch.FloatTensor([104.008, 116.669, 122.675])
+         # Reshape the per-channel means to (1, 3, 1, 1) so they broadcast over NCHW input.
+         self.rgb_means = self.rgb_means.unsqueeze(0).unsqueeze(2).unsqueeze(3)
+
+         self.BOXES, self.BOX_SIZE_BINS = compute_boxes_and_sizes(PRED_DOWNSCALE_FACTORS, GAMMA, NUM_BOXES_PER_SCALE)
+         self.output_downscale = output_downscale
+
+         in_channels = 3
+         self.relu = nn.ReLU(inplace=True)
+         self.conv1_1 = nn.Conv2d(in_channels, 64, kernel_size=3, padding=1)
+         self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
+         self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
+
+         self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
+         self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
+         self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
+
+         self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
+         self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
+         self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
+         self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
+
+         self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
+         self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
+         self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
+         self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)
+
+         self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
+         self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
+         self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
+
+         self.convA_1 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
+         self.convA_2 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
+         self.convA_3 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
+         self.convA_4 = nn.Conv2d(64, 32, kernel_size=3, padding=1)
+         self.convA_5 = nn.Conv2d(32, 4, kernel_size=3, padding=1)
+
+         self.convB_1 = nn.Conv2d(512, 256, kernel_size=3, padding=1)
+         self.convB_2 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
+         self.convB_3 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
+         self.convB_4 = nn.Conv2d(64, 32, kernel_size=3, padding=1)
+         self.convB_5 = nn.Conv2d(32, 4, kernel_size=3, padding=1)
+
+         self.convC_1 = nn.Conv2d(384, 256, kernel_size=3, padding=1)
+         self.convC_2 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
+         self.convC_3 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
+         self.convC_4 = nn.Conv2d(64, 32, kernel_size=3, padding=1)
+         self.convC_5 = nn.Conv2d(32, 4, kernel_size=3, padding=1)
+
+         self.convD_1 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
+         self.convD_2 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
+         self.convD_3 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
+         self.convD_4 = nn.Conv2d(64, 32, kernel_size=3, padding=1)
+         self.convD_5 = nn.Conv2d(32, 4, kernel_size=3, padding=1)
+
+         self.conv_before_transpose_1 = nn.Conv2d(512, 256, kernel_size=3, padding=1)
+         self.transpose_1 = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=2, padding=1, output_padding=1)
+         self.conv_after_transpose_1_1 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
+
+         self.transpose_2 = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=2, padding=1, output_padding=1)
+         self.conv_after_transpose_2_1 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
+
+         self.transpose_3 = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=4, padding=0, output_padding=1)
+         self.conv_after_transpose_3_1 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
+
+         self.transpose_4_1_a = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=4, padding=0, output_padding=1)
+         self.transpose_4_1_b = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=2, padding=1, output_padding=1)
+         self.conv_after_transpose_4_1 = nn.Conv2d(256, 64, kernel_size=3, padding=1)
+
+         self.transpose_4_2 = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=4, padding=0, output_padding=1)
+         self.conv_after_transpose_4_2 = nn.Conv2d(256, 64, kernel_size=3, padding=1)
+
+         self.transpose_4_3 = nn.ConvTranspose2d(128, 128, kernel_size=3, stride=2, padding=1, output_padding=1)
+         self.conv_after_transpose_4_3 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
+
+         self.conv_middle_1 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
+         self.conv_middle_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
+         self.conv_middle_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
+         self.conv_mid_4 = nn.Conv2d(512, 256, kernel_size=3, padding=1)
+
+         self.conv_lowest_1 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
+         self.conv_lowest_2 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
+         self.conv_lowest_3 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
+         self.conv_lowest_4 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
+
+         self.conv_scale1_1 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
+         self.conv_scale1_2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
+         self.conv_scale1_3 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
+
+         if checkpoint_path is not None:
+             self.load_state_dict(torch.load(checkpoint_path))
+
+     def forward(self, x):
+         # Subtract the per-channel means out of place so the caller's tensor is not modified.
+         mean_sub_input = x - self.rgb_means
+
+         #################### Stage 1 ##########################
+
+         main_out_block1 = self.relu(self.conv1_2(self.relu(self.conv1_1(mean_sub_input))))
+         main_out_pool1 = self.pool1(main_out_block1)
+
+         main_out_block2 = self.relu(self.conv2_2(self.relu(self.conv2_1(main_out_pool1))))
+         main_out_pool2 = self.pool2(main_out_block2)
+
+         main_out_block3 = self.relu(self.conv3_3(self.relu(self.conv3_2(self.relu(self.conv3_1(main_out_pool2))))))
+         main_out_pool3 = self.pool3(main_out_block3)
+
+         main_out_block4 = self.relu(self.conv4_3(self.relu(self.conv4_2(self.relu(self.conv4_1(main_out_pool3))))))
+         main_out_pool4 = self.pool4(main_out_block4)
+
+         main_out_block5 = self.relu(self.conv_before_transpose_1(
+             self.relu(self.conv5_3(self.relu(self.conv5_2(self.relu(self.conv5_1(main_out_pool4))))))))
+
+         main_out_rest = self.convA_5(self.relu(
+             self.convA_4(self.relu(self.convA_3(self.relu(self.convA_2(self.relu(self.convA_1(main_out_block5)))))))))
+         if self.name == "scale_1":
+             return main_out_rest
+
+         ################## Stage 2 ############################
+
+         sub1_out_conv1 = self.relu(self.conv_mid_4(self.relu(
+             self.conv_middle_3(self.relu(self.conv_middle_2(self.relu(self.conv_middle_1(main_out_pool3))))))))
+         sub1_transpose = self.relu(self.transpose_1(main_out_block5))
+         sub1_after_transpose_1 = self.relu(self.conv_after_transpose_1_1(sub1_transpose))
+
+         sub1_concat = torch.cat((sub1_out_conv1, sub1_after_transpose_1), dim=1)
+
+         sub1_out_rest = self.convB_5(self.relu(
+             self.convB_4(self.relu(self.convB_3(self.relu(self.convB_2(self.relu(self.convB_1(sub1_concat)))))))))
+         if self.name == "scale_2":
+             return main_out_rest, sub1_out_rest
+
+         ################# Stage 3 ############################
+
+         sub2_out_conv1 = self.relu(self.conv_lowest_4(self.relu(
+             self.conv_lowest_3(self.relu(self.conv_lowest_2(self.relu(self.conv_lowest_1(main_out_pool2))))))))
+         sub2_transpose = self.relu(self.transpose_2(sub1_out_conv1))
+         sub2_after_transpose_1 = self.relu(self.conv_after_transpose_2_1(sub2_transpose))
+
+         sub3_transpose = self.relu(self.transpose_3(main_out_block5))
+         sub3_after_transpose_1 = self.relu(self.conv_after_transpose_3_1(sub3_transpose))
+
+         sub2_concat = torch.cat((sub2_out_conv1, sub2_after_transpose_1, sub3_after_transpose_1), dim=1)
+
+         sub2_out_rest = self.convC_5(self.relu(
+             self.convC_4(self.relu(self.convC_3(self.relu(self.convC_2(self.relu(self.convC_1(sub2_concat)))))))))
+
+         if self.name == "scale_3":
+             return main_out_rest, sub1_out_rest, sub2_out_rest
+
+         ################# Stage 4 ############################
+         sub4_out_conv1 = self.relu(
+             self.conv_scale1_3(self.relu(self.conv_scale1_2(self.relu(self.conv_scale1_1(main_out_pool1))))))
+
+         # TDF 1
+         tdf_4_1_a = self.relu(self.transpose_4_1_a(main_out_block5))
+         tdf_4_1_b = self.relu(self.transpose_4_1_b(tdf_4_1_a))
+         after_tdf_4_1 = self.relu(self.conv_after_transpose_4_1(tdf_4_1_b))
+
+         # TDF 2
+         tdf_4_2 = self.relu(self.transpose_4_2(sub1_out_conv1))
+         after_tdf_4_2 = self.relu(self.conv_after_transpose_4_2(tdf_4_2))
+
+         # TDF 3
+         tdf_4_3 = self.relu(self.transpose_4_3(sub2_out_conv1))
+         after_tdf_4_3 = self.relu(self.conv_after_transpose_4_3(tdf_4_3))
+
+         sub4_concat = torch.cat((sub4_out_conv1, after_tdf_4_1, after_tdf_4_2, after_tdf_4_3), dim=1)
+         sub4_out_rest = self.convD_5(self.relu(
+             self.convD_4(self.relu(self.convD_3(self.relu(self.convD_2(self.relu(self.convD_1(sub4_concat)))))))))
+
+         logging.info("Forward Finished")
+         if self.name == "scale_4":
+             return main_out_rest, sub1_out_rest, sub2_out_rest, sub4_out_rest
+
+     def predict_single_image(self, image, emoji, nms_thresh=0.25, thickness=2, multi_colours=True):
+         # Resize so both dimensions are multiples of 16, as required by the four pooling stages.
+         if image.shape[0] % 16 or image.shape[1] % 16:
+             image = cv2.resize(image, (image.shape[1] // 16 * 16, image.shape[0] // 16 * 16))
+         img_tensor = torch.from_numpy(image.transpose((2, 0, 1)).astype(np.float32)).unsqueeze(0)
+         with torch.no_grad():
+             out = self.forward(img_tensor.cuda())
+             # out = self.forward(img_tensor)  # CPU alternative
+         out = get_upsample_output(out, self.output_downscale)
+         pred_dot_map, pred_box_map = get_box_and_dot_maps(out, nms_thresh, self.BOXES)
+         # The predicted boxes are square, so pred_box_map serves as both the height and width map.
+         img_out = get_boxed_img(image, emoji, pred_box_map, pred_box_map, pred_dot_map, self.output_downscale,
+                                 self.BOXES, self.BOX_SIZE_BINS, thickness=thickness, multi_colours=multi_colours)
+         return pred_dot_map, pred_box_map, img_out
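
For reference, a minimal inference sketch using this class, mirroring the setup in run-gradio.py below. It assumes a CUDA device, that the pretrained weights have already been downloaded to weights/weights.pth, and a hypothetical input file crowd.jpg:

    import cv2
    from model import LSCCNN

    network = LSCCNN(checkpoint_path='./weights/weights.pth')
    network.cuda()
    network.eval()

    emoji = cv2.imread('images/blm_fist.png', -1)  # RGBA overlay, as in run-gradio.py
    img = cv2.imread('crowd.jpg')                  # hypothetical input image
    dot_map, box_map, img_out = network.predict_single_image(img, emoji, nms_thresh=0.25)
    cv2.imwrite('crowd_out.jpg', img_out)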
requirements.txt ADDED
@@ -0,0 +1 @@
+ opencv-python==4.4.0.40
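
Note that requirements.txt pins only OpenCV, while the scripts in this commit also import torch, gradio, requests, numpy, and Pillow. A fuller requirements sketch; the extra package names are taken from the imports above, and their versions are left unpinned because the originals are not recorded here:

    opencv-python==4.4.0.40
    torch
    gradio
    requests
    numpy
    Pillow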
run-gradio.py ADDED
@@ -0,0 +1,43 @@
+ import cv2
+ from model import LSCCNN
+ from download_from_google import download_file_from_google_drive
+ import os
+ import gradio as gr
+ from time import time
+
+ # Download the pretrained weights on first run.
+ if not os.path.exists("weights/weights.pth"):
+     os.makedirs("weights", exist_ok=True)
+     output = 'weights/weights.pth'
+     file_id = '1QbPwRXcrONMuBL_39gvnvGGsFCnNyjEm'
+     download_file_from_google_drive(file_id, output)
+
+
+ checkpoint_path = './weights/weights.pth'
+ network = LSCCNN(checkpoint_path=checkpoint_path)
+ network.cuda()
+ network.eval()
+ emoji = cv2.imread("images/blm_fist.png", -1)  # read with the alpha channel intact
+
+
+ def predict(img):
+     # Drop the alpha channel if the uploaded image is RGBA.
+     if img.shape[2] > 3:
+         img = img[:, :, :3]
+     pred_dot_map, pred_box_map, img_out = \
+         network.predict_single_image(img, emoji, nms_thresh=0.25)
+     return img_out
+
+
+ thumbnail = "https://i.ibb.co/bzwSBzw/Screen-Shot-2020-08-24-at-7-05-36-AM.png"
+ examples = [
+     ["images/1.png"],
+     ["images/2.png"],
+     ["images/3.png"],
+     ["images/4.png"],
+     ["images/5.png"]
+ ]
+
+ gr.Interface(predict, "image", "image",
+              title="BLM Photo Anonymization",
+              description="Anonymize photos to protect BLM protesters. Faces will be covered with the "
+                          "black fist emoji. Model developed by the Stanford ML Group and LSC-CNN.",
+              examples=examples,
+              thumbnail=thumbnail).launch()
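
For a quick smoke test without the web UI, predict can be called directly; a minimal sketch, assuming it is placed just above the gr.Interface(...).launch() call, that the weights have finished downloading, and that a CUDA device is available. cv2.imread returns BGR, so the image is converted to RGB to mimic what Gradio's image input supplies:

    # Run one bundled example through the same function the UI uses.
    test_img = cv2.cvtColor(cv2.imread("images/1.png"), cv2.COLOR_BGR2RGB)
    result = predict(test_img)
    cv2.imwrite("smoke_test.png", result)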
simple_example.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
utils_model.py ADDED
@@ -0,0 +1,174 @@
+ from utils_nms import apply_nms
+ from PIL import Image
+ import cv2
+ import torch
+ import numpy as np
+ import copy
+ import logging
+
+
+ def compute_boxes_and_sizes(PRED_DOWNSCALE_FACTORS, GAMMA, NUM_BOXES_PER_SCALE):
+     # Build NUM_BOXES_PER_SCALE box sizes for each prediction scale,
+     # growing the size increment (GAMMA) as the boxes get larger.
+     BOX_SIZE_BINS = [1]
+     g_idx = 0
+     while len(BOX_SIZE_BINS) < NUM_BOXES_PER_SCALE * len(PRED_DOWNSCALE_FACTORS):
+         gamma_idx = len(BOX_SIZE_BINS) // (len(GAMMA) - 1)
+         box_size = BOX_SIZE_BINS[g_idx] + GAMMA[gamma_idx]
+         BOX_SIZE_BINS.append(box_size)
+         g_idx += 1
+
+     BOX_SIZE_BINS_NPY = np.array(BOX_SIZE_BINS)
+     BOXES = np.reshape(BOX_SIZE_BINS_NPY, (4, 3))
+     BOXES = BOXES[::-1]  # largest boxes first, matching the coarsest-to-finest output order
+
+     return BOXES, BOX_SIZE_BINS
+
+
+ def upsample_single(input_, factor=2):
+     # Upsample by `factor`, keeping each non-zero prediction at a single pixel and
+     # marking the rest of its upsampled block as background (the last channel).
+     channels = input_.size(1)
+     indices = torch.nonzero(input_)
+     indices_up = indices.clone()
+     # Corner case: no non-zero predictions to propagate.
+     if indices_up.size(0) == 0:
+         return torch.zeros(input_.size(0), input_.size(1), input_.size(2) * factor, input_.size(3) * factor).cuda()
+         # return torch.zeros(input_.size(0), input_.size(1), input_.size(2) * factor, input_.size(3) * factor)  # CPU alternative
+     indices_up[:, 2] *= factor
+     indices_up[:, 3] *= factor
+
+     output = torch.zeros(input_.size(0), input_.size(1), input_.size(2) * factor, input_.size(3) * factor).cuda()
+     # output = torch.zeros(input_.size(0), input_.size(1), input_.size(2) * factor, input_.size(3) * factor)  # CPU alternative
+     output[indices_up[:, 0], indices_up[:, 1], indices_up[:, 2], indices_up[:, 3]] = \
+         input_[indices[:, 0], indices[:, 1], indices[:, 2], indices[:, 3]]
+
+     output[indices_up[:, 0], channels - 1, indices_up[:, 2] + 1, indices_up[:, 3]] = 1.0
+     output[indices_up[:, 0], channels - 1, indices_up[:, 2], indices_up[:, 3] + 1] = 1.0
+     output[indices_up[:, 0], channels - 1, indices_up[:, 2] + 1, indices_up[:, 3] + 1] = 1.0
+
+     return output
+
+
+ def get_upsample_output(model_output, output_downscale):
+     # Upsample each scale's softmax output to the common output resolution.
+     upsample_max = int(np.log2(16 // output_downscale))
+     upsample_pred = []
+     for idx, out in enumerate(model_output):
+         out = torch.nn.functional.softmax(out, dim=1)
+         upsample_out = out
+         for n in range(upsample_max - idx):
+             upsample_out = upsample_single(upsample_out, factor=2)
+         upsample_pred.append(upsample_out.cpu().data.numpy().squeeze(0))
+     return upsample_pred
+
+
+ def box_NMS(predictions, nms_thresh, BOXES):
+     Scores = []
+     Boxes = []
+     for k in range(len(BOXES)):
+         scores = np.max(predictions[k], axis=0)
+         boxes = np.argmax(predictions[k], axis=0)
+         # index the boxes with BOXES to get h_map and w_map (both are the same for us)
+         mask = (boxes < 3)  # drop points where the argmax is the fourth (no-box) channel
+         boxes = (boxes + 1) * mask
+         scores = scores * mask  # (+ 100 was added here at one point, since log-softmax scores are negative)
+
+         boxes = (boxes == 1) * BOXES[k][0] + (boxes == 2) * BOXES[k][1] + (boxes == 3) * BOXES[k][2]
+         Scores.append(scores)
+         Boxes.append(boxes)
+
+     # 0.5 fills the (unused) dotmap_pred_downscale argument.
+     x, y, h, w, scores = apply_nms(Scores, Boxes, Boxes, 0.5, thresh=nms_thresh)
+
+     # predictions[0] has shape 4 x H x W, so indices 1 and 2 give the spatial size.
+     nms_out = np.zeros((predictions[0].shape[1], predictions[0].shape[2]))
+     box_out = np.zeros((predictions[0].shape[1], predictions[0].shape[2]))
+     for (xx, yy, hh) in zip(x, y, h):
+         nms_out[yy, xx] = 1
+         box_out[yy, xx] = hh
+
+     assert (np.count_nonzero(nms_out) == len(x))
+
+     return nms_out, box_out
+
+
+ def get_box_and_dot_maps(pred, nms_thresh, BOXES):
+     assert (len(pred) == 4)
+     # NMS on the multi-scale outputs
+     nms_out, h = box_NMS(pred, nms_thresh, BOXES)
+     return nms_out, h
+
+
+ def get_boxed_img(image, original_emoji, h_map, w_map, gt_pred_map,
+                   prediction_downscale, BOXES, BOX_SIZE_BINS,
+                   thickness=1, multi_colours=False):
+     if image.shape[2] != 3:
+         boxed_img = image.astype(np.uint8).transpose((1, 2, 0)).copy()
+     else:
+         boxed_img = image.astype(np.uint8).copy()
+     head_idx = np.where(gt_pred_map > 0)
+
+     H, W = boxed_img.shape[:2]
+
+     Y, X = head_idx[-2], head_idx[-1]
+
+     # Enlarge the emoji on large images so it stays visible.
+     enlarge_factor = max(((H * W) / (48 ** 2)) // 300, 1)
+
+     for i, (y, x) in enumerate(zip(Y, X)):
+
+         h, w = h_map[y, x] * prediction_downscale, w_map[y, x] * prediction_downscale
+         scale = ((BOX_SIZE_BINS.index(h // prediction_downscale)) // 3) + 1
+
+         if enlarge_factor > 1:
+             h *= enlarge_factor / 2
+             w *= enlarge_factor / 2
+
+         expand_w = (0.2 * scale * w) // 2
+         expand_h = (0.2 * scale * h) // 2
+
+         # Note: in this function x1/x2 index rows and y1/y2 index columns.
+         y2 = min(int((prediction_downscale * x + w / 2) + expand_w), W)
+         y1 = max(int((prediction_downscale * x - w / 2) - expand_w), 0)
+         x2 = min(int((prediction_downscale * y + h / 2) + expand_h), H)
+         x1 = max(int((prediction_downscale * y - h / 2) - expand_h), 0)
+
+         emoji = copy.deepcopy(original_emoji)
+         width = x2 - x1
+         height = y2 - y1
+         # cv2.resize takes (cols, rows); the variable names are swapped relative to
+         # convention, but the sizes line up with the ROI below.
+         emoji = cv2.resize(emoji, (height, width))
+
+         # https://gist.github.com/clungzta/b4bbb3e2aa0490b0cfcbc042184b0b4e
+
+         # Extract the alpha mask of the RGBA image, convert to RGB
+         r, g, b, a = cv2.split(emoji)
+         overlay_color = cv2.merge((b, g, r))
+
+         # Apply some simple filtering to remove edge noise
+         mask = cv2.medianBlur(a, 5)
+         mask[mask != 255] = 0
+         roi = boxed_img[x1:x2, y1:y2]
+
+         # Black out the area behind the emoji in the original ROI
+         img1_bg = cv2.bitwise_and(roi.copy(), roi.copy(), mask=cv2.bitwise_not(mask))
+
+         # Mask out the emoji from the emoji image
+         img2_fg = cv2.bitwise_and(overlay_color, overlay_color, mask=mask)
+
+         # Update the original image with the new ROI
+         boxed_img[x1:x2, y1:y2] = cv2.add(img1_bg, img2_fg)
+
+     return boxed_img
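
As a concrete check of compute_boxes_and_sizes, the defaults from model.py (PRED_DOWNSCALE_FACTORS=(8, 4, 2, 1), GAMMA=(1, 1, 2, 4), NUM_BOXES_PER_SCALE=3) yield twelve box sizes, split three per scale; a small sketch:

    from utils_model import compute_boxes_and_sizes

    BOXES, BOX_SIZE_BINS = compute_boxes_and_sizes((8, 4, 2, 1), (1, 1, 2, 4), 3)
    print(BOX_SIZE_BINS)  # [1, 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24]
    print(BOXES)
    # [[16 20 24]    <- largest boxes, paired with the coarsest (1/8) prediction scale
    #  [ 8 10 12]
    #  [ 4  5  6]
    #  [ 1  2  3]]   <- smallest boxes, paired with the finest scale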
utils_nms.py ADDED
@@ -0,0 +1,152 @@
+ """
+ utils_nms.py: NMS helpers, merged from apply_nms.py (wrapper) and nms.py.
+ Authors: svp
+ """
+
+ import numpy as np
+
+ '''
+ nms.py: CPU implementation of non-maximal suppression, modified from Ross Girshick's code.
+ Authors: svp
+
+ Modified from https://github.com/rbgirshick/fast-rcnn/blob/master/lib/utils/nms.py
+ to accommodate a corner case which handles one box lying completely inside another.
+ '''
+
+
+ def nms(dets, thresh):
+     x1 = dets[:, 0]
+     y1 = dets[:, 1]
+     x2 = dets[:, 2]
+     y2 = dets[:, 3]
+     scores = dets[:, 4]
+
+     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+
+     # Process boxes in order of decreasing confidence.
+     order = scores.argsort()[::-1]
+
+     keep = []
+     while order.size > 0:
+         i = order[0]
+         keep.append(i)
+         xx1 = np.maximum(x1[i], x1[order[1:]])
+         yy1 = np.maximum(y1[i], y1[order[1:]])
+         xx2 = np.minimum(x2[i], x2[order[1:]])
+         yy2 = np.minimum(y2[i], y2[order[1:]])
+
+         w = np.maximum(0.0, xx2 - xx1 + 1)
+         h = np.maximum(0.0, yy2 - yy1 + 1)
+         inter = w * h
+         # Corner case: if one box lies entirely inside another, force suppression
+         # by setting its overlap to 1.
+         remove_index_1 = np.where(areas[i] == inter)
+         remove_index_2 = np.where(areas[order[1:]] == inter)
+         ovr = inter / (areas[i] + areas[order[1:]] - inter)
+         ovr[remove_index_1] = 1.0
+         ovr[remove_index_2] = 1.0
+         inds = np.where(ovr <= thresh)[0]
+         order = order[inds + 1]
+
+     return keep
+
+
+ '''
+ Extracts a confidence map and a box map from N-channel input (N = 4 here).
+
+ Parameters
+ ----------
+ confidence_map - (list) list of confidences for N channels
+ hmap - (list) list of box values for N channels
+
+ Returns
+ -------
+ nms_conf_map - (H x W) single-channel confidence score map
+ nms_conf_box - (H x W) single-channel box map
+ '''
+
+
+ def extract_conf_points(confidence_map, hmap):
+     nms_conf_map = np.zeros_like(confidence_map[0])
+     nms_conf_box = np.zeros_like(confidence_map[0])
+
+     # Points where exactly one of the two maps is confident.
+     idx_1 = np.where(np.logical_and(confidence_map[0] > 0, confidence_map[1] <= 0))
+     idx_2 = np.where(np.logical_and(confidence_map[0] <= 0, confidence_map[1] > 0))
+     # Points where both maps are confident.
+     idx_common = np.where(np.logical_and(confidence_map[0] > 0, confidence_map[1] > 0))
+
+     nms_conf_map[idx_1] = confidence_map[0][idx_1]
+     nms_conf_map[idx_2] = confidence_map[1][idx_2]
+
+     nms_conf_box[idx_1] = hmap[0][idx_1]
+     nms_conf_box[idx_2] = hmap[1][idx_2]
+
+     # Where both are confident, keep the higher-confidence prediction.
+     for ii in range(len(idx_common[0])):
+         x, y = idx_common[0][ii], idx_common[1][ii]
+         if confidence_map[0][x, y] > confidence_map[1][x, y]:
+             nms_conf_map[x, y] = confidence_map[0][x, y]
+             nms_conf_box[x, y] = hmap[0][x, y]
+         else:
+             nms_conf_map[x, y] = confidence_map[1][x, y]
+             nms_conf_box[x, y] = hmap[1][x, y]
+
+     assert (np.sum(nms_conf_map > 0) == len(idx_1[0]) + len(idx_2[0]) + len(idx_common[0]))
+
+     return nms_conf_map, nms_conf_box
+
+
+ '''
+ Wrapper function to perform NMS.
+
+ Parameters
+ ----------
+ confidence_map - (list) list of confidences for N channels
+ hmap - (list) list of box heights for N channels
+ wmap - (list) list of box widths for N channels
+ dotmap_pred_downscale - (int) prediction scale
+ thresh - (float) threshold for NMS
+
+ Returns
+ -------
+ x, y - (list) x- and y-coordinates of the points kept after NMS
+ h, w - (list) height and width of the boxes at the corresponding (x, y) points
+ scores - (list) confidence for h and w at each (x, y) point
+ '''
+
+
+ def apply_nms(confidence_map, hmap, wmap, dotmap_pred_downscale=2, thresh=0.3):
+     # Merge the four scale maps pairwise into a single confidence/box map.
+     nms_conf_map, nms_conf_box = extract_conf_points([confidence_map[0], confidence_map[1]], [hmap[0], hmap[1]])
+     nms_conf_map, nms_conf_box = extract_conf_points([confidence_map[2], nms_conf_map], [hmap[2], nms_conf_box])
+     nms_conf_map, nms_conf_box = extract_conf_points([confidence_map[3], nms_conf_map], [hmap[3], nms_conf_box])
+
+     confidence_map = nms_conf_map
+     hmap = nms_conf_box
+     wmap = nms_conf_box  # boxes are square, so the height map doubles as the width map
+
+     confidence_map = np.squeeze(confidence_map)
+     hmap = np.squeeze(hmap)
+     wmap = np.squeeze(wmap)
+
+     dets_idx = np.where(confidence_map > 0)
+
+     y, x = dets_idx[-2], dets_idx[-1]
+     h, w = hmap[dets_idx], wmap[dets_idx]
+     x1 = x - w / 2
+     x2 = x + w / 2
+     y1 = y - h / 2
+     y2 = y + h / 2
+     scores = confidence_map[dets_idx]
+
+     dets = np.stack([np.array(x1), np.array(y1), np.array(x2), np.array(y2), np.array(scores)], axis=1)
+     # List of indices to keep
+     keep = nms(dets, thresh)
+
+     x = x[keep]
+     y = y[keep]
+     h = h[keep]
+     w = w[keep]
+     scores = scores[keep]
+
+     return x, y, h, w, scores
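
A small self-contained check of nms() on toy detections (values chosen purely for illustration):

    import numpy as np
    from utils_nms import nms

    # Columns are x1, y1, x2, y2, score. The first two boxes overlap heavily
    # (IoU ~ 0.69); the third is far away.
    dets = np.array([
        [10., 10., 30., 30., 0.9],
        [12., 12., 32., 32., 0.8],
        [100., 100., 120., 120., 0.7],
    ])
    print(nms(dets, thresh=0.3))  # [0, 2] -- the lower-scoring overlapping box is suppressed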