|
import argparse
|
|
import json
|
|
import os
|
|
|
|
import cv2 as cv
|
|
import numpy as np
|
|
from tqdm import tqdm
|
|
|
|
from src.camera import Camera
|
|
from src.soccerpitch import SoccerPitch
|
|
|
|
|
|
def normalization_transform(points):
    """
    Computes the similarity transform such that the list of points is centered around (0,0) and that its mean
    distance to the center is sqrt(2).
    Only the first two coordinates of every point are used, so homogeneous points (x, y, 1) are accepted as well.
    :param points: point cloud that we wish to normalize (sequence of points with at least two coordinates each)
    :return: the 3x3 affine transformation matrix; the identity when the point cloud is empty
    """
    pts = np.asarray(points, dtype=float)
    if pts.size == 0:
        # Nothing to normalize: the mean of an empty cloud is undefined, so fall back to the identity.
        return np.eye(3)

    center = pts.mean(axis=0)
    mean_dist = np.mean(np.hypot(pts[:, 0] - center[0], pts[:, 1] - center[1]))

    # All points coincide (zero spread): keep unit scale and only translate.
    if mean_dist <= 0.:
        s = 1.
    else:
        s = np.sqrt(2) / mean_dist

    return np.array([
        [s, 0., -s * center[0]],
        [0., s, -s * center[1]],
        [0., 0., 1.],
    ])
|
|
|
|
|
|
def estimate_homography_from_line_correspondences(lines, T1=np.eye(3), T2=np.eye(3)):
    """
    Given lines correspondences, computes the homography that maps best the two set of lines.
    The homography H is the direct linear transform solution of "H^T @ target_line is parallel to source_line" for
    every pair, i.e. the right singular vector of the stacked constraint matrix associated with its smallest
    singular value (a null-space vector when the system is exactly satisfied or has only 4 correspondences).
    :param lines: list of pair of 2D lines matches, each pair being (source line, target line) given as homogeneous
    3-vectors.
    :param T1: Similarity transform to normalize the elements of the source reference system
    :param T2: Similarity transform to normalize the elements of the target reference system
    :return: boolean to indicate success or failure of the estimation, homography (identity on failure)
    """
    homography = np.eye(3)

    # A homography has 8 degrees of freedom and every pair brings 2 equations: fewer than 4 pairs (including an
    # empty list) cannot determine it.
    min_correspondences = 4
    if len(lines) < min_correspondences:
        return False, homography

    # Points transform with T, lines with the inverse transpose of T. The inverses do not depend on the pair, so
    # they are computed once. T1 and T2 are only read, never modified (they may be the shared default arrays).
    T1_inv_t = np.transpose(np.linalg.inv(T1))
    T2_inv = np.linalg.inv(T2)
    T2_inv_t = np.transpose(T2_inv)

    A = np.zeros((len(lines) * 2, 9))
    for i, (source, target) in enumerate(lines):
        u, v, w = T1_inv_t @ source
        x, y, z = T2_inv_t @ target

        # Two of the three components of cross(source_line, H^T @ target_line) = 0, with H flattened row by row.
        # NOTE: when w == 0 both rows are multiples of the same constraint, so that pair only brings one equation.
        A[2 * i] = (0, x * w, -x * v, 0, y * w, -v * y, 0, z * w, -v * z)
        A[2 * i + 1] = (x * w, 0, -x * u, y * w, 0, -u * y, z * w, 0, -u * z)

    try:
        _, singular_values, vh = np.linalg.svd(A)
    except np.linalg.LinAlgError:
        return False, homography

    # An all-zero system carries no information at all.
    if not np.any(singular_values > 0):
        return False, homography

    # With full matrices vh is 9x9 and its last row always belongs to the smallest singular value, including the
    # null-space direction that has no entry in singular_values when A has fewer than 9 rows.
    normalized_homography = np.reshape(vh[-1], (3, 3))

    # Undo the normalization of both reference systems.
    estimate = T2_inv @ normalized_homography @ T1

    scale = estimate[2, 2]
    if scale == 0 or not np.isfinite(scale):
        # Cannot be rescaled so that its bottom-right entry is 1.
        return False, homography

    return True, estimate / scale
|
|
|
|
|
|
def draw_pitch_homography(image, homography):
    """
    Draws points along the soccer pitch markings elements in the image based on the homography projection.
    The drawing bounds are read from the image itself, so the image may have any resolution as long as the
    homography is expressed in the pixel coordinates of that image.
    :param image: image to draw on, as an array whose first two dimensions are (height, width); modified in place
    :param homography: homography that captures the relation between the world pitch plane and the image
    :return: modified image
    """
    height, width = image.shape[:2]

    field = SoccerPitch()
    polylines = field.sample_field_points()
    for line in polylines.values():
        for point in line:
            # Only points whose third coordinate is 0 are projected: the homography relates the pitch plane to
            # the image, so it says nothing about other points.
            if point[2] != 0.:
                continue

            projected = homography @ np.array((point[0], point[1], 1.))
            if projected[2] == 0.:
                # Point at infinity in the image: it cannot be drawn.
                continue
            projected = projected / projected[2]

            if 0 < projected[0] < width and 0 < projected[1] < height:
                cv.circle(image, (int(projected[0]), int(projected[1])), 1, (255, 0, 0), 1)

    return image
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point. For every frame listed in the per_match_info.json of the selected split, reads the
    # predicted line extremities, estimates a homography from the line correspondences, derives a camera from it
    # and writes the camera parameters next to the extremities predictions.

    parser = argparse.ArgumentParser(description='Baseline for camera parameters extraction')

    parser.add_argument('-s', '--soccernet', default="/home/fmg/data/SN23/calibration-2023-bis/", type=str,
                        help='Path to the SoccerNet-V3 dataset folder')
    parser.add_argument('-p', '--prediction', default="/home/fmg/results/SN23-tests/",
                        required=False, type=str,
                        help="Path to the prediction folder")
    parser.add_argument('--split', required=False, type=str, default="valid", help='Select the split of data')
    parser.add_argument('--resolution_width', required=False, type=int, default=960,
                        help='width resolution of the images')
    parser.add_argument('--resolution_height', required=False, type=int, default=540,
                        help='height resolution of the images')
    args = parser.parse_args()

    field = SoccerPitch()

    dataset_dir = os.path.join(args.soccernet, args.split)
    if not os.path.exists(dataset_dir):
        print("Invalid dataset path !")
        # Same effect as exit(-1), without relying on the helper injected by the site module.
        raise SystemExit(-1)

    with open(os.path.join(dataset_dir, "per_match_info.json"), 'r') as f:
        match_info = json.load(f)

    with tqdm(match_info.keys(), total=len(match_info), ncols=160) as progress:
        for match in progress:
            for frame in match_info[match]:
                frame_index = frame.split(".")[0]
                prediction_file = os.path.join(args.prediction, args.split, f"extremities_{frame_index}.json")

                if not os.path.exists(prediction_file):
                    continue

                with open(prediction_file, 'r') as f:
                    predictions = json.load(f)

                line_matches = []  # (pitch line, image line) pairs, as homogeneous lines
                potential_3d_2d_matches = {}  # pitch point name -> image points that may depict it
                src_pts = []  # every image extremity that was kept, for normalization
                for line_class, extremities in predictions.items():
                    if line_class == 'Circle central' or "unknown" in line_class:
                        continue
                    P3D1 = field.line_extremities_keys[line_class][0]
                    P3D2 = field.line_extremities_keys[line_class][1]
                    p1 = np.array([extremities[0]['x'] * args.resolution_width,
                                   extremities[0]['y'] * args.resolution_height, 1.])
                    p2 = np.array([extremities[1]['x'] * args.resolution_width,
                                   extremities[1]['y'] * args.resolution_height, 1.])

                    # Homogeneous line through both extremities. Reject non-finite predictions before anything
                    # is recorded, otherwise a single NaN/inf extremity would corrupt the normalization of the
                    # whole frame.
                    line = np.cross(p1, p2)
                    if not np.all(np.isfinite(line)):
                        continue

                    src_pts.extend([p1, p2])
                    # Which image extremity corresponds to which end of the pitch line is not known yet: both are
                    # kept as candidates for both pitch points.
                    for point_name in (P3D1, P3D2):
                        potential_3d_2d_matches.setdefault(point_name, []).extend([p1, p2])

                    line_pitch = field.get_2d_homogeneous_line(line_class)
                    if line_pitch is not None:
                        line_matches.append((line_pitch, line))

                success = False
                if len(line_matches) >= 4:
                    target_pts = [field.point_dict[name][:2] for name in potential_3d_2d_matches]
                    T1 = normalization_transform(target_pts)
                    T2 = normalization_transform(src_pts)
                    success, homography = estimate_homography_from_line_correspondences(line_matches, T1, T2)

                    if success:
                        cam = Camera(args.resolution_width, args.resolution_height)
                        success = cam.from_homography(homography)

                    if success:
                        # Resolve the 3D-2D ambiguity: for every pitch point that projects inside the image, keep
                        # the closest candidate, provided it is closer than 100 (in image coordinates).
                        point_matches = []
                        for name, candidates in potential_3d_2d_matches.items():
                            p3D = field.point_dict[name]
                            projected = cam.project_point(p3D)

                            inside_image = (0 < projected[0] < args.resolution_width
                                            and 0 < projected[1] < args.resolution_height)
                            if not inside_image:
                                continue

                            dist = np.array([
                                np.sqrt((projected[0] - candidate[0]) ** 2 + (projected[1] - candidate[1]) ** 2)
                                for candidate in candidates
                            ])
                            selected = np.argmin(dist)
                            if dist[selected] < 100:
                                point_matches.append((p3D, candidates[selected][:2]))

                        if len(point_matches) > 3:
                            cam.refine_camera(point_matches)

                camera_predictions = dict()
                if success:
                    camera_predictions = cam.to_json_parameters()

                task2_prediction_file = os.path.join(args.prediction, args.split, f"camera_{frame_index}.json")
                if camera_predictions:
                    with open(task2_prediction_file, "w") as f:
                        json.dump(camera_predictions, f, indent=4)
|
|
|