|
|
|
import cv2 |
|
import torch |
|
import shapely |
|
import numpy as np |
|
from matching import viz2d |
|
import matplotlib.pyplot as plt |
|
import torchvision.transforms as tfm |
|
from sklearn.linear_model import LogisticRegression |
|
from torchvision.transforms.functional import InterpolationMode |
|
from pathlib import Path |
|
|
|
def path_to_footprint(path):
    """Parse the footprint (4 bounding-box corners, lat/lon) encoded in a prediction's path.

    The path is '@'-separated; fields 1-2 hold min_lat/min_lon and
    fields 5-6 hold max_lat/max_lon.
    Returns a torch tensor of shape (4, 2).
    """
    fields = str(path).split("@")
    min_lat, min_lon = float(fields[1]), float(fields[2])
    max_lat, max_lon = float(fields[5]), float(fields[6])
    corners = [
        [min_lat, min_lon],
        [max_lat, min_lon],
        [max_lat, max_lon],
        [min_lat, max_lon],
    ]
    return torch.tensor(np.array(corners))
|
|
|
|
|
def apply_homography_to_corners(width, height, fm):
    """Warp the four pixel corners of a width x height image through homography `fm`.

    :param width: Width of the image.
    :param height: Height of the image.
    :param fm: 3x3 homography matrix (named `fm` throughout this file).
    :return: torch int tensor of shape (4, 2) with the warped corner coordinates,
             ordered top-left, top-right, bottom-right, bottom-left.
    """
    # cv2.perspectiveTransform expects shape (N, 1, 2) float32.
    corners = np.float32([
        [0, 0],
        [width, 0],
        [width, height],
        [0, height],
    ]).reshape(4, 1, 2)
    warped = cv2.perspectiveTransform(corners, fm)
    return torch.tensor(warped).type(torch.int)[:, 0]
|
|
|
|
|
def compute_matching(image0, image1, matcher, save_images=False, viz_params=None):
    """Match two images and return (num_inliers, homography).

    `matcher` must return a dict with keys 'num_inliers', 'H',
    'inlier_kpts0' and 'inlier_kpts1'. Returns (0, None) when the matcher
    reports inliers but no valid 3x3 homography. When `save_images` is
    True, a visualization and the raw matches are written under
    viz_params["output_dir"].
    """
    result = matcher(image0, image1)
    num_inliers = result['num_inliers']
    fm = result["H"]
    mkpts0 = result['inlier_kpts0']
    mkpts1 = result['inlier_kpts1']

    # No inliers at all: nothing more to validate or visualize.
    if num_inliers == 0:
        return num_inliers, fm

    if num_inliers > 0:
        # Guard against matchers that report inliers without a usable homography.
        valid_homography = isinstance(fm, np.ndarray) and fm.shape == (3, 3)
        if fm is None or not valid_homography:
            print("number of inliers is not zero but the homography is None")
            return 0, None

    if save_images:
        path0 = viz_params["query_path"]
        path1 = viz_params["pred_path"]
        output_dir = viz_params["output_dir"]
        output_file_suffix = viz_params["output_file_suffix"]
        stem0, stem1 = path0.stem, path1.stem
        matches_path = output_dir / f'{stem0}_{stem1}_matches_{output_file_suffix}.torch'
        viz_path = output_dir / f'{stem0}_{stem1}_matches_{output_file_suffix}.jpg'
        output_dir.mkdir(exist_ok=True)
        viz2d.plot_images([image0, image1])
        viz2d.plot_matches(mkpts0, mkpts1, color="lime", lw=0.2)
        viz2d.add_text(0, f'{len(mkpts1)} matches', fs=20)
        viz2d.save_plot(viz_path)
        plt.close()
        torch.save((num_inliers, fm, mkpts0, mkpts1), matches_path)

    return num_inliers, fm
|
|
|
|
|
def footprint_to_minmax_latlon(footprint):
    """Return (min_lat, min_lon, max_lat, max_lon) for a (4, 2) footprint array."""
    lats, lons = footprint[:, 0], footprint[:, 1]
    return lats.min(), lons.min(), lats.max(), lons.max()
|
|
|
|
|
def get_lat_lon_per_pixel(footprint, HW):
    """Return the change in (lat, lon) per pixel for an HW x HW image covering `footprint`."""
    lats, lons = footprint[:, 0], footprint[:, 1]
    return (lats.max() - lats.min()) / HW, (lons.max() - lons.min()) / HW
|
|
|
|
|
def apply_homography_to_footprint(pred_footprint, transformed_corners, HW):
    """Convert warped pixel corners back to lat/lon using the prediction's footprint.

    `transformed_corners` are pixel coordinates inside the HW x HW
    "surrounding" image, which covers 3x the prediction's bounding box
    (centre +/- 1.5 * span on each axis).
    Returns a (4, 2) torch tensor of (lat, lon) corners.
    """
    # Footprint of the 3x-larger surrounding tile, centred on the prediction.
    center = pred_footprint.mean(0)
    half_span = (pred_footprint.max(0)[0] - pred_footprint.min(0)[0]) * 1.5
    min_lat, min_lon = center - half_span
    max_lat, max_lon = center + half_span
    surrounding = np.array(
        [min_lat, min_lon, max_lat, min_lon, max_lat, max_lon, min_lat, max_lon]
    ).reshape(4, 2)

    # Degrees spanned by one pixel of the HW x HW surrounding image.
    lats, lons = surrounding[:, 0], surrounding[:, 1]
    min_lat, max_lat = lats.min(), lats.max()
    min_lon, max_lon = lons.min(), lons.max()
    lat_per_pixel = (max_lat - min_lat) / HW
    lon_per_pixel = (max_lon - min_lon) / HW

    # Pixel rows grow southwards (lat decreases from max_lat) and pixel
    # columns grow eastwards (lon increases from min_lon). Column 1 of
    # transformed_corners is the row index, column 0 the column index.
    corner_lat_lon = [
        [max_lat - transformed_corners[i, 1] * lat_per_pixel,
         min_lon + transformed_corners[i, 0] * lon_per_pixel]
        for i in range(4)
    ]
    return torch.tensor(corner_lat_lon)
|
|
|
|
|
def add_homographies_fm(fm1, fm2):
    """Compose two 3x3 homographies into a single one.

    The original implementation computed inv(inv(fm2) @ inv(fm1)), which
    by (A @ B)^-1 == B^-1 @ A^-1 is exactly fm1 @ fm2 — but paid for
    three matrix inversions, each adding numerical error (and failing
    outright on a singular matrix). Composing directly is faster and
    numerically more stable, with identical mathematical behavior.
    """
    return fm1 @ fm2
|
|
|
|
|
def compute_threshold(true_matches, false_matches, thresh=0.999):
    """Estimate the inlier-count threshold at which a match is correct with probability `thresh`.

    Fits a 1-D logistic regression separating inlier counts of correct
    matches (label 1) from incorrect ones (label 0), then inverts the
    sigmoid to find the count whose predicted probability equals `thresh`.
    Falls back to 4 when there are too few positives to fit a model.
    """
    assert isinstance(true_matches, list)
    assert isinstance(false_matches, list)
    # Not enough positive samples for a meaningful fit.
    if len(true_matches) < 4:
        return 4

    X = np.concatenate((
        np.array(true_matches).reshape(-1, 1),
        np.array(false_matches).reshape(-1, 1),
    ))
    Y = np.concatenate((
        np.ones(len(true_matches), dtype=int),
        np.zeros(len(false_matches), dtype=int),
    ))
    model = LogisticRegression()
    model.fit(X, Y)
    # Invert the sigmoid: decision value whose probability equals `thresh`.
    logit = - np.log((1 - thresh) / thresh)
    match_thresh = (logit - model.intercept_) / model.coef_
    return match_thresh.item()
|
|
|
|
|
def estimate_footprint(
    fm, query_image, surrounding_image, matcher, pred_footprint, HW,
    save_images=False, viz_params=None
):
    """Estimate the footprint of the query given a prediction/candidate.

    This is the equivalent of a single iteration of EarthMatch: warp the
    surrounding image with the homography accumulated so far, re-match it
    against the query, compose the new homography onto the old one, and
    project the prediction's footprint through the result.

    Parameters
    ----------
    fm : np.array of shape (3, 3), the homography from the previous iteration
        (None on the first iteration). NOTE(review): named "fundamental matrix"
        historically, but it is used as a homography throughout.
    query_image : torch.tensor of shape (3, H, W) with the query image.
    surrounding_image : torch.tensor of shape (3, HW*3, HW*3) with the
        surrounding image (3x the prediction tile on each side).
    matcher : a matcher from the image-matching-models.
    pred_footprint : torch.tensor of shape (4, 2) with the prediction's footprint.
    HW : int, the height and width of the query/prediction images.
    save_images : bool, if true then save matching's visualizations.
    viz_params : parameters used for visualizations if save_images is True.

    Returns
    -------
    num_inliers : int, number of inliers from the matching.
    fm : np.array of shape (3, 3), the composed homography mapping query to prediction.
    warped_pred_footprint : torch.tensor of shape (4, 2) with the pred_footprint
        after applying fm's homography.
    pretty_printed_footprint : str, a pretty printed warped_pred_footprint.

    If the prediction is deemed invalid (no inliers, non-convex warped
    polygon, or an implausibly large warped area), return (-1, None, None, None).
    """
    assert surrounding_image.shape[1] == surrounding_image.shape[2], f"{surrounding_image.shape}"

    if fm is not None:
        # Warp the surrounding image so its central tile aligns with the query.
        # The +HW offset moves corner coordinates from the central-tile frame
        # into the 3x-larger surrounding-image frame.
        transformed_corners = apply_homography_to_corners(HW, HW, fm) + HW
        # Destination corners: the central HW x HW tile of the surrounding image.
        endpoints = [[HW, HW], [HW*2, HW], [HW*2, HW*2], [HW, HW*2]]
        warped_surrounding_pred_img = tfm.functional.perspective(
            surrounding_image, transformed_corners.numpy(), endpoints, InterpolationMode.BILINEAR
        )
    else:
        # First iteration: no homography yet, match against the raw image.
        warped_surrounding_pred_img = surrounding_image

    assert tuple(warped_surrounding_pred_img.shape) == (3, HW*3, HW*3)
    # Crop the central tile, which corresponds to the prediction itself.
    warped_pred_img = warped_surrounding_pred_img[:, HW:HW*2, HW:HW*2]

    if save_images:
        tfm.ToPILImage()(warped_pred_img).save(viz_params["pred_path"])

    num_inliers, new_fm = compute_matching(
        image0=query_image, image1=warped_pred_img,
        matcher=matcher, save_images=save_images,
        viz_params=viz_params
    )

    if num_inliers == 0:
        return -1, None, None, None

    # Accumulate the new homography onto the one from previous iterations.
    if fm is None:
        fm = new_fm
    else:
        fm = add_homographies_fm(fm, new_fm)

    transformed_corners = apply_homography_to_corners(HW, HW, fm) + HW

    # Sanity-check the warped tile in normalized (tile-size) units:
    # a non-convex quadrilateral means the homography folded the image.
    pred_polygon = shapely.Polygon((transformed_corners - HW) / HW)
    if not pred_polygon.convex_hull.equals(pred_polygon):
        return -1, None, None, None
    # Area > 9 means the warp blew the tile up beyond the whole 3x3 surrounding
    # region — implausible, so reject.
    if pred_polygon.area > 9:
        return -1, None, None, None

    # HW*3 because transformed_corners live in the surrounding-image frame.
    warped_pred_footprint = apply_homography_to_footprint(pred_footprint, transformed_corners, HW*3)
    pretty_printed_footprint = "; ".join([f"{lat_lon[0]:.5f}, {lat_lon[1]:.5f}" for lat_lon in warped_pred_footprint])
    return num_inliers, fm, warped_pred_footprint, pretty_printed_footprint
|
|
|
|
|
def enlarge_polygon(polygon, scale_factor):
    """Scale a polygon about its centroid by `scale_factor` and return the result."""
    c = polygon.centroid
    xs, ys = polygon.exterior.xy
    scaled = [
        (c.x + (x - c.x) * scale_factor, c.y + (y - c.y) * scale_factor)
        for x, y in zip(xs, ys)
    ]
    return shapely.Polygon(scaled)
|
|
|
|
|
def get_polygon(lats_lons):
    """Build a shapely Polygon from a (4, 2) numpy array of lat/lon corners."""
    assert isinstance(lats_lons, np.ndarray)
    assert lats_lons.shape == (4, 2)
    return shapely.Polygon(lats_lons)
|
|
|
|
|
def get_query_metadata(query_path):
    """Extract metadata fields from a query filename.

    The path must contain exactly ten '@'-separated fields; returns
    (center_lat, center_lon, tilt (i.e. obliqueness), focal length,
    cloud percentage) as (float, float, int, int, int).
    """
    # Strict 10-way unpack: a malformed path raises ValueError here.
    _, lat, lon, _nlat, _nlon, tilt, fclt, cldp, _mrf, _ = str(query_path).split("@")
    return float(lat), float(lon), int(tilt), int(fclt), int(cldp)
|
|
|
|
|
def get_centerpoint_from_query_path(query_path):
    """Return the query's center as a shapely Point(lat, lon)."""
    lat, lon = get_query_metadata(query_path)[:2]
    return shapely.Point(lat, lon)
|
|
|
|
|
def fclt_le_200(query_path):
    """True when the query's focal length is at most 200."""
    return get_query_metadata(query_path)[3] <= 200
|
|
|
|
|
def fclt_200_400(query_path):
    """True when the query's focal length is in (200, 400]."""
    fclt = get_query_metadata(query_path)[3]
    return 200 < fclt <= 400
|
|
|
|
|
def fclt_400_800(query_path):
    """True when the query's focal length is in (400, 800]."""
    fclt = get_query_metadata(query_path)[3]
    return 400 < fclt <= 800
|
|
|
|
|
def fclt_g_800(query_path):
    """True when the query's focal length is greater than 800."""
    return get_query_metadata(query_path)[3] > 800
|
|
|
|
|
def tilt_l_40(query_path):
    """True when the query's tilt (obliqueness) is below 40."""
    return get_query_metadata(query_path)[2] < 40
|
|
|
|
|
def tilt_ge_40(query_path):
    """True when the query's tilt (obliqueness) is 40 or more."""
    return get_query_metadata(query_path)[2] >= 40
|
|
|
|
|
def cldp_l_40(query_path):
    """True when the query's cloud percentage is below 40."""
    return get_query_metadata(query_path)[4] < 40
|
|
|
|
|
def cldp_ge_40(query_path):
    """True when the query's cloud percentage is 40 or more."""
    return get_query_metadata(query_path)[4] >= 40
|
|
|
|
|
|
|
|
|
|
|
import cv2 |
|
import numpy as np |
|
|
|
def to_uint8_numpy(img):
    """Convert a torch tensor or numpy image into an (H, W, C) uint8 numpy array.

    Values with max <= 1.0 are assumed to be in [0, 1] and are rescaled to
    [0, 255]; anything else is clipped to [0, 255] directly.
    """
    # Pull the data off any accelerator and out of the autograd graph,
    # duck-typed so plain numpy arrays pass through untouched.
    for attr in ('cpu', 'detach', 'numpy'):
        if hasattr(img, attr):
            img = getattr(img, attr)()
    # Channel-first (C, H, W) -> channel-last (H, W, C).
    if img.ndim == 3 and img.shape[0] in [1, 3]:
        img = np.transpose(img, (1, 2, 0))
    if img.dtype != np.uint8:
        # Heuristic: max <= 1.0 means the image is normalized to [0, 1].
        if img.max() <= 1.0:
            img = (np.clip(img, 0, 1) * 255).astype(np.uint8)
        else:
            img = np.clip(img, 0, 255).astype(np.uint8)
    return img
|
|
|
|
|
|
|
def get_tile_indices(path):
    """Return (zoom, row, col) parsed from the filename's image_id field.

    For database images, field index 9 of the '@'-separated filename is
    expected to be formatted as "zoom_row_col".
    Raises ValueError when the field is missing or malformed.
    """
    fields = get_image_metadata_from_path(path)
    try:
        parts = fields[9].split("_")
        return int(parts[0]), int(parts[1]), int(parts[2])
    except Exception as exc:
        raise ValueError(
            f"Could not extract tile indices from filename: {Path(path).name}"
        ) from exc
|
|
|
|
|
def get_image_metadata_from_path(path):
    """Split an image filename's stem into its '@'-separated metadata fields.

    Assumes the filename is structured with '@' separators, e.g.,
    "image@lat1@lon1@lat2@lon2@lat3@lon3@lat4@lon4@image_id@timestamp@nadir_lat@nadir_lon@sq_km_area@orientation.jpg"

    Accepts str, pathlib.Path, or any os.PathLike. The previous version
    handled str and Path in two identical branches and silently returned
    None for any other type; routing everything through Path() keeps the
    same behavior for str/Path while making unsupported inputs fail
    loudly (TypeError) instead of propagating None to callers.
    """
    return Path(path).stem.split("@")
|
|
|
|
|
def get_footprint_from_path(path):
    """Extract the geographic footprint from the filename metadata.

    Fields 1-8 of the '@'-separated filename hold the four corner
    coordinates; returns them as a list of four (lat, lon) tuples.
    """
    fields = get_image_metadata_from_path(path)
    values = [float(v) for v in fields[1:9]]
    # Pair consecutive values into (lat, lon) tuples.
    return [(values[i], values[i + 1]) for i in range(0, 8, 2)]
|
|
|
|
|
|
|
|
|
|