import cv2
import torch
import shapely
import numpy as np
from matching import viz2d
import matplotlib.pyplot as plt
import torchvision.transforms as tfm
from sklearn.linear_model import LogisticRegression
from torchvision.transforms.functional import InterpolationMode
from pathlib import Path
def path_to_footprint(path):
"""Given the path of a prediction, get its footprint (i.e. 4 corners) lat and lon"""
_, min_lat, min_lon, _, _, max_lat, max_lon = str(path).split("@")[:7]
min_lat, min_lon, max_lat, max_lon = float(min_lat), float(min_lon), float(max_lat), float(max_lon)
pred_footprint = np.array([min_lat, min_lon, max_lat, min_lon, max_lat, max_lon, min_lat, max_lon])
pred_footprint = pred_footprint.reshape(4, 2)
pred_footprint = torch.tensor(pred_footprint)
return pred_footprint
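# A hedged usage sketch (hypothetical filename, assuming the "@"-separated
# convention above: fields 1-2 are min lat/lon, fields 5-6 are max lat/lon):
#   path_to_footprint("img@10.0@20.0@x@y@11.0@21.0@id.jpg")
#   -> tensor([[10., 20.], [11., 20.], [11., 21.], [10., 21.]], dtype=torch.float64)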
def apply_homography_to_corners(width, height, fm):
"""
Transform the four corners of an image of given width and height using the provided homography matrix.
:param width: Width of the image.
:param height: Height of the image.
    :param fm: 3x3 homography matrix (named `fm` throughout this codebase).
:return: New pixel coordinates of the four corners after homography.
"""
# Define the four corners of the image
corners = np.array([
[0, 0], # Top-left corner
[width, 0], # Top-right corner
[width, height], # Bottom-right corner
[0, height] # Bottom-left corner
], dtype='float32')
    # Reshape to (4, 1, 2), the layout expected by cv2.perspectiveTransform
    corners = corners.reshape(4, 1, 2)
# Use the homography matrix to transform the corners
transformed_corners = cv2.perspectiveTransform(corners, fm)
return torch.tensor(transformed_corners).type(torch.int)[:, 0]
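# Sanity-check sketch (an illustration, not part of the pipeline): with the
# identity homography the corners map back to themselves.
#   apply_homography_to_corners(100, 100, np.eye(3, dtype=np.float32))
#   -> tensor([[  0,   0], [100,   0], [100, 100], [  0, 100]], dtype=torch.int32)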
def compute_matching(image0, image1, matcher, save_images=False, viz_params=None):
    # NOTE: the output format was changed in the BaseMatcher class without this
    # code being updated, so we adapt to the new dict-based output here.
    output_dict = matcher(image0, image1)
    num_inliers = output_dict['num_inliers']
    # NOTE: strictly speaking, "fundamental matrix" belongs to epipolar geometry;
    # this codebase uses the name `fm` for what is actually a homography matrix.
    fm = output_dict["H"]
    # Matched keypoints from the two images, after RANSAC filtering
    mkpts0 = output_dict['inlier_kpts0']
    mkpts1 = output_dict['inlier_kpts1']
    if num_inliers == 0:
        # The matcher did not find enough matches between img0 and img1
        return num_inliers, fm
    if fm is None or not (isinstance(fm, np.ndarray) and fm.shape == (3, 3)):
        # Inliers were found but the homography is missing or malformed
        print("Number of inliers is non-zero but the homography is invalid")
        return 0, None
if save_images:
path0 = viz_params["query_path"]
path1 = viz_params["pred_path"]
output_dir = viz_params["output_dir"]
output_file_suffix = viz_params["output_file_suffix"]
stem0, stem1 = path0.stem, path1.stem
matches_path = output_dir / f'{stem0}_{stem1}_matches_{output_file_suffix}.torch'
viz_path = output_dir / f'{stem0}_{stem1}_matches_{output_file_suffix}.jpg'
output_dir.mkdir(exist_ok=True)
viz2d.plot_images([image0, image1])
viz2d.plot_matches(mkpts0, mkpts1, color="lime", lw=0.2)
viz2d.add_text(0, f'{len(mkpts1)} matches', fs=20)
viz2d.save_plot(viz_path)
plt.close()
torch.save((num_inliers, fm, mkpts0, mkpts1), matches_path)
return num_inliers, fm
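# When save_images is True, compute_matching expects viz_params to be a dict
# with these keys (inferred from the usage above; paths are pathlib.Path objects):
#   viz_params = {
#       "query_path": ...,          # Path of the query image
#       "pred_path": ...,           # Path of the prediction image
#       "output_dir": ...,          # Path of the directory for outputs
#       "output_file_suffix": ...,  # str appended to output filenames
#   }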
def footprint_to_minmax_latlon(footprint):
lats = footprint[:, 0]
lons = footprint[:, 1]
min_lat = lats.min()
max_lat = lats.max()
min_lon = lons.min()
max_lon = lons.max()
return min_lat, min_lon, max_lat, max_lon
def get_lat_lon_per_pixel(footprint, HW):
"""Return the change in lat lon per each pixel"""
min_lat, min_lon, max_lat, max_lon = footprint_to_minmax_latlon(footprint)
lat_per_pixel = (max_lat - min_lat) / HW
lon_per_pixel = (max_lon - min_lon) / HW
return lat_per_pixel, lon_per_pixel
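# Worked example: a footprint spanning 1 degree of latitude rendered on a
# 512-pixel-tall image gives 1 / 512 ~= 0.00195 degrees of latitude per pixel.
# A single HW is used for both axes, i.e. square images are assumed.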
def apply_homography_to_footprint(pred_footprint, transformed_corners, HW):
center = pred_footprint.mean(0)
diff = pred_footprint.max(0)[0] - pred_footprint.min(0)[0]
diff *= 1.5
min_lat, min_lon = center - diff
max_lat, max_lon = center + diff
surrounding_pred_footprint = np.array([min_lat, min_lon, max_lat, min_lon, max_lat, max_lon, min_lat, max_lon]).reshape(4, 2)
lat_per_pixel, lon_per_pixel = get_lat_lon_per_pixel(surrounding_pred_footprint, HW)
min_lat, min_lon, max_lat, max_lon = footprint_to_minmax_latlon(surrounding_pred_footprint)
px_lats = transformed_corners[:, 1]
px_lons = transformed_corners[:, 0]
ul_lat = max_lat - (px_lats[0] * lat_per_pixel)
ul_lon = min_lon + (px_lons[0] * lon_per_pixel)
ur_lat = max_lat - (px_lats[1] * lat_per_pixel)
ur_lon = min_lon + (px_lons[1] * lon_per_pixel)
ll_lat = max_lat - (px_lats[2] * lat_per_pixel)
ll_lon = min_lon + (px_lons[2] * lon_per_pixel)
lr_lat = max_lat - (px_lats[3] * lat_per_pixel)
lr_lon = min_lon + (px_lons[3] * lon_per_pixel)
warped_pred_footprint = torch.tensor([
[ul_lat, ul_lon], [ur_lat, ur_lon], [ll_lat, ll_lon], [lr_lat, lr_lon]
])
return warped_pred_footprint
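# Geometry note: diff is scaled by 1.5 and applied on both sides of the center,
# so surrounding_pred_footprint spans 3x the original footprint along each axis.
# This matches the HW*3 passed by estimate_footprint below, where the prediction
# occupies the central third of the surrounding image.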
def add_homographies_fm(fm1, fm2):
return np.linalg.inv(np.linalg.inv(fm2) @ np.linalg.inv(fm1))
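# Algebraically, inv(inv(fm2) @ inv(fm1)) == fm1 @ fm2 (up to numerical
# precision), i.e. this composes the two homographies, applying fm2 first
# and then fm1.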
def compute_threshold(true_matches, false_matches, thresh=0.999):
assert isinstance(true_matches, list)
assert isinstance(false_matches, list)
    if len(true_matches) < 4:
        # Too few positives to fit a meaningful model; fall back to a fixed threshold
        return 4
    X_r = np.array(true_matches).reshape(-1, 1)
    X_w = np.array(false_matches).reshape(-1, 1)
    X = np.concatenate((X_r, X_w))
    Y_r = np.ones(len(true_matches), dtype=int)
    Y_w = np.zeros(len(false_matches), dtype=int)
    Y = np.concatenate((Y_r, Y_w))
    lr = LogisticRegression()
    lr.fit(X, Y)
    # Invert the sigmoid: find x such that P(true match | x) == thresh
    f_y = -np.log((1 - thresh) / thresh)  # logit(thresh)
    match_thresh = (f_y - lr.intercept_) / lr.coef_
    return match_thresh.item()
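# Derivation sketch: the fitted model is P(true | x) = sigmoid(w*x + b). Solving
# sigmoid(w*x + b) == thresh for x gives x = (logit(thresh) - b) / w, where
# logit(thresh) = -log((1 - thresh) / thresh) is f_y above. So match_thresh is
# the inlier count at which the model assigns probability `thresh` to "true match".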
def estimate_footprint(
fm, query_image, surrounding_image, matcher, pred_footprint, HW,
save_images=False, viz_params=None
):
"""Estimate the footprint of the query given a prediction/candidate.
    This is equivalent to a single iteration of EarthMatch.
Parameters
----------
fm : np.array of shape (3, 3), the fundamental matrix from previous iteration (None if first iteration).
query_image : torch.tensor of shape (3, H, W) with the query image.
surrounding_image : torch.tensor of shape (3, H, W) with the surrounding image.
matcher : a matcher from the image-matching-models.
pred_footprint : torch.tensor of shape (4, 2) with the prediction's footprint.
HW : int, the height and width of the images.
    save_images : bool, if True then save the matching visualizations.
    viz_params : parameters used for visualizations if save_images is True.
Returns
-------
num_inliers : int, number of inliers from the matching.
fm : np.array of shape (3, 3), the new fundamental matrix mapping query to prediction.
warped_pred_footprint : torch.tensor of shape (4, 2) with the pred_footprint after applying fm's homography.
pretty_printed_footprint : str, a pretty printed warped_pred_footprint.
If the prediction is deemed invalid, return (-1, None, None, None).
"""
assert surrounding_image.shape[1] == surrounding_image.shape[2], f"{surrounding_image.shape}"
if fm is not None:
# Use the FM (i.e. homography) from the previous iteration to generate the new candidate
transformed_corners = apply_homography_to_corners(HW, HW, fm) + HW
endpoints = [[HW, HW], [HW*2, HW], [HW*2, HW*2], [HW, HW*2]]
warped_surrounding_pred_img = tfm.functional.perspective(
surrounding_image, transformed_corners.numpy(), endpoints, InterpolationMode.BILINEAR
)
else:
warped_surrounding_pred_img = surrounding_image
assert tuple(warped_surrounding_pred_img.shape) == (3, HW*3, HW*3)
warped_pred_img = warped_surrounding_pred_img[:, HW:HW*2, HW:HW*2]
if save_images:
tfm.ToPILImage()(warped_pred_img).save(viz_params["pred_path"])
num_inliers, new_fm = compute_matching(
image0=query_image, image1=warped_pred_img,
matcher=matcher, save_images=save_images,
viz_params=viz_params
)
if num_inliers == 0:
# If no inliers are found, stop the iterative process
return -1, None, None, None
if fm is None: # At the first iteration fm is None
fm = new_fm
else:
fm = add_homographies_fm(fm, new_fm)
transformed_corners = apply_homography_to_corners(HW, HW, fm) + HW
pred_polygon = shapely.Polygon((transformed_corners - HW) / HW)
if not pred_polygon.convex_hull.equals(pred_polygon):
# If the prediction has a non-convex footprint, it is considered not valid
return -1, None, None, None
if pred_polygon.area > 9:
# If the prediction's area is bigger than the surrounding_image's area, it is considered not valid
return -1, None, None, None
warped_pred_footprint = apply_homography_to_footprint(pred_footprint, transformed_corners, HW*3)
pretty_printed_footprint = "; ".join([f"{lat_lon[0]:.5f}, {lat_lon[1]:.5f}" for lat_lon in warped_pred_footprint])
return num_inliers, fm, warped_pred_footprint, pretty_printed_footprint
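# A minimal sketch of the intended iterative use (hypothetical driver code; the
# actual EarthMatch loop, stopping criteria and matcher setup live elsewhere):
#
#   fm = None
#   for _ in range(num_iterations):
#       num_inliers, fm, footprint, pretty = estimate_footprint(
#           fm, query_image, surrounding_image, matcher, pred_footprint, HW
#       )
#       if num_inliers == -1:
#           break  # invalid prediction, stop refining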
def enlarge_polygon(polygon, scale_factor):
cntr = polygon.centroid
scaled_coords = [(cntr.x + (lat - cntr.x) * scale_factor, cntr.y + (lon - cntr.y) * scale_factor)
for lat, lon in zip(*polygon.exterior.xy)]
scaled_polygon = shapely.Polygon(scaled_coords)
return scaled_polygon
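# Example: enlarge_polygon(p, 2) doubles every side length about the centroid,
# so the returned polygon has 4x the area of the input.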
def get_polygon(lats_lons):
assert isinstance(lats_lons, np.ndarray)
assert lats_lons.shape == (4, 2)
polygon = shapely.Polygon(lats_lons)
return polygon
def get_query_metadata(query_path):
"""Given the path of the query, extract and return the
center_lat, center_lon, tilt (i.e. obliqueness), focal length and cloud percentage.
"""
_, lat, lon, nlat, nlon, tilt, fclt, cldp, mrf, _ = str(query_path).split("@")
return float(lat), float(lon), int(tilt), int(fclt), int(cldp)
def get_centerpoint_from_query_path(query_path):
lat, lon, tilt, fclt, cldp = get_query_metadata(query_path)
return shapely.Point(lat, lon)
def fclt_le_200(query_path):
lat, lon, tilt, fclt, cldp = get_query_metadata(query_path)
return fclt <= 200
def fclt_200_400(query_path):
lat, lon, tilt, fclt, cldp = get_query_metadata(query_path)
return 200 < fclt <= 400
def fclt_400_800(query_path):
lat, lon, tilt, fclt, cldp = get_query_metadata(query_path)
return 400 < fclt <= 800
def fclt_g_800(query_path):
lat, lon, tilt, fclt, cldp = get_query_metadata(query_path)
return 800 < fclt
def tilt_l_40(query_path):
lat, lon, tilt, fclt, cldp = get_query_metadata(query_path)
return tilt < 40
def tilt_ge_40(query_path):
lat, lon, tilt, fclt, cldp = get_query_metadata(query_path)
return tilt >= 40
def cldp_l_40(query_path):
lat, lon, tilt, fclt, cldp = get_query_metadata(query_path)
return cldp < 40
def cldp_ge_40(query_path):
lat, lon, tilt, fclt, cldp = get_query_metadata(query_path)
return cldp >= 40
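# The functions above are filter predicates over query filenames, useful to
# slice results by focal length (fclt), tilt and cloud percentage (cldp).
# A hedged usage sketch (assuming `query_paths` is a list of Path objects):
#   low_tilt_queries = [p for p in query_paths if tilt_l_40(p)]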
# --- My Functions ---
def to_uint8_numpy(img):
    """Convert a torch tensor or numpy array image to a uint8 HWC numpy array."""
    if hasattr(img, 'cpu'):
        img = img.cpu()
    if hasattr(img, 'detach'):
        img = img.detach()
    if hasattr(img, 'numpy'):
        img = img.numpy()
    # Move channels last if the image is CHW with 1 or 3 channels
    if img.ndim == 3 and img.shape[0] in [1, 3]:
        img = np.transpose(img, (1, 2, 0))
    if img.dtype != np.uint8:
        if img.max() <= 1.0:
            # Assume a float image in [0, 1]: clip, then scale to [0, 255]
            img = (np.clip(img, 0, 1) * 255).astype(np.uint8)
        else:
            # Assume values already in [0, 255]: clip and cast
            img = np.clip(img, 0, 255).astype(np.uint8)
    return img
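# Usage sketch (an illustrative assumption): a float32 torch tensor of shape
# (3, H, W) with values in [0, 1] comes out as a uint8 numpy array of shape
# (H, W, 3) with values in [0, 255], ready for cv2 / PIL consumption.
#   to_uint8_numpy(torch.rand(3, 64, 64)).shape  # -> (64, 64, 3)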
def get_tile_indices(path):
"""
Extract the image_id field from the filename metadata and return (zoom, row, col).
For database images, image_id is expected at index 9 formatted as "zoom_row_col".
"""
splits = get_image_metadata_from_path(path)
try:
        image_id_str = splits[9]  # field at index 9: image_id, formatted "zoom_row_col"
parts = image_id_str.split("_")
zoom = int(parts[0])
row = int(parts[1])
col = int(parts[2])
return zoom, row, col
except Exception as e:
raise ValueError(
f"Could not extract tile indices from filename: {Path(path).name}"
) from e
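# Hypothetical example, assuming the filename convention documented in
# get_image_metadata_from_path below, with image_id "5_12_7" at index 9:
#   get_tile_indices("img@...@5_12_7@...jpg") -> (5, 12, 7)
# (the "..." stands for the other "@"-separated fields, elided here)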
# --- Helper Functions for Metadata/Footprint and HTML ---
def get_image_metadata_from_path(path):
    """
    Assumes the filename is structured with '@' separators, e.g.,
    "image@lat1@lon1@lat2@lon2@lat3@lon3@lat4@lon4@image_id@timestamp@nadir_lat@nadir_lon@sq_km_area@orientation.jpg"
    """
    # Path() accepts both str and Path, so one code path covers both input types
    return Path(path).stem.split("@")
def get_footprint_from_path(path):
"""
Extract the geographic footprint from the filename metadata.
Returns a list of four (lat, lon) tuples.
"""
splits = get_image_metadata_from_path(path)
coords = splits[1:9]
coords = [float(c) for c in coords]
return [
(coords[0], coords[1]),
(coords[2], coords[3]),
(coords[4], coords[5]),
(coords[6], coords[7]),
]