
from pathlib import Path

import cv2
import torch
import shapely
import numpy as np
import matplotlib.pyplot as plt
import torchvision.transforms as tfm
from torchvision.transforms.functional import InterpolationMode
from sklearn.linear_model import LogisticRegression
from matching import viz2d

def path_to_footprint(path):
    """Given the path of a prediction, get its footprint (i.e. 4 corners) lat and lon"""
    _, min_lat, min_lon, _, _, max_lat, max_lon = str(path).split("@")[:7]
    min_lat, min_lon, max_lat, max_lon = float(min_lat), float(min_lon), float(max_lat), float(max_lon)
    pred_footprint = np.array([min_lat, min_lon, max_lat, min_lon, max_lat, max_lon, min_lat, max_lon])
    pred_footprint = pred_footprint.reshape(4, 2)
    pred_footprint = torch.tensor(pred_footprint)
    return pred_footprint
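

# Minimal usage sketch (the filename below is hypothetical, but it follows the
# "@"-separated convention that path_to_footprint assumes):
def _example_path_to_footprint():
    hypothetical_path = "pred@37.70@-122.50@a@b@37.80@-122.40@rest.jpg"
    footprint = path_to_footprint(hypothetical_path)
    assert tuple(footprint.shape) == (4, 2)  # four (lat, lon) corners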


def apply_homography_to_corners(width, height, fm):
    """
    Transform the four corners of an image of given width and height using the provided homography matrix.
    :param width: Width of the image.
    :param height: Height of the image.
    :param fm: 3x3 homography matrix (named `fm`, i.e. fundamental matrix, throughout this codebase).
    :return: New pixel coordinates of the four corners after homography.
    """
    # Define the four corners of the image
    corners = np.array([
        [0, 0],  # Top-left corner
        [width, 0],  # Top-right corner
        [width, height],  # Bottom-right corner
        [0, height]  # Bottom-left corner
    ], dtype='float32')
    # Reshape the corners for homography transformation
    corners = np.array([corners])
    corners = np.reshape(corners, (4, 1, 2))
    # Use the homography matrix to transform the corners
    transformed_corners = cv2.perspectiveTransform(corners, fm)
    return torch.tensor(transformed_corners).type(torch.int)[:, 0]
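

# Quick sanity-check sketch: the identity homography should leave the four
# corners unchanged (up to the int cast).
def _example_identity_homography():
    corners = apply_homography_to_corners(100, 100, np.eye(3, dtype="float32"))
    # Row order: top-left, top-right, bottom-right, bottom-left
    assert corners.tolist() == [[0, 0], [100, 0], [100, 100], [0, 100]]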


def compute_matching(image0, image1, matcher, save_images=False, viz_params=None):
    """Match image0 and image1, returning the number of inliers and the
    estimated homography; optionally save visualizations of the matches."""
    # The output format was changed in the BaseMatcher class but this code was
    # not updated accordingly, so adapt to the new dictionary output here
    output_dictionary = matcher(image0, image1)

    num_inliers = output_dictionary['num_inliers']

    # Note: strictly speaking, "fundamental matrix" belongs to epipolar
    # geometry; throughout this codebase the variable `fm` actually holds the
    # 3x3 homography matrix returned by the matcher
    fm = output_dictionary["H"]

    # The matched keypoints from the two images, after RANSAC filtering
    mkpts0 = output_dictionary['inlier_kpts0']
    mkpts1 = output_dictionary['inlier_kpts1']


    if num_inliers == 0:
        # The matcher did not find enough matches between image0 and image1
        return num_inliers, fm

    # num_inliers > 0 past this point; reject matches with an invalid homography
    if fm is None or not (isinstance(fm, np.ndarray) and fm.shape == (3, 3)):
        print("Number of inliers is not zero but the homography is None")
        return 0, None

    
    if save_images:
        path0 = viz_params["query_path"]
        path1 = viz_params["pred_path"]
        output_dir = viz_params["output_dir"]
        output_file_suffix = viz_params["output_file_suffix"]
        stem0, stem1 = path0.stem, path1.stem
        matches_path = output_dir / f'{stem0}_{stem1}_matches_{output_file_suffix}.torch'
        viz_path = output_dir / f'{stem0}_{stem1}_matches_{output_file_suffix}.jpg'
        output_dir.mkdir(exist_ok=True)
        viz2d.plot_images([image0, image1])
        viz2d.plot_matches(mkpts0, mkpts1, color="lime", lw=0.2)
        viz2d.add_text(0, f'{len(mkpts1)} matches', fs=20)
        viz2d.save_plot(viz_path)
        plt.close()
        torch.save((num_inliers, fm, mkpts0, mkpts1), matches_path)
    
    return num_inliers, fm
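

# Hedged usage sketch (assumes the image-matching-models API, where matchers
# are built via `from matching import get_matcher`; the matcher name and
# device below are illustrative):
#
#   from matching import get_matcher
#   matcher = get_matcher("superpoint-lg", device="cuda")
#   num_inliers, fm = compute_matching(image0, image1, matcher)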


def footprint_to_minmax_latlon(footprint):
    lats = footprint[:, 0]
    lons = footprint[:, 1]
    min_lat = lats.min()
    max_lat = lats.max()
    min_lon = lons.min()
    max_lon = lons.max()
    return min_lat, min_lon, max_lat, max_lon


def get_lat_lon_per_pixel(footprint, HW):
    """Return the change in lat lon per each pixel"""
    min_lat, min_lon, max_lat, max_lon = footprint_to_minmax_latlon(footprint)
    lat_per_pixel = (max_lat - min_lat) / HW
    lon_per_pixel = (max_lon - min_lon) / HW
    return lat_per_pixel, lon_per_pixel


def apply_homography_to_footprint(pred_footprint, transformed_corners, HW):
    """Map the warped pixel corners back to (lat, lon), using a footprint
    enlarged 1.5x around the prediction's center as the reference frame.
    `transformed_corners` rows are ordered top-left, top-right, bottom-right,
    bottom-left (the order produced by apply_homography_to_corners)."""
    center = pred_footprint.mean(0)
    diff = pred_footprint.max(0)[0] - pred_footprint.min(0)[0]
    diff *= 1.5
    min_lat, min_lon = center - diff
    max_lat, max_lon = center + diff
    surrounding_pred_footprint = np.array(
        [min_lat, min_lon, max_lat, min_lon, max_lat, max_lon, min_lat, max_lon]
    ).reshape(4, 2)

    lat_per_pixel, lon_per_pixel = get_lat_lon_per_pixel(surrounding_pred_footprint, HW)
    min_lat, min_lon, max_lat, max_lon = footprint_to_minmax_latlon(surrounding_pred_footprint)

    # Pixel rows map to latitude (decreasing from the top), columns to longitude
    px_lats = transformed_corners[:, 1]
    px_lons = transformed_corners[:, 0]

    # Convert each corner from pixel coordinates to (lat, lon), preserving the
    # row order of transformed_corners
    corners_lat_lon = [
        (max_lat - (px_lats[i] * lat_per_pixel), min_lon + (px_lons[i] * lon_per_pixel))
        for i in range(4)
    ]
    warped_pred_footprint = torch.tensor(corners_lat_lon)
    return warped_pred_footprint


def add_homographies_fm(fm1, fm2):
    """Compose two homographies; note that inv(inv(fm2) @ inv(fm1)) equals fm1 @ fm2."""
    return np.linalg.inv(np.linalg.inv(fm2) @ np.linalg.inv(fm1))
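

# Illustrative check: for invertible matrices, inv(inv(B) @ inv(A)) == A @ B,
# so composing homographies this way matches a plain matrix product.
def _example_homography_composition():
    fm1 = np.array([[1.0, 0.0, 5.0], [0.0, 1.0, -3.0], [0.0, 0.0, 1.0]])
    fm2 = np.array([[1.1, 0.0, 0.0], [0.0, 0.9, 2.0], [0.0, 0.0, 1.0]])
    assert np.allclose(add_homographies_fm(fm1, fm2), fm1 @ fm2)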


def compute_threshold(true_matches, false_matches, thresh=0.999):
    """Fit a logistic regression on the inlier counts and return the number of
    inliers at which the predicted probability of a correct match is `thresh`."""
    assert isinstance(true_matches, list)
    assert isinstance(false_matches, list)
    if len(true_matches) < 4:
        return 4
    X_r = np.array(true_matches).reshape(-1, 1)
    X_w = np.array(false_matches).reshape(-1, 1)
    X = np.concatenate((X_r, X_w))
    Y_r = np.ones(len(true_matches), dtype=int)
    Y_w = np.zeros(len(false_matches), dtype=int)
    Y = np.concatenate((Y_r, Y_w))
    lr = LogisticRegression()
    lr.fit(X, Y)
    f_y = -np.log((1 - thresh) / thresh)  # logit(thresh)
    match_thresh = (f_y - lr.intercept_) / lr.coef_
    return match_thresh.item()
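

# Hedged sketch on synthetic data: with true matches clustered around 100
# inliers and false matches around 10, the learned 0.999-confidence threshold
# should land above the false cluster.
def _example_compute_threshold():
    threshold = compute_threshold([90, 100, 110, 120], [5, 8, 10, 12])
    assert threshold > 12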


def estimate_footprint(
    fm, query_image, surrounding_image, matcher, pred_footprint, HW,
    save_images=False, viz_params=None
):
    """Estimate the footprint of the query given a prediction/candidate.
    This is equivalent of a single iteration of EarthMatch.

    Parameters
    ----------
    fm : np.array of shape (3, 3), the homography from the previous iteration (None at the first iteration).
    query_image : torch.tensor of shape (3, H, W) with the query image.
    surrounding_image : torch.tensor of shape (3, H, W) with the surrounding image.
    matcher : a matcher from the image-matching-models.
    pred_footprint : torch.tensor of shape (4, 2) with the prediction's footprint.
    HW : int, the height and width of the images.
    save_images : bool, if True then save the matching visualizations.
    viz_params : dict of parameters used for visualizations when save_images is True.

    Returns
    -------
    num_inliers : int, number of inliers from the matching.
    fm : np.array of shape (3, 3), the new homography mapping query to prediction.
    warped_pred_footprint : torch.tensor of shape (4, 2) with the pred_footprint after applying fm's homography.
    pretty_printed_footprint : str, a pretty printed warped_pred_footprint.

    If the prediction is deemed invalid, return (-1, None, None, None).
    """
    assert surrounding_image.shape[1] == surrounding_image.shape[2], f"{surrounding_image.shape}"
    
    if fm is not None:
        # Use the FM (i.e. homography) from the previous iteration to generate the new candidate
        transformed_corners = apply_homography_to_corners(HW, HW, fm) + HW
        endpoints = [[HW, HW], [HW*2, HW], [HW*2, HW*2], [HW, HW*2]]
        warped_surrounding_pred_img = tfm.functional.perspective(
            surrounding_image, transformed_corners.numpy(), endpoints, InterpolationMode.BILINEAR
        )
    else:
        warped_surrounding_pred_img = surrounding_image
    
    assert tuple(warped_surrounding_pred_img.shape) == (3, HW*3, HW*3)
    warped_pred_img = warped_surrounding_pred_img[:, HW:HW*2, HW:HW*2]
    
    if save_images:
        tfm.ToPILImage()(warped_pred_img).save(viz_params["pred_path"])
    
    num_inliers, new_fm = compute_matching(
        image0=query_image, image1=warped_pred_img,
        matcher=matcher, save_images=save_images,
        viz_params=viz_params
    )
    
    if num_inliers == 0:
        # If no inliers are found, stop the iterative process
        return -1, None, None, None
    
    if fm is None:  # At the first iteration fm is None
        fm = new_fm
    else:
        fm = add_homographies_fm(fm, new_fm)
    
    transformed_corners = apply_homography_to_corners(HW, HW, fm) + HW

    pred_polygon = shapely.Polygon((transformed_corners - HW) / HW)
    if not pred_polygon.convex_hull.equals(pred_polygon):
        # If the prediction has a non-convex footprint, it is considered not valid
        return -1, None, None, None
    if pred_polygon.area > 9:
        # If the prediction's area is bigger than the surrounding_image's area, it is considered not valid
        return -1, None, None, None

    warped_pred_footprint = apply_homography_to_footprint(pred_footprint, transformed_corners, HW*3)
    pretty_printed_footprint = "; ".join([f"{lat_lon[0]:.5f}, {lat_lon[1]:.5f}" for lat_lon in warped_pred_footprint])
    return num_inliers, fm, warped_pred_footprint, pretty_printed_footprint
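

# Hedged sketch of the iterative refinement loop this function is designed to
# run inside (loop count and variable names are illustrative, following the
# docstring's description of a single EarthMatch iteration):
#
#   fm = None
#   for _ in range(num_iterations):
#       num_inliers, fm, footprint, pretty = estimate_footprint(
#           fm, query_image, surrounding_image, matcher, pred_footprint, HW)
#       if num_inliers == -1:
#           break  # candidate rejected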


def enlarge_polygon(polygon, scale_factor):
    cntr = polygon.centroid
    scaled_coords = [(cntr.x + (lat - cntr.x) * scale_factor, cntr.y + (lon - cntr.y) * scale_factor)
                     for lat, lon in zip(*polygon.exterior.xy)]
    scaled_polygon = shapely.Polygon(scaled_coords)
    return scaled_polygon
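

# Small sketch: enlarging a unit square about its centroid by a factor of 2
# should quadruple its area.
def _example_enlarge_polygon():
    square = shapely.Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
    assert abs(enlarge_polygon(square, 2.0).area - 4.0) < 1e-9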


def get_polygon(lats_lons):
    assert isinstance(lats_lons, np.ndarray)
    assert lats_lons.shape == (4, 2)
    polygon = shapely.Polygon(lats_lons)
    return polygon


def get_query_metadata(query_path):
    """Given the path of the query, extract and return the
    center_lat, center_lon, tilt (i.e. obliqueness), focal length and cloud percentage.
    """
    _, lat, lon, nlat, nlon, tilt, fclt, cldp, mrf, _ = str(query_path).split("@")
    return float(lat), float(lon), int(tilt), int(fclt), int(cldp)
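

# Hedged sketch of the query filename convention this parser assumes
# (all values below are hypothetical):
def _example_get_query_metadata():
    name = "q@45.0@7.6@44.9@7.5@30@400@10@x@suffix.jpg"
    assert get_query_metadata(name) == (45.0, 7.6, 30, 400, 10)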


def get_centerpoint_from_query_path(query_path):
    lat, lon, tilt, fclt, cldp = get_query_metadata(query_path)
    return shapely.Point(lat, lon)


def fclt_le_200(query_path):
    lat, lon, tilt, fclt, cldp = get_query_metadata(query_path)
    return fclt <= 200


def fclt_200_400(query_path):
    lat, lon, tilt, fclt, cldp = get_query_metadata(query_path)
    return 200 < fclt <= 400


def fclt_400_800(query_path):
    lat, lon, tilt, fclt, cldp = get_query_metadata(query_path)
    return 400 < fclt <= 800


def fclt_g_800(query_path):
    lat, lon, tilt, fclt, cldp = get_query_metadata(query_path)
    return 800 < fclt


def tilt_l_40(query_path):
    lat, lon, tilt, fclt, cldp = get_query_metadata(query_path)
    return tilt < 40


def tilt_ge_40(query_path):
    lat, lon, tilt, fclt, cldp = get_query_metadata(query_path)
    return tilt >= 40


def cldp_l_40(query_path):
    lat, lon, tilt, fclt, cldp = get_query_metadata(query_path)
    return cldp < 40


def cldp_ge_40(query_path):
    lat, lon, tilt, fclt, cldp = get_query_metadata(query_path)
    return cldp >= 40


# --- My functions ---


def to_uint8_numpy(img):
    """Convert an image (torch tensor or numpy array) to an HWC uint8 numpy array."""
    if hasattr(img, 'cpu'):
        img = img.cpu()
    if hasattr(img, 'detach'):
        img = img.detach()
    if hasattr(img, 'numpy'):
        img = img.numpy()
    if img.ndim == 3 and img.shape[0] in [1, 3]:
        img = np.transpose(img, (1, 2, 0))  # CHW -> HWC
    if img.dtype != np.uint8:
        if img.max() <= 1.0:  # assume values in [0, 1]
            img = (np.clip(img, 0, 1) * 255).astype(np.uint8)
        else:  # assume values in [0, 255]
            img = np.clip(img, 0, 255).astype(np.uint8)
    return img
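

# Usage sketch: a float CHW torch tensor in [0, 1] becomes an HWC uint8 array.
def _example_to_uint8_numpy():
    out = to_uint8_numpy(torch.rand(3, 8, 8))
    assert out.shape == (8, 8, 3) and out.dtype == np.uint8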



def get_tile_indices(path):
    """
    Extract the image_id field from the filename metadata and return (zoom, row, col).
    For database images, image_id is expected at index 9 formatted as "zoom_row_col".
    """
    splits = get_image_metadata_from_path(path)
    try:
        image_id_str = splits[9]  # the field at index 9 holds the image_id
        parts = image_id_str.split("_")
        zoom = int(parts[0])
        row = int(parts[1])
        col = int(parts[2])
        return zoom, row, col
    except Exception as e:
        raise ValueError(
            f"Could not extract tile indices from filename: {Path(path).name}"
        ) from e

# --- Helper Functions for Metadata/Footprint and HTML ---
def get_image_metadata_from_path(path):
    """
    Assumes the filename is structured with '@' separators, e.g.,
    "image@lat1@lon1@lat2@lon2@lat3@lon3@lat4@lon4@image_id@timestamp@nadir_lat@nadir_lon@sq_km_area@orientation.jpg"
    """
    # Path(path) accepts both str and Path objects, so a single branch suffices
    return Path(path).stem.split("@")


def get_footprint_from_path(path):
    """
    Extract the geographic footprint from the filename metadata.
    Returns a list of four (lat, lon) tuples.
    """
    splits = get_image_metadata_from_path(path)
    coords = splits[1:9]
    coords = [float(c) for c in coords]
    return [
        (coords[0], coords[1]),
        (coords[2], coords[3]),
        (coords[4], coords[5]),
        (coords[6], coords[7]),
    ]
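

# Hedged end-to-end sketch of the database filename convention (values are
# hypothetical): fields 1-8 are the four (lat, lon) corners and field 9 is
# the "zoom_row_col" image_id.
def _example_parse_database_filename():
    name = "img@10.0@20.0@10.0@21.0@11.0@21.0@11.0@20.0@5_3_7@t@0.0@0.0@1.0@0.jpg"
    assert get_footprint_from_path(name) == [(10.0, 20.0), (10.0, 21.0), (11.0, 21.0), (11.0, 20.0)]
    assert get_tile_indices(name) == (5, 3, 7)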