File size: 8,741 Bytes
f83d6df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
#!/usr/bin/env python3
"""
Quick viewer for collected MapCrunch data
"""

import json
import os
from pathlib import Path
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from collections import Counter

def view_data_summary(data_file='data/golden_labels.json'):
    """Print a summary of a collected MapCrunch data file to stdout.

    The JSON document is read from ``data_file``; its ``samples`` list and
    ``metadata`` dict are both optional and default to empty. The report
    contains the collection metadata, any recorded statistics, the ten most
    common countries, coordinate/thumbnail coverage and the first ten sample
    locations.

    Args:
        data_file: Path of the JSON file to summarise.

    Returns:
        None. If the file does not exist a hint is printed and the function
        returns early instead of raising.
    """

    try:
        with open(data_file, 'r') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"❌ No data file found at {data_file}")
        print("πŸ’‘ Run data collection first: python main.py --mode data --samples 50")
        return

    samples = data.get('samples', [])
    metadata = data.get('metadata', {})
    total = len(samples)

    def coverage(count):
        # An empty collection would otherwise divide by zero; report 0.0%.
        percent = count / total * 100 if total else 0.0
        return f"{count}/{total} ({percent:.1f}%)"

    print("πŸ“Š MapCrunch Data Collection Summary")
    print('=' * 50)
    print(f"πŸ“… Collection Date: {metadata.get('collection_date', 'Unknown')}")
    print(f"πŸ“ Total Samples: {total}")
    print(f"πŸ™οΈ  Collection Options: {metadata.get('collection_options', {})}")

    # Statistics
    stats = metadata.get('statistics', {})
    if stats:
        print("\nπŸ“ˆ Statistics:")
        for key, value in stats.items():
            print(f"   {key}: {value}")

    # Country distribution: the country is taken to be the last
    # comma-separated component of the address.
    countries = []
    for sample in samples:
        address = sample.get('address', '')
        if address and address != 'Unknown':
            countries.append(address.split(', ')[-1].strip())

    if countries:
        print("\n🌍 Top Countries:")
        for country, count in Counter(countries).most_common(10):
            print(f"   {country}: {count} samples")

    # Coordinate coverage
    coords_available = sum(1 for s in samples if s.get('lat') is not None)
    print(f"\nπŸ“ Coordinate Coverage: {coverage(coords_available)}")

    # Thumbnail coverage
    thumbnails_available = sum(1 for s in samples if s.get('has_thumbnail'))
    print(f"πŸ“Έ Thumbnail Coverage: {coverage(thumbnails_available)}")

    # Sample locations (first ten only)
    print("\nπŸ“ Sample Locations:")
    for i, sample in enumerate(samples[:10]):
        address = sample.get('address', 'Unknown')
        lat = sample.get('lat', 'N/A')
        lng = sample.get('lng', 'N/A')
        has_thumb = "πŸ“Έ" if sample.get('has_thumbnail') else "❌"
        print(f"   {i+1}. {has_thumb} {address} ({lat}, {lng})")

    if total > 10:
        print(f"   ... and {total - 10} more")


def create_thumbnail_gallery(data_file='data/golden_labels.json', output_file='data/gallery.html', max_images=100):
    """Create an HTML gallery of collected thumbnails.

    Reads the ``samples`` list from the JSON file at ``data_file`` and writes
    a standalone HTML page to ``output_file``. Each sample with a truthy
    ``thumbnail_path`` becomes one gallery item whose image URL is
    ``thumbnails/<thumbnail_path>`` (a relative URL, so it resolves against
    the location of the HTML page).

    Args:
        data_file: Path of the JSON file holding the collected samples.
        output_file: Path of the HTML file to write (overwritten if present).
        max_images: Maximum number of gallery items to emit.

    Returns:
        None. Progress is reported on stdout.
    """
    # Local import: ``html`` is not imported at module level.
    from html import escape

    with open(data_file, 'r') as f:
        data = json.load(f)

    samples = data.get('samples', [])

    # Collect fragments and join once instead of repeated string +=.
    parts = ["""
    <html>
    <head>
        <meta charset="utf-8">
        <title>MapCrunch Collection Gallery</title>
        <style>
            body { font-family: Arial, sans-serif; background: #f0f0f0; }
            h1 { text-align: center; }
            .gallery { display: flex; flex-wrap: wrap; justify-content: center; }
            .item { 
                margin: 10px; 
                background: white; 
                padding: 10px; 
                border-radius: 8px;
                box-shadow: 0 2px 4px rgba(0,0,0,0.1);
                text-align: center;
            }
            .item img { max-width: 320px; border-radius: 4px; }
            .address { font-weight: bold; margin: 5px 0; }
            .coords { font-size: 0.9em; color: #666; }
            .stats { margin: 20px; text-align: center; }
        </style>
    </head>
    <body>
        <h1>MapCrunch Collection Gallery</h1>
    """]

    # Add statistics
    total = len(samples)
    with_thumb = sum(1 for s in samples if s.get('has_thumbnail'))
    with_coords = sum(1 for s in samples if s.get('lat') is not None)

    parts.append(f"""
        <div class="stats">
            <p>Total Samples: {total} | With Thumbnails: {with_thumb} | With Coordinates: {with_coords}</p>
        </div>
        <div class="gallery">
    """)

    # Add thumbnails
    count = 0
    for sample in samples:
        if count >= max_images:
            break

        if not sample.get('thumbnail_path'):
            continue

        # Everything below comes from the data file, so escape it before it
        # is placed into markup or attribute values.
        thumb_src = escape(f"thumbnails/{sample['thumbnail_path']}")
        address = escape(str(sample.get('address', 'Unknown')))
        lat = escape(str(sample.get('lat', 'N/A')))
        lng = escape(str(sample.get('lng', 'N/A')))

        parts.append(f"""
            <div class="item">
                <img src="{thumb_src}" alt="{address}">
                <div class="address">{address}</div>
                <div class="coords">{lat}, {lng}</div>
            </div>
            """)
        count += 1

    parts.append("""
        </div>
    </body>
    </html>
    """)

    # Explicit UTF-8 so non-ASCII addresses are written identically on every
    # platform and match the charset declared in the page.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("".join(parts))

    print(f"βœ… Gallery created: {output_file}")
    print(f"πŸ“Έ Included {count} images")
    print(f"πŸ’‘ Open in browser: file://{os.path.abspath(output_file)}")


def plot_thumbnails_grid(data_file='data/golden_labels.json', max_images=20):
    """Display a grid of thumbnails using matplotlib.

    Reads the ``samples`` list from the JSON file at ``data_file``, keeps the
    first ``max_images`` entries that have a truthy ``thumbnail_path`` and
    shows them five per row. Image files are looked up under
    ``data/thumbnails/``; a sample whose file is missing leaves its cell blank.

    Args:
        data_file: Path of the JSON file holding the collected samples.
        max_images: Maximum number of thumbnails to display.

    Returns:
        None. Prints a message and returns without opening a figure when no
        sample has a thumbnail.
    """

    with open(data_file, 'r') as f:
        data = json.load(f)

    # .get() keeps a file without a 'samples' key from raising KeyError,
    # matching how the other functions in this module read the data.
    samples = [s for s in data.get('samples', []) if s.get('thumbnail_path')][:max_images]

    if not samples:
        print("❌ No samples with thumbnails found")
        return

    # Create grid
    cols = 5
    rows = (len(samples) + cols - 1) // cols

    # squeeze=False always yields a 2-D array of axes, so a single-row grid
    # needs no special-case reshape.
    fig, axes = plt.subplots(rows, cols, figsize=(15, rows * 3), squeeze=False)

    for index, ax in enumerate(axes.flat):
        if index < len(samples):
            sample = samples[index]
            thumb_path = f"data/thumbnails/{sample['thumbnail_path']}"
            if os.path.exists(thumb_path):
                ax.imshow(mpimg.imread(thumb_path))
                # Treat a missing or null address as 'Unknown' and add the
                # ellipsis only when the text was actually truncated.
                title = sample.get('address') or 'Unknown'
                if len(title) > 30:
                    title = title[:30] + '...'
                ax.set_title(title, fontsize=8)

        # Hide ticks and frame on every cell, including unused trailing ones.
        ax.axis('off')

    plt.tight_layout()
    fig.suptitle(f'MapCrunch Collection Sample ({len(samples)} locations)', y=1.02)
    plt.show()


def export_coordinates_csv(data_file='data/golden_labels.json', output_file='data/coordinates.csv'):
    """Write the coordinates of all located samples to a CSV file.

    Samples are read from the ``samples`` list of the JSON file at
    ``data_file``. Only samples whose ``lat`` and ``lng`` are both non-null
    are exported; each row holds the first eight characters of the sample id,
    the address (``'Unknown'`` if absent), latitude, longitude and a Yes/No
    thumbnail flag. The number of exported rows is printed at the end.
    """

    import csv

    with open(data_file, 'r') as source:
        payload = json.load(source)

    records = payload.get('samples', [])
    header = ['id', 'address', 'latitude', 'longitude', 'has_thumbnail']
    exported = 0

    with open(output_file, 'w', newline='', encoding='utf-8') as target:
        out = csv.writer(target)
        out.writerow(header)

        for entry in records:
            # Skip anything that cannot be placed on a map.
            if entry.get('lat') is None or entry.get('lng') is None:
                continue

            out.writerow([
                entry['id'][:8],
                entry.get('address', 'Unknown'),
                entry['lat'],
                entry['lng'],
                'Yes' if entry.get('has_thumbnail') else 'No',
            ])
            exported += 1

    print(f"βœ… Exported {exported} coordinates to {output_file}")


def main():
    """Command-line entry point for the data viewer.

    Parses the command line, prints the data summary for the chosen data
    file, then runs whichever of the optional actions (HTML gallery,
    thumbnail grid, CSV export) were requested, in that order. If the data
    file does not exist, a hint is printed and nothing else happens.
    """
    import argparse

    cli = argparse.ArgumentParser(description='View collected MapCrunch data')
    cli.add_argument('--gallery', action='store_true', help='Create HTML gallery')
    cli.add_argument('--grid', action='store_true', help='Show thumbnail grid')
    cli.add_argument('--csv', action='store_true', help='Export coordinates to CSV')
    cli.add_argument('--data', default='data/golden_labels.json', help='Data file path')
    cli.add_argument('--max-images', type=int, default=50, help='Max images for gallery/grid')

    options = cli.parse_args()
    data_path = options.data
    limit = options.max_images

    if not os.path.exists(data_path):
        print(f"❌ Data file not found: {data_path}")
        print("πŸ’‘ Run data collection first: python main.py --mode data --samples 50")
        return

    # The summary is unconditional; everything else is opt-in.
    view_data_summary(data_path)

    optional_steps = (
        (options.gallery, lambda: create_thumbnail_gallery(data_path, max_images=limit)),
        (options.grid, lambda: plot_thumbnails_grid(data_path, max_images=limit)),
        (options.csv, lambda: export_coordinates_csv(data_path)),
    )
    for requested, run_step in optional_steps:
        if requested:
            print()  # blank line separating this step's output from the previous one
            run_step()

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()