#!/usr/bin/env python3
"""
Quick viewer for collected MapCrunch data
"""
import json
import os
from collections import Counter
from pathlib import Path

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from PIL import Image
def view_data_summary(data_file='data/golden_labels.json'):
    """Print a human-readable summary of the collected data.

    Reads the JSON document at ``data_file`` and prints, in order: the
    collection metadata, any stored statistics, the ten most common
    countries, coordinate and thumbnail coverage, and the first ten sample
    locations.

    Args:
        data_file: Path to the JSON file. Its top-level ``samples`` list and
            ``metadata`` dict are read; both are optional.

    Returns:
        None. When the file does not exist, a hint is printed and the
        function returns without raising.
    """
    try:
        with open(data_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"β No data file found at {data_file}")
        print("π‘ Run data collection first: python main.py --mode data --samples 50")
        return

    samples = data.get('samples', [])
    metadata = data.get('metadata', {})
    total = len(samples)

    def coverage(count):
        # An empty collection must not divide by zero; report 0.0% instead.
        percent = count / total * 100 if total else 0.0
        return f"{count}/{total} ({percent:.1f}%)"

    print("π MapCrunch Data Collection Summary")
    print('=' * 50)
    print(f"π Collection Date: {metadata.get('collection_date', 'Unknown')}")
    print(f"π Total Samples: {total}")
    print(f"ποΈ Collection Options: {metadata.get('collection_options', {})}")

    # Statistics
    stats = metadata.get('statistics', {})
    if stats:
        print("\nπ Statistics:")
        for key, value in stats.items():
            print(f" {key}: {value}")

    # Country distribution: the country is taken to be the text after the
    # last ", " in the address; missing and 'Unknown' addresses are skipped.
    countries = []
    for sample in samples:
        address = sample.get('address', '')
        if address and address != 'Unknown':
            countries.append(address.split(', ')[-1].strip())
    if countries:
        print("\nπ Top Countries:")
        for country, count in Counter(countries).most_common(10):
            print(f" {country}: {count} samples")

    # Coordinate coverage
    coords_available = sum(1 for s in samples if s.get('lat') is not None)
    print(f"\nπ Coordinate Coverage: {coverage(coords_available)}")

    # Thumbnail coverage
    thumbnails_available = sum(1 for s in samples if s.get('has_thumbnail'))
    print(f"πΈ Thumbnail Coverage: {coverage(thumbnails_available)}")

    # Sample locations (first ten only)
    print("\nπ Sample Locations:")
    for position, sample in enumerate(samples[:10], start=1):
        address = sample.get('address', 'Unknown')
        lat = sample.get('lat', 'N/A')
        lng = sample.get('lng', 'N/A')
        has_thumb = "πΈ" if sample.get('has_thumbnail') else "β"
        print(f" {position}. {has_thumb} {address} ({lat}, {lng})")
    if total > 10:
        print(f" ... and {total - 10} more")
def create_thumbnail_gallery(data_file='data/golden_labels.json', output_file='data/gallery.html', max_images=100):
    """Write a static HTML gallery of the collected thumbnails.

    Reads the ``samples`` list from the JSON file at ``data_file`` and writes
    one gallery card per sample that has a truthy ``thumbnail_path``, up to
    ``max_images`` cards. Image sources are emitted as
    ``thumbnails/<thumbnail_path>``, i.e. relative to the generated page.

    Args:
        data_file: Path to the JSON file holding the ``samples`` list.
        output_file: Path of the HTML file to create or overwrite.
        max_images: Maximum number of cards to emit; a value <= 0 emits none.

    Returns:
        None. A short report is printed once the page has been written.

    Raises:
        OSError: If ``data_file`` cannot be read or ``output_file`` cannot be
            written (for example ``FileNotFoundError``).
    """
    # Local import: sample text is data, so it must be escaped before it is
    # placed in markup; otherwise '&', '<' or '"' in an address breaks the page.
    from html import escape

    with open(data_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    samples = data.get('samples', [])

    total = len(samples)
    with_thumb = sum(1 for s in samples if s.get('has_thumbnail'))
    with_coords = sum(1 for s in samples if s.get('lat') is not None)

    # Fragments are collected in a list and joined once at the end.
    # The charset declaration matches the UTF-8 encoding used when writing.
    parts = ["""
<html>
<head>
<meta charset="utf-8">
<title>MapCrunch Collection Gallery</title>
<style>
body { font-family: Arial, sans-serif; background: #f0f0f0; }
h1 { text-align: center; }
.gallery { display: flex; flex-wrap: wrap; justify-content: center; }
.item {
margin: 10px;
background: white;
padding: 10px;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
text-align: center;
}
.item img { max-width: 320px; border-radius: 4px; }
.address { font-weight: bold; margin: 5px 0; }
.coords { font-size: 0.9em; color: #666; }
.stats { margin: 20px; text-align: center; }
</style>
</head>
<body>
<h1>MapCrunch Collection Gallery</h1>
"""]

    # Add statistics
    parts.append(f"""
<div class="stats">
<p>Total Samples: {total} | With Thumbnails: {with_thumb} | With Coordinates: {with_coords}</p>
</div>
<div class="gallery">
""")

    # Add thumbnails
    count = 0
    for sample in samples:
        if count >= max_images:
            break
        thumbnail = sample.get('thumbnail_path')
        if not thumbnail:
            continue
        # escape() also quotes '"' and "'", so the values are safe inside
        # the double-quoted src/alt attributes as well as in element text.
        thumb_path = escape(f"thumbnails/{thumbnail}")
        address = escape(str(sample.get('address', 'Unknown')))
        lat = escape(str(sample.get('lat', 'N/A')))
        lng = escape(str(sample.get('lng', 'N/A')))
        parts.append(f"""
<div class="item">
<img src="{thumb_path}" alt="{address}">
<div class="address">{address}</div>
<div class="coords">{lat}, {lng}</div>
</div>
""")
        count += 1

    parts.append("""
</div>
</body>
</html>
""")

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(''.join(parts))

    print(f"β Gallery created: {output_file}")
    print(f"πΈ Included {count} images")
    print(f"π‘ Open in browser: file://{os.path.abspath(output_file)}")
def plot_thumbnails_grid(data_file='data/golden_labels.json', max_images=20):
    """Show a matplotlib grid of collected thumbnails.

    Takes the first ``max_images`` samples that have a truthy
    ``thumbnail_path`` and lays them out five per row. Each cell is titled
    with the sample's address, shortened to 30 characters. Image files are
    looked up under the hard-coded ``data/thumbnails/`` directory; a cell
    whose file is missing keeps its title but shows no image.

    Args:
        data_file: Path to the JSON file holding the ``samples`` list.
        max_images: Maximum number of thumbnails to display.

    Returns:
        None. When no sample has a thumbnail, a message is printed and no
        figure is created.
    """
    with open(data_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # .get() keeps a file without a 'samples' key on the "nothing to show"
    # path instead of raising KeyError, consistent with the other viewers.
    samples = [s for s in data.get('samples', []) if s.get('thumbnail_path')][:max_images]
    if not samples:
        print("β No samples with thumbnails found")
        return

    # Create grid
    cols = 5
    rows = (len(samples) + cols - 1) // cols  # ceiling division
    # squeeze=False always yields a 2-D array of axes, even for a single row.
    _, axes = plt.subplots(rows, cols, figsize=(15, rows * 3), squeeze=False)
    cells = list(axes.flat)  # row-major order, same as (i // cols, i % cols)

    for ax, sample in zip(cells, samples):
        thumb_path = f"data/thumbnails/{sample['thumbnail_path']}"
        if os.path.exists(thumb_path):
            ax.imshow(mpimg.imread(thumb_path))
        address = str(sample.get('address') or 'Unknown')
        # Only mark the title as truncated when something was actually cut.
        title = address if len(address) <= 30 else address[:30] + '...'
        ax.set_title(title, fontsize=8)
        ax.axis('off')

    # Hide empty subplots
    for ax in cells[len(samples):]:
        ax.axis('off')

    plt.tight_layout()
    plt.suptitle(f'MapCrunch Collection Sample ({len(samples)} locations)', y=1.02)
    plt.show()
def export_coordinates_csv(data_file='data/golden_labels.json', output_file='data/coordinates.csv'):
    """Export the coordinates of all located samples to a CSV file.

    Writes a header row followed by one row per sample whose ``lat`` and
    ``lng`` are both not ``None``. Columns are: the first eight characters of
    the sample id, the address (``'Unknown'`` when absent), latitude,
    longitude, and ``'Yes'``/``'No'`` for ``has_thumbnail``.

    Args:
        data_file: Path to the JSON file holding the ``samples`` list.
        output_file: Path of the CSV file to create or overwrite (UTF-8).

    Returns:
        None. The number of exported rows is printed.

    Raises:
        OSError: If ``data_file`` cannot be read or ``output_file`` cannot be
            written.
    """
    import csv

    # Read as UTF-8 so non-ASCII addresses round-trip into the UTF-8 output.
    with open(data_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    samples = data.get('samples', [])

    rows = []
    for sample in samples:
        if sample.get('lat') is None or sample.get('lng') is None:
            continue
        sample_id = sample.get('id')
        rows.append([
            # A record without an id is exported with an empty id column
            # instead of aborting the whole export with KeyError.
            '' if sample_id is None else str(sample_id)[:8],
            sample.get('address', 'Unknown'),
            sample['lat'],
            sample['lng'],
            'Yes' if sample.get('has_thumbnail') else 'No',
        ])

    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['id', 'address', 'latitude', 'longitude', 'has_thumbnail'])
        writer.writerows(rows)

    print(f"β Exported {len(rows)} coordinates to {output_file}")
def main():
    """Command-line entry point for the data viewer.

    Parses the command line, checks that the data file exists, always prints
    the text summary, and then runs each optional action that was requested:
    ``--gallery`` (HTML gallery), ``--grid`` (matplotlib grid) and ``--csv``
    (coordinate export). ``--data`` selects the input file and
    ``--max-images`` caps the number of images used by the gallery and grid.

    Returns:
        None. When the data file is missing, a hint is printed and nothing
        else is run.
    """
    import argparse

    parser = argparse.ArgumentParser(description='View collected MapCrunch data')
    parser.add_argument('--gallery', action='store_true', help='Create HTML gallery')
    parser.add_argument('--grid', action='store_true', help='Show thumbnail grid')
    parser.add_argument('--csv', action='store_true', help='Export coordinates to CSV')
    parser.add_argument('--data', default='data/golden_labels.json', help='Data file path')
    parser.add_argument('--max-images', type=int, default=50, help='Max images for gallery/grid')
    args = parser.parse_args()

    # Bail out early: the gallery, grid and CSV actions open the file
    # directly and would raise on a missing path.
    if not os.path.exists(args.data):
        print(f"β Data file not found: {args.data}")
        print("π‘ Run data collection first: python main.py --mode data --samples 50")
        return

    # Always show summary
    view_data_summary(args.data)

    # Additional actions, each preceded by a blank line in the output
    if args.gallery:
        print()
        create_thumbnail_gallery(args.data, max_images=args.max_images)
    if args.grid:
        print()
        plot_thumbnails_grid(args.data, max_images=args.max_images)
    if args.csv:
        print()
        export_coordinates_csv(args.data)


if __name__ == "__main__":
    main()