Spaces: Running

Andy Lee committed · Commit a9dca21
1 Parent(s): 8d1d528

feat: persistent dataset by name
Browse files
- .gitignore +12 -2
- benchmark.py +21 -19
- config.py +22 -5
- data_collector.py +65 -34
- datasets/asia/golden_labels.json +159 -0
- list_datasets.py +72 -0
- main.py +67 -21
.gitignore
CHANGED
@@ -2,6 +2,16 @@ venv/
 .env
 __pycache__
 .DS_Store
+
+# Results directory (temporary benchmark results)
+results/
+
+# Dataset thumbnails (too large for git, can be regenerated)
+datasets/*/thumbnails/
+
+# Keep the actual dataset files (golden_labels.json)
+!datasets/*/golden_labels.json
+
+# Legacy data directory (can be removed if no longer used)
 data/
-!data/golden_labels.json
-results/
+!data/golden_labels.json
benchmark.py
CHANGED
@@ -1,4 +1,4 @@
-# benchmark.py (
+# benchmark.py (Updated for Named Datasets)
 
 import os
 import json
@@ -9,18 +9,22 @@ from pathlib import Path
 import math
 
 from geo_bot import GeoBot
-from config import
+from config import get_data_paths, MODELS_CONFIG, SUCCESS_THRESHOLD_KM
 
 
 class MapGuesserBenchmark:
-    def __init__(self, headless: bool = False):
+    def __init__(self, dataset_name: str = "default", headless: bool = False):
+        self.dataset_name = dataset_name
+        self.data_paths = get_data_paths(dataset_name)
         self.headless = headless
         self.golden_labels = self.load_golden_labels()
-        print(
+        print(
+            f"📊 Loaded {len(self.golden_labels)} samples from dataset '{dataset_name}'"
+        )
 
     def load_golden_labels(self) -> List[Dict]:
         try:
-            with open(
+            with open(self.data_paths["golden_labels"], "r") as f:
                 return json.load(f).get("samples", [])
         except Exception:
             return []
@@ -75,10 +79,11 @@ class MapGuesserBenchmark:
         **kwargs,
     ) -> Dict:
         if not self.golden_labels:
-            raise ValueError(
+            raise ValueError(
+                f"No golden labels available in dataset '{self.dataset_name}'."
+            )
 
         models_to_test = models or list(MODELS_CONFIG.keys())
-        # Use max_samples to limit the number of test samples
         num_to_test = (
             min(max_samples, len(self.golden_labels))
             if max_samples is not None
@@ -86,7 +91,7 @@ class MapGuesserBenchmark:
         )
         test_samples = self.golden_labels[:num_to_test]
 
-        print(f"🚀 Starting
+        print(f"🚀 Starting benchmark on dataset '{self.dataset_name}':")
         print(f"   Models: {models_to_test}")
         print(f"   Samples: {len(test_samples)}")
         print(f"   Temperature: {temperature}")
@@ -105,7 +110,9 @@ class MapGuesserBenchmark:
                 temperature=temperature,
             ) as bot:
                 for i, sample in enumerate(test_samples):
-                    print(
+                    print(
+                        "########################################################"
+                    )
                     print(f"📍 Sample {i + 1}/{len(test_samples)}")
                     try:
                         result = self.run_single_test_with_bot(bot, sample)
@@ -154,9 +161,6 @@ class MapGuesserBenchmark:
 
         bot.controller.setup_clean_environment()
 
-        ## TODO add interactive mode to go ahead, turn around and zoom in/out
-        # Mat still need JS to operate but can use selenium to do it or wrap a MCP server
-
         screenshot = bot.take_screenshot()
         if not screenshot:
             return {
@@ -169,14 +173,11 @@ class MapGuesserBenchmark:
         predicted_lat_lon = bot.analyze_image(screenshot)
         inference_time = time.time() - start_time
 
-        # Core fix: build the true-coordinates dict from the top-level "lat" and "lng" keys
         true_coords = {"lat": location_data.get("lat"), "lng": location_data.get("lng")}
 
         true_location = location_data["address"]
         print(f"🔍 True location: {true_location}")
-        # print true coords
         print(f"🔍 True coords: {true_coords}")
-        # print predicted coords
         print(f"🔍 Predicted coords: {predicted_lat_lon}")
         distance_km = self.calculate_distance(true_coords, predicted_lat_lon)
 
@@ -193,16 +194,18 @@ class MapGuesserBenchmark:
         }
 
     def save_results(self, results: List[Dict]):
-        # ... (this function unchanged) ...
        if not results:
            return
        try:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-            results_dir = Path(
+            results_dir = Path(self.data_paths["results"])
            results_dir.mkdir(parents=True, exist_ok=True)
            results_file = results_dir / f"benchmark_results_{timestamp}.json"
            output_data = {
-                "metadata": {
+                "metadata": {
+                    "dataset_name": self.dataset_name,
+                    "timestamp": datetime.now().isoformat(),
+                },
                "results": results,
            }
            with open(results_file, "w") as f:
@@ -212,7 +215,6 @@ class MapGuesserBenchmark:
             print(f"❌ Error saving results: {e}")
 
     def generate_summary(self, results: List[Dict]) -> Dict:
-        # ... (this function unchanged) ...
         summary = {}
         by_model = {}
         for r in results:
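With this change a benchmark run is scoped to one named dataset: golden labels are read from datasets/<name>/golden_labels.json and results are written under results/<name>/. A minimal usage sketch, assuming the 'asia' dataset added in this commit and the gpt-4o entry from MODELS_CONFIG (keyword names follow the call made in main.py below):

from benchmark import MapGuesserBenchmark

benchmark = MapGuesserBenchmark(dataset_name="asia", headless=True)
summary = benchmark.run_benchmark(models=["gpt-4o"], max_samples=2, temperature=0.0)
# Results are saved to results/asia/benchmark_results_<timestamp>.json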
config.py
CHANGED
@@ -15,8 +15,17 @@ SELECTORS = {
 # Data collection settings
 DATA_COLLECTION_CONFIG = {
     "wait_after_go": 3,
+    "thumbnail_size": (320, 240),
 }
 
+# Benchmark settings
+BENCHMARK_CONFIG = {
+    "data_collection_samples": 50,
+}
+
+# MapCrunch options
+MAPCRUNCH_OPTIONS = {}
+
 # Model configurations
 MODELS_CONFIG = {
     "gpt-4o": {
@@ -37,8 +46,16 @@ MODELS_CONFIG = {
     },
 }
 
-
-
-
-    "
-
+
+# Data paths - now supports named datasets
+def get_data_paths(dataset_name: str = "default"):
+    """Get data paths for a specific dataset"""
+    return {
+        "golden_labels": f"datasets/{dataset_name}/golden_labels.json",
+        "thumbnails": f"datasets/{dataset_name}/thumbnails/",
+        "results": f"results/{dataset_name}/",
+    }
+
+
+# Backward compatibility - default paths
+DATA_PATHS = get_data_paths("default")
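For reference, a quick sketch of what get_data_paths returns for the dataset added in this commit:

from config import get_data_paths

paths = get_data_paths("asia")
# paths == {
#     "golden_labels": "datasets/asia/golden_labels.json",
#     "thumbnails": "datasets/asia/thumbnails/",
#     "results": "results/asia/",
# }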
data_collector.py
CHANGED
@@ -1,4 +1,4 @@
-# data_collector.py (
+# data_collector.py (Updated for Named Datasets)
 
 import os
 import json
@@ -12,7 +12,7 @@ from io import BytesIO
 
 from mapcrunch_controller import MapCrunchController
 from config import (
-
+    get_data_paths,
     BENCHMARK_CONFIG,
     DATA_COLLECTION_CONFIG,
     MAPCRUNCH_OPTIONS,
@@ -20,14 +20,21 @@ from config import (
 
 
 class DataCollector:
-    def __init__(
+    def __init__(
+        self,
+        dataset_name: str = "default",
+        headless: bool = False,
+        options: Optional[Dict] = None,
+    ):
+        self.dataset_name = dataset_name
+        self.data_paths = get_data_paths(dataset_name)
         self.controller = MapCrunchController(headless=headless)
         self.data = []
         self.options = options or MAPCRUNCH_OPTIONS
         self.setup_directories()
 
     def setup_directories(self):
-        for path in
+        for path in self.data_paths.values():
            if path.endswith("/"):
                Path(path).mkdir(parents=True, exist_ok=True)
            else:
@@ -37,9 +44,9 @@ class DataCollector:
         self, num_samples: Optional[int] = None, **kwargs
     ) -> List[Dict]:
         num_samples = num_samples or BENCHMARK_CONFIG["data_collection_samples"]
-        print(
-
-
+        print(
+            f"🚀 Collecting {num_samples} samples for dataset '{self.dataset_name}'..."
+        )
 
         successful_samples = 0
         while successful_samples < num_samples:
@@ -63,57 +70,69 @@ class DataCollector:
         self.save_data()
         return self.data
 
-    # Replace this function in data_collector.py
-
     def collect_single_location(self) -> Optional[Dict]:
-        """Collects a single location
+        """Collects a single location with simplified data collection."""
         try:
-            #
+            # Get coordinates
             coords = self.controller.driver.execute_script(
                 "return { lat: window.panorama.getPosition().lat(), lng: window.panorama.getPosition().lng() };"
             )
             if not coords:
                 raise ValueError("Could not get coordinates.")
 
-
-
-
-
-
+            # Get POV data directly from panorama
+            pov_data = self.controller.driver.execute_script("""
+                return {
+                    heading: window.panorama.getPov().heading,
+                    pitch: window.panorama.getPov().pitch,
+                    zoom: window.panorama.getZoom(),
+                    panoId: window.panorama.getPano()
+                };
+            """)
+
+            if not pov_data:
+                raise ValueError("Could not get POV data.")
+
+            # Get address (simplified)
+            address = "Unknown"
+            try:
+                address = self.controller.get_current_address() or "Unknown"
+            except:
+                pass  # Address is optional
 
-            # 2. Core fix: manually build the url_slug in Python
             lat = coords.get("lat")
             lng = coords.get("lng")
-        pov = identifiers.get("pov")
-        # In the MapCrunch URL slug, zoom is 0-based, while the Google POV zoom is 1-based
-        zoom_for_slug = round(pov.get("zoom", 1.0)) - 1
 
-            #
+            # Simplified URL slug construction
             def round_num(n, d):
                 return f"{n:.{d}f}"
 
+            zoom_for_slug = max(0, round(pov_data.get("zoom", 1.0)) - 1)
             url_slug = (
                 f"{round_num(lat, 6)}_"
                 f"{round_num(lng, 6)}_"
-                f"{round_num(
-                f"{round_num(
+                f"{round_num(pov_data.get('heading', 0), 2)}_"
+                f"{round_num(pov_data.get('pitch', 0) * -1, 2)}_"
                 f"{zoom_for_slug}"
             )
 
-            # 3. Build the data sample
             sample_id = str(uuid.uuid4())
             location_data = {
                 "id": sample_id,
                 "timestamp": datetime.now().isoformat(),
                 "lat": lat,
                 "lng": lng,
-                "address": address
-                "pano_id":
-                "pov":
-
+                "address": address,
+                "pano_id": pov_data.get("panoId"),
+                "pov": {
+                    "heading": pov_data.get("heading", 0),
+                    "pitch": pov_data.get("pitch", 0),
+                    "zoom": pov_data.get("zoom", 1.0),
+                },
+                "url_slug": url_slug,
             }
 
-            #
+            # Try to save thumbnail (optional)
             thumbnail_path = self.save_thumbnail(sample_id)
             if thumbnail_path:
                 location_data["thumbnail_path"] = thumbnail_path
@@ -124,38 +143,50 @@ class DataCollector:
             print(f"❌ Error in collect_single_location: {e}")
             return None
 
-    # ... (save_thumbnail, save_data and the other functions are unchanged) ...
     def save_thumbnail(self, sample_id: str) -> Optional[str]:
         try:
             screenshot_bytes = self.controller.take_street_view_screenshot()
             if not screenshot_bytes:
+                print(
+                    f"⚠️ Could not take screenshot for {sample_id} (this is OK in headless mode)"
+                )
                 return None
+
             image = Image.open(BytesIO(screenshot_bytes))
             thumbnail_size = DATA_COLLECTION_CONFIG.get("thumbnail_size", (320, 240))
             image.thumbnail(thumbnail_size, Image.Resampling.LANCZOS)
             thumbnail_filename = f"{sample_id}.jpg"
-            thumbnail_path = os.path.join(
+            thumbnail_path = os.path.join(
+                self.data_paths["thumbnails"], thumbnail_filename
+            )
+
             if image.mode in ("RGBA", "LA"):
                 rgb_image = Image.new("RGB", image.size, (255, 255, 255))
                 rgb_image.paste(image, mask=image.split()[-1])
                 image = rgb_image
+
             image.save(thumbnail_path, "JPEG", quality=85)
+            print(f"✅ Saved thumbnail for {sample_id}")
             return thumbnail_filename
-        except Exception:
+        except Exception as e:
+            print(f"⚠️ Could not save thumbnail for {sample_id}: {e}")
             return None
 
     def save_data(self):
         try:
             output_data = {
                 "metadata": {
+                    "dataset_name": self.dataset_name,
                     "collection_date": datetime.now().isoformat(),
                     "collection_options": self.options,
                 },
                 "samples": self.data,
             }
-            with open(
+            with open(self.data_paths["golden_labels"], "w") as f:
                 json.dump(output_data, f, indent=2)
-            print(
+            print(
+                f"\n💾 Dataset '{self.dataset_name}' saved to {self.data_paths['golden_labels']}"
+            )
         except Exception as e:
             print(f"❌ Error saving data: {e}")
 
datasets/asia/golden_labels.json
ADDED
@@ -0,0 +1,159 @@
+{
+  "metadata": {
+    "dataset_name": "asia",
+    "collection_date": "2025-06-11T21:13:45.005091",
+    "collection_options": {}
+  },
+  "samples": [
+    {
+      "id": "fdbb9997-c07c-4d4d-9095-82f162f0c27a",
+      "timestamp": "2025-06-11T21:13:15.310368",
+      "lat": 42.1322878067665,
+      "lng": 26.787410093767097,
+      "address": "Unknown",
+      "pano_id": "gsRLllGBndoh4EMBklXL9Q",
+      "pov": {
+        "heading": 240.93000000000006,
+        "pitch": 5,
+        "zoom": 1.0000051533649421
+      },
+      "url_slug": "42.132288_26.787410_240.93_-5.00_0",
+      "thumbnail_path": "fdbb9997-c07c-4d4d-9095-82f162f0c27a.jpg"
+    },
+    {
+      "id": "05ac7262-54b6-4b81-b9be-85d0830d7ed1",
+      "timestamp": "2025-06-11T21:13:18.720025",
+      "lat": 50.815127864126566,
+      "lng": 3.3295800788973042,
+      "address": "Unknown",
+      "pano_id": "lw4NuJ2I82JRsk5y8N7gGA",
+      "pov": {
+        "heading": -262.06999999999994,
+        "pitch": 5,
+        "zoom": 1.0000070740241276
+      },
+      "url_slug": "50.815128_3.329580_-262.07_-5.00_0",
+      "thumbnail_path": "05ac7262-54b6-4b81-b9be-85d0830d7ed1.jpg"
+    },
+    {
+      "id": "9fff9f32-e6f0-4f5f-a476-d019ec8b5bf2",
+      "timestamp": "2025-06-11T21:13:21.994062",
+      "lat": 45.43514041007389,
+      "lng": 21.364097624705536,
+      "address": "Unknown",
+      "pano_id": "XLiyeDvQ9SoaSaDBxn3GDA",
+      "pov": {
+        "heading": -168.06999999999994,
+        "pitch": 5,
+        "zoom": 1.000006919588194
+      },
+      "url_slug": "45.435140_21.364098_-168.07_-5.00_0",
+      "thumbnail_path": "9fff9f32-e6f0-4f5f-a476-d019ec8b5bf2.jpg"
+    },
+    {
+      "id": "2da4a9b9-e597-46e9-8c9d-8701f0c63462",
+      "timestamp": "2025-06-11T21:13:25.252476",
+      "lat": 42.30902518065906,
+      "lng": 77.8748629197877,
+      "address": "Unknown",
+      "pano_id": "t0HJFo38t3rh1U6W2OZ_VA",
+      "pov": {
+        "heading": 5.930000000000064,
+        "pitch": 5,
+        "zoom": 1.0000092331912114
+      },
+      "url_slug": "42.309025_77.874863_5.93_-5.00_0",
+      "thumbnail_path": "2da4a9b9-e597-46e9-8c9d-8701f0c63462.jpg"
+    },
+    {
+      "id": "b93858db-454e-4cc7-8f61-a3fe09cb0cab",
+      "timestamp": "2025-06-11T21:13:28.569143",
+      "lat": 14.647613688319248,
+      "lng": -16.980851505792,
+      "address": "Unknown",
+      "pano_id": "GE8DtAXvn2qZuSALopw8xA",
+      "pov": {
+        "heading": 270.93000000000006,
+        "pitch": 5,
+        "zoom": 1.0000015226193344
+      },
+      "url_slug": "14.647614_-16.980852_270.93_-5.00_0",
+      "thumbnail_path": "b93858db-454e-4cc7-8f61-a3fe09cb0cab.jpg"
+    },
+    {
+      "id": "1f0e20f1-3687-4939-be23-7c7b490cc707",
+      "timestamp": "2025-06-11T21:13:31.763851",
+      "lat": 11.208463091095442,
+      "lng": 105.72569729813453,
+      "address": "Unknown",
+      "pano_id": "2W3x5T-dMOrMJO57YtGq2Q",
+      "pov": {
+        "heading": -316.06999999999994,
+        "pitch": 5,
+        "zoom": 1.000004517085056
+      },
+      "url_slug": "11.208463_105.725697_-316.07_-5.00_0",
+      "thumbnail_path": "1f0e20f1-3687-4939-be23-7c7b490cc707.jpg"
+    },
+    {
+      "id": "7a2049cc-959c-4948-8574-0ffe9950e86e",
+      "timestamp": "2025-06-11T21:13:35.017287",
+      "lat": 40.668879231679576,
+      "lng": -8.21452809466328,
+      "address": "Unknown",
+      "pano_id": "bYHqbKkNgUUffaYf6fcKBQ",
+      "pov": {
+        "heading": -86.06999999999994,
+        "pitch": 5,
+        "zoom": 1.0000035631232127
+      },
+      "url_slug": "40.668879_-8.214528_-86.07_-5.00_0",
+      "thumbnail_path": "7a2049cc-959c-4948-8574-0ffe9950e86e.jpg"
+    },
+    {
+      "id": "fec89220-b130-49b3-8c19-f7e2e9551acf",
+      "timestamp": "2025-06-11T21:13:38.277525",
+      "lat": 5.459666786657994,
+      "lng": -2.1135681235966626,
+      "address": "Unknown",
+      "pano_id": "AaoI1zHJ4Pf18j94UXqksA",
+      "pov": {
+        "heading": 122.93000000000006,
+        "pitch": 5,
+        "zoom": 1.0000018586016313
+      },
+      "url_slug": "5.459667_-2.113568_122.93_-5.00_0",
+      "thumbnail_path": "fec89220-b130-49b3-8c19-f7e2e9551acf.jpg"
+    },
+    {
+      "id": "0fd8d569-c98a-4af3-b08c-173121c76043",
+      "timestamp": "2025-06-11T21:13:41.520505",
+      "lat": 1.3921814170475024,
+      "lng": 103.98320353936504,
+      "address": "Unknown",
+      "pano_id": "CAoSF0NJSE0wb2dLRUlDQWdJQ2t3T1h4bWdF",
+      "pov": {
+        "heading": 278.93000000000006,
+        "pitch": 5,
+        "zoom": 1.0000078101439185
+      },
+      "url_slug": "1.392181_103.983204_278.93_-5.00_0",
+      "thumbnail_path": "0fd8d569-c98a-4af3-b08c-173121c76043.jpg"
+    },
+    {
+      "id": "2426fad0-aeda-426a-96ad-f16724c360ce",
+      "timestamp": "2025-06-11T21:13:44.746699",
+      "lat": 51.89795854217673,
+      "lng": 4.96480321921333,
+      "address": "Unknown",
+      "pano_id": "bxBt_sZjG7ocUqPSmD1X0Q",
+      "pov": {
+        "heading": -175.06999999999994,
+        "pitch": 5,
+        "zoom": 1.0000093258133977
+      },
+      "url_slug": "51.897959_4.964803_-175.07_-5.00_0",
+      "thumbnail_path": "2426fad0-aeda-426a-96ad-f16724c360ce.jpg"
+    }
+  ]
+}
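The added file follows the metadata/samples layout that DataCollector.save_data() writes, so it can be inspected directly with the standard library; a minimal sketch:

import json

with open("datasets/asia/golden_labels.json") as f:
    data = json.load(f)

print(data["metadata"]["dataset_name"])  # asia
print(len(data["samples"]))              # 10
print(data["samples"][0]["url_slug"])    # 42.132288_26.787410_240.93_-5.00_0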
list_datasets.py
ADDED
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+"""
+Utility script to list available datasets
+"""
+
+import json
+import os
+from pathlib import Path
+from config import get_data_paths
+
+
+def list_datasets():
+    """List all available datasets"""
+    datasets_dir = Path("datasets")
+    if not datasets_dir.exists():
+        print("No datasets directory found.")
+        return []
+
+    datasets = []
+    for dataset_dir in datasets_dir.iterdir():
+        if dataset_dir.is_dir():
+            dataset_name = dataset_dir.name
+            data_paths = get_data_paths(dataset_name)
+            golden_labels_path = data_paths["golden_labels"]
+
+            if os.path.exists(golden_labels_path):
+                try:
+                    with open(golden_labels_path, "r") as f:
+                        data = json.load(f)
+                    samples = data.get("samples", [])
+                    metadata = data.get("metadata", {})
+
+                    datasets.append(
+                        {
+                            "name": dataset_name,
+                            "samples": len(samples),
+                            "created": metadata.get("collection_date", "Unknown"),
+                            "path": golden_labels_path,
+                        }
+                    )
+                except Exception as e:
+                    print(f"❌ Error reading dataset '{dataset_name}': {e}")
+
+    return datasets
+
+
+def main():
+    print("📊 Available Datasets:")
+    print("=" * 50)
+
+    datasets = list_datasets()
+
+    if not datasets:
+        print("No datasets found.")
+        print("\nTo create a new dataset, run:")
+        print("python main.py --mode collect --dataset <name> --samples <count>")
+        return
+
+    for dataset in sorted(datasets, key=lambda x: x["name"]):
+        print(f"Dataset: {dataset['name']}")
+        print(f"  Samples: {dataset['samples']}")
+        print(f"  Created: {dataset['created']}")
+        print(f"  Path: {dataset['path']}")
+        print()
+
+    print("To use a dataset, run:")
+    print("python main.py --mode benchmark --dataset <name>")
+    print("python main.py --mode agent --dataset <name>")
+
+
+if __name__ == "__main__":
+    main()
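With only the 'asia' dataset from this commit on disk, running python list_datasets.py would print roughly the following (output reconstructed from the print calls above; exact spacing may differ):

📊 Available Datasets:
==================================================
Dataset: asia
  Samples: 10
  Created: 2025-06-11T21:13:45.005091
  Path: datasets/asia/golden_labels.json

To use a dataset, run:
python main.py --mode benchmark --dataset <name>
python main.py --mode agent --dataset <name>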
main.py
CHANGED
@@ -9,49 +9,62 @@ from langchain_google_genai import ChatGoogleGenerativeAI
 
 from geo_bot import GeoBot
 from benchmark import MapGuesserBenchmark
-from
-
-
-
+from data_collector import DataCollector
+from config import MODELS_CONFIG, get_data_paths, SUCCESS_THRESHOLD_KM
+
+
+def agent_mode(
+    model_name: str,
+    steps: int,
+    headless: bool,
+    samples: int,
+    dataset_name: str = "default",
+    temperature: float = 0.0,
+):
     """
     Runs the AI Agent in a benchmark loop over multiple samples,
     using multi-step exploration for each.
     """
     print(
-        f"Starting Agent Mode
+        f"Starting Agent Mode: model={model_name}, steps={steps}, samples={samples}, dataset={dataset_name}, temperature={temperature}"
     )
 
+    data_paths = get_data_paths(dataset_name)
     try:
-        with open(
+        with open(data_paths["golden_labels"], "r", encoding="utf-8") as f:
             golden_labels = json.load(f).get("samples", [])
     except FileNotFoundError:
-        print(
+        print(
+            f"Error: Dataset '{dataset_name}' not found at {data_paths['golden_labels']}."
+        )
         return
 
     if not golden_labels:
-        print("Error: No samples found in
+        print(f"Error: No samples found in dataset '{dataset_name}'.")
         return
 
     num_to_test = min(samples, len(golden_labels))
     test_samples = golden_labels[:num_to_test]
-    print(f"Will run on {len(test_samples)} samples.")
+    print(f"Will run on {len(test_samples)} samples from dataset '{dataset_name}'.")
 
     config = MODELS_CONFIG.get(model_name)
     model_class = globals()[config["class"]]
     model_instance_name = config["model_name"]
 
-    benchmark_helper = MapGuesserBenchmark(headless=True)
+    benchmark_helper = MapGuesserBenchmark(dataset_name=dataset_name, headless=True)
     all_results = []
 
     with GeoBot(
-        model=model_class,
+        model=model_class,
+        model_name=model_instance_name,
+        headless=headless,
+        temperature=temperature,
     ) as bot:
         for i, sample in enumerate(test_samples):
             print(
                 f"\n--- Running Sample {i + 1}/{len(test_samples)} (ID: {sample.get('id')}) ---"
             )
 
-            # **FIXED**: Correct sequence: Load Data -> Clean Environment -> Run Loop
             if not bot.controller.load_location_from_data(sample):
                 print(
                     f"    ❌ Failed to load location for sample {sample.get('id')}. Skipping."
@@ -98,36 +111,61 @@ def agent_mode(model_name: str, steps: int, headless: bool, samples: int, temper
 
     summary = benchmark_helper.generate_summary(all_results)
     if summary:
-        print(
+        print(
+            f"\n\n--- Agent Benchmark Complete for dataset '{dataset_name}'! Summary ---"
+        )
         for model, stats in summary.items():
             print(f"Model: {model}")
             print(f"  Success Rate: {stats['success_rate'] * 100:.1f}%")
             print(f"  Avg Distance: {stats['average_distance_km']:.1f} km")
 
-    print("
+    print("Agent Mode finished.")
 
 
-def benchmark_mode(
+def benchmark_mode(
+    models: list,
+    samples: int,
+    headless: bool,
+    dataset_name: str = "default",
+    temperature: float = 0.0,
+):
     """Runs the benchmark on pre-collected data."""
-    print(
-
-
+    print(
+        f"Starting Benchmark Mode: models={models}, samples={samples}, dataset={dataset_name}, temperature={temperature}"
+    )
+    benchmark = MapGuesserBenchmark(dataset_name=dataset_name, headless=headless)
+    summary = benchmark.run_benchmark(
+        models=models, max_samples=samples, temperature=temperature
+    )
     if summary:
-        print("\n--- Benchmark Complete! Summary ---")
+        print(f"\n--- Benchmark Complete for dataset '{dataset_name}'! Summary ---")
         for model, stats in summary.items():
             print(f"Model: {model}")
             print(f"  Success Rate: {stats['success_rate'] * 100:.1f}%")
             print(f"  Avg Distance: {stats['average_distance_km']:.1f} km")
 
 
+def collect_mode(dataset_name: str, samples: int, headless: bool):
+    """Collects data for a new dataset."""
+    print(f"Starting Data Collection: dataset={dataset_name}, samples={samples}")
+    with DataCollector(dataset_name=dataset_name, headless=headless) as collector:
+        collector.collect_samples(num_samples=samples)
+    print(f"Data collection complete for dataset '{dataset_name}'.")
+
+
 def main():
     parser = argparse.ArgumentParser(description="MapCrunch AI Agent & Benchmark")
     parser.add_argument(
         "--mode",
-        choices=["agent", "benchmark"],
+        choices=["agent", "benchmark", "collect"],
         default="agent",
         help="Operation mode.",
     )
+    parser.add_argument(
+        "--dataset",
+        default="default",
+        help="Dataset name to use or create.",
+    )
     parser.add_argument(
         "--model",
         choices=list(MODELS_CONFIG.keys()),
@@ -161,12 +199,19 @@ def main():
 
     args = parser.parse_args()
 
-    if args.mode == "
+    if args.mode == "collect":
+        collect_mode(
+            dataset_name=args.dataset,
+            samples=args.samples,
+            headless=args.headless,
+        )
+    elif args.mode == "agent":
         agent_mode(
             model_name=args.model,
             steps=args.steps,
             headless=args.headless,
             samples=args.samples,
+            dataset_name=args.dataset,
             temperature=args.temperature,
         )
     elif args.mode == "benchmark":
@@ -174,6 +219,7 @@ def main():
             models=args.models or [args.model],
             samples=args.samples,
             headless=args.headless,
+            dataset_name=args.dataset,
             temperature=args.temperature,
         )
 
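Taken together, the collect / benchmark / agent modes give a dataset-centric workflow. A sketch of typical invocations (the --mode, --dataset, --model and --samples flags appear in the diff above; --steps is inferred from args.steps and the sample counts are illustrative):

python main.py --mode collect --dataset asia --samples 10
python main.py --mode benchmark --dataset asia --model gpt-4o --samples 5
python main.py --mode agent --dataset asia --model gpt-4o --steps 3 --samples 3
python list_datasets.py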