Andy Lee committed
Commit a9dca21 · Parent: 8d1d528

feat: persistent dataset by name
Files changed (7):
  1. .gitignore +12 -2
  2. benchmark.py +21 -19
  3. config.py +22 -5
  4. data_collector.py +65 -34
  5. datasets/asia/golden_labels.json +159 -0
  6. list_datasets.py +72 -0
  7. main.py +67 -21
.gitignore CHANGED
@@ -2,6 +2,16 @@ venv/
 .env
 __pycache__
 .DS_Store
-data/
-!data/golden_labels.json
-results/
+
+# Results directory (temporary benchmark results)
+results/
+
+# Dataset thumbnails (too large for git, can be regenerated)
+datasets/*/thumbnails/
+
+# Keep the actual dataset files (golden_labels.json)
+!datasets/*/golden_labels.json
+
+# Legacy data directory (can be removed if no longer used)
+data/
+!data/golden_labels.json
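Note on the ignore rules above: the re-include pattern !datasets/*/golden_labels.json works because only the datasets/*/thumbnails/ subdirectories are ignored, not the dataset directories themselves (git cannot re-include a file whose parent directory is excluded). A quick sanity check with git's own matcher, on hypothetical paths:

    git check-ignore -v datasets/asia/thumbnails/foo.jpg    # matched by datasets/*/thumbnails/
    git check-ignore -v datasets/asia/golden_labels.json    # prints nothing, exits 1: not ignored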
benchmark.py CHANGED
@@ -1,4 +1,4 @@
-# benchmark.py (Final Fix)
+# benchmark.py (Updated for Named Datasets)
 
 import os
 import json
@@ -9,18 +9,22 @@ from pathlib import Path
 import math
 
 from geo_bot import GeoBot
-from config import DATA_PATHS, MODELS_CONFIG, SUCCESS_THRESHOLD_KM
+from config import get_data_paths, MODELS_CONFIG, SUCCESS_THRESHOLD_KM
 
 
 class MapGuesserBenchmark:
-    def __init__(self, headless: bool = False):
+    def __init__(self, dataset_name: str = "default", headless: bool = False):
+        self.dataset_name = dataset_name
+        self.data_paths = get_data_paths(dataset_name)
         self.headless = headless
         self.golden_labels = self.load_golden_labels()
-        print(f"📊 Loaded {len(self.golden_labels)} golden label samples")
+        print(
+            f"📊 Loaded {len(self.golden_labels)} samples from dataset '{dataset_name}'"
+        )
 
     def load_golden_labels(self) -> List[Dict]:
         try:
-            with open(DATA_PATHS["golden_labels"], "r") as f:
+            with open(self.data_paths["golden_labels"], "r") as f:
                 return json.load(f).get("samples", [])
         except Exception:
             return []
@@ -75,10 +79,11 @@
         **kwargs,
     ) -> Dict:
         if not self.golden_labels:
-            raise ValueError("No golden labels available.")
+            raise ValueError(
+                f"No golden labels available in dataset '{self.dataset_name}'."
+            )
 
         models_to_test = models or list(MODELS_CONFIG.keys())
-        # Use max_samples to limit the number of test samples
         num_to_test = (
             min(max_samples, len(self.golden_labels))
             if max_samples is not None
@@ -86,7 +91,7 @@
         )
         test_samples = self.golden_labels[:num_to_test]
 
-        print(f"🚀 Starting LIVE benchmark:")
+        print(f"🚀 Starting benchmark on dataset '{self.dataset_name}':")
         print(f"   Models: {models_to_test}")
         print(f"   Samples: {len(test_samples)}")
         print(f"   Temperature: {temperature}")
@@ -105,7 +110,9 @@
                 temperature=temperature,
             ) as bot:
                 for i, sample in enumerate(test_samples):
-                    print('########################################################')
+                    print(
+                        "########################################################"
+                    )
                     print(f"📍 Sample {i + 1}/{len(test_samples)}")
                     try:
                         result = self.run_single_test_with_bot(bot, sample)
@@ -154,9 +161,6 @@
 
         bot.controller.setup_clean_environment()
 
-        ## TODO: add an interactive mode to go ahead, turn around, and zoom in/out
-        # Map still needs JS to operate, but Selenium can drive it, or wrap an MCP server
-
         screenshot = bot.take_screenshot()
         if not screenshot:
             return {
@@ -169,14 +173,11 @@
         predicted_lat_lon = bot.analyze_image(screenshot)
         inference_time = time.time() - start_time
 
-        # Core fix: build the true-coordinates dict from the top-level "lat" and "lng" keys
        true_coords = {"lat": location_data.get("lat"), "lng": location_data.get("lng")}
 
        true_location = location_data["address"]
        print(f"🔍 True location: {true_location}")
-        # print true coords
        print(f"🔍 True coords: {true_coords}")
-        # print predicted coords
        print(f"🔍 Predicted coords: {predicted_lat_lon}")
        distance_km = self.calculate_distance(true_coords, predicted_lat_lon)
 
@@ -193,16 +194,18 @@
         }
 
     def save_results(self, results: List[Dict]):
-        # ... (this function is unchanged) ...
         if not results:
             return
         try:
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-            results_dir = Path(DATA_PATHS["results"])
+            results_dir = Path(self.data_paths["results"])
             results_dir.mkdir(parents=True, exist_ok=True)
             results_file = results_dir / f"benchmark_results_{timestamp}.json"
             output_data = {
-                "metadata": {"timestamp": datetime.now().isoformat()},
+                "metadata": {
+                    "dataset_name": self.dataset_name,
+                    "timestamp": datetime.now().isoformat(),
+                },
                 "results": results,
             }
             with open(results_file, "w") as f:
@@ -212,7 +215,6 @@
             print(f"❌ Error saving results: {e}")
 
     def generate_summary(self, results: List[Dict]) -> Dict:
-        # ... (this function is unchanged) ...
         summary = {}
         by_model = {}
         for r in results:
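With the constructor change above, a benchmark run is scoped to one named dataset end to end. A minimal sketch of driving the updated class (dataset and model names are illustrative, assuming the "asia" dataset committed below):

    from benchmark import MapGuesserBenchmark

    # Loads datasets/asia/golden_labels.json; save_results() writes to results/asia/
    benchmark = MapGuesserBenchmark(dataset_name="asia", headless=True)
    summary = benchmark.run_benchmark(models=["gpt-4o"], max_samples=5, temperature=0.0)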
config.py CHANGED
@@ -15,8 +15,17 @@ SELECTORS = {
 # Data collection settings
 DATA_COLLECTION_CONFIG = {
     "wait_after_go": 3,
+    "thumbnail_size": (320, 240),
 }
 
+# Benchmark settings
+BENCHMARK_CONFIG = {
+    "data_collection_samples": 50,
+}
+
+# MapCrunch options
+MAPCRUNCH_OPTIONS = {}
+
 # Model configurations
 MODELS_CONFIG = {
     "gpt-4o": {
@@ -37,8 +46,16 @@
     },
 }
 
-# Data paths
-DATA_PATHS = {
-    "golden_labels": "data/golden_labels.json",
-    "results": "results/",
-}
+
+# Data paths - now supports named datasets
+def get_data_paths(dataset_name: str = "default"):
+    """Get data paths for a specific dataset."""
+    return {
+        "golden_labels": f"datasets/{dataset_name}/golden_labels.json",
+        "thumbnails": f"datasets/{dataset_name}/thumbnails/",
+        "results": f"results/{dataset_name}/",
+    }
+
+
+# Backward compatibility - default paths
+DATA_PATHS = get_data_paths("default")
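For reference, a sketch of what get_data_paths resolves to for a hypothetical dataset name; every consumer (collector, benchmark, agent) derives its paths from this single function:

    from config import get_data_paths

    paths = get_data_paths("asia")
    # paths == {
    #     "golden_labels": "datasets/asia/golden_labels.json",
    #     "thumbnails": "datasets/asia/thumbnails/",
    #     "results": "results/asia/",
    # }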
data_collector.py CHANGED
@@ -1,4 +1,4 @@
-# data_collector.py (Final Version for High-Quality Data)
+# data_collector.py (Updated for Named Datasets)
 
 import os
 import json
@@ -12,7 +12,7 @@ from io import BytesIO
 
 from mapcrunch_controller import MapCrunchController
 from config import (
-    DATA_PATHS,
+    get_data_paths,
     BENCHMARK_CONFIG,
     DATA_COLLECTION_CONFIG,
     MAPCRUNCH_OPTIONS,
@@ -20,14 +20,21 @@ from config import (
 
 
 class DataCollector:
-    def __init__(self, headless: bool = False, options: Optional[Dict] = None):
+    def __init__(
+        self,
+        dataset_name: str = "default",
+        headless: bool = False,
+        options: Optional[Dict] = None,
+    ):
+        self.dataset_name = dataset_name
+        self.data_paths = get_data_paths(dataset_name)
         self.controller = MapCrunchController(headless=headless)
         self.data = []
         self.options = options or MAPCRUNCH_OPTIONS
         self.setup_directories()
 
     def setup_directories(self):
-        for path in DATA_PATHS.values():
+        for path in self.data_paths.values():
             if path.endswith("/"):
                 Path(path).mkdir(parents=True, exist_ok=True)
             else:
@@ -37,9 +44,9 @@
         self, num_samples: Optional[int] = None, **kwargs
     ) -> List[Dict]:
         num_samples = num_samples or BENCHMARK_CONFIG["data_collection_samples"]
-        print(f"🚀 Starting high-quality data collection for {num_samples} samples...")
-
-        # NOTE: setup_collection_options is not implemented in the provided controller; assuming it's handled manually or not needed.
+        print(
+            f"🚀 Collecting {num_samples} samples for dataset '{self.dataset_name}'..."
+        )
 
         successful_samples = 0
         while successful_samples < num_samples:
@@ -63,57 +70,69 @@
         self.save_data()
         return self.data
 
-    # Replace this function in data_collector.py
-
     def collect_single_location(self) -> Optional[Dict]:
-        """Collects a single location and manually constructs the url_slug."""
+        """Collects a single location with simplified data collection."""
         try:
-            # 1. Get coordinates and identifiers
+            # Get coordinates
             coords = self.controller.driver.execute_script(
                 "return { lat: window.panorama.getPosition().lat(), lng: window.panorama.getPosition().lng() };"
             )
             if not coords:
                 raise ValueError("Could not get coordinates.")
 
-            identifiers = self.controller.get_live_location_identifiers()
-            if not identifiers or "pov" not in identifiers:
-                raise ValueError("Could not get POV.")
-
-            address = self.controller.get_current_address()
+            # Get POV data directly from panorama
+            pov_data = self.controller.driver.execute_script("""
+                return {
+                    heading: window.panorama.getPov().heading,
+                    pitch: window.panorama.getPov().pitch,
+                    zoom: window.panorama.getZoom(),
+                    panoId: window.panorama.getPano()
+                };
+            """)
+
+            if not pov_data:
+                raise ValueError("Could not get POV data.")
+
+            # Get address (simplified)
+            address = "Unknown"
+            try:
+                address = self.controller.get_current_address() or "Unknown"
+            except:
+                pass  # Address is optional
 
-            # 2. Core fix: manually build the url_slug in Python
             lat = coords.get("lat")
             lng = coords.get("lng")
-            pov = identifiers.get("pov")
-            # In the MapCrunch URL slug, zoom is 0-based, while the Google POV zoom is 1-based
-            zoom_for_slug = round(pov.get("zoom", 1.0)) - 1
 
-            # Format numbers with the same logic as the roundNum function
+            # Simplified URL slug construction
             def round_num(n, d):
                 return f"{n:.{d}f}"
 
+            zoom_for_slug = max(0, round(pov_data.get("zoom", 1.0)) - 1)
             url_slug = (
                 f"{round_num(lat, 6)}_"
                 f"{round_num(lng, 6)}_"
-                f"{round_num(pov.get('heading', 0), 2)}_"
-                f"{round_num(pov.get('pitch', 0) * -1, 2)}_"  # pitch is negative in the slug
+                f"{round_num(pov_data.get('heading', 0), 2)}_"
+                f"{round_num(pov_data.get('pitch', 0) * -1, 2)}_"
                 f"{zoom_for_slug}"
             )
 
-            # 3. Build the data sample
             sample_id = str(uuid.uuid4())
             location_data = {
                 "id": sample_id,
                 "timestamp": datetime.now().isoformat(),
                 "lat": lat,
                 "lng": lng,
-                "address": address or "Unknown",
-                "pano_id": identifiers.get("panoId"),
-                "pov": pov,
-                "url_slug": url_slug,  # <-- this now always holds the correct value
+                "address": address,
+                "pano_id": pov_data.get("panoId"),
+                "pov": {
+                    "heading": pov_data.get("heading", 0),
+                    "pitch": pov_data.get("pitch", 0),
+                    "zoom": pov_data.get("zoom", 1.0),
+                },
+                "url_slug": url_slug,
             }
 
-            # 4. Save the thumbnail
+            # Try to save thumbnail (optional)
             thumbnail_path = self.save_thumbnail(sample_id)
             if thumbnail_path:
                 location_data["thumbnail_path"] = thumbnail_path
@@ -124,38 +143,50 @@
             print(f"❌ Error in collect_single_location: {e}")
             return None
 
-    # ... (save_thumbnail, save_data, and the other functions are unchanged) ...
     def save_thumbnail(self, sample_id: str) -> Optional[str]:
         try:
             screenshot_bytes = self.controller.take_street_view_screenshot()
             if not screenshot_bytes:
+                print(
+                    f"⚠️ Could not take screenshot for {sample_id} (this is OK in headless mode)"
+                )
                 return None
+
             image = Image.open(BytesIO(screenshot_bytes))
             thumbnail_size = DATA_COLLECTION_CONFIG.get("thumbnail_size", (320, 240))
             image.thumbnail(thumbnail_size, Image.Resampling.LANCZOS)
             thumbnail_filename = f"{sample_id}.jpg"
-            thumbnail_path = os.path.join(DATA_PATHS["thumbnails"], thumbnail_filename)
+            thumbnail_path = os.path.join(
+                self.data_paths["thumbnails"], thumbnail_filename
+            )
+
             if image.mode in ("RGBA", "LA"):
                 rgb_image = Image.new("RGB", image.size, (255, 255, 255))
                 rgb_image.paste(image, mask=image.split()[-1])
                 image = rgb_image
+
             image.save(thumbnail_path, "JPEG", quality=85)
+            print(f"✅ Saved thumbnail for {sample_id}")
             return thumbnail_filename
-        except Exception:
+        except Exception as e:
+            print(f"⚠️ Could not save thumbnail for {sample_id}: {e}")
             return None
 
     def save_data(self):
         try:
             output_data = {
                 "metadata": {
+                    "dataset_name": self.dataset_name,
                     "collection_date": datetime.now().isoformat(),
                     "collection_options": self.options,
                 },
                 "samples": self.data,
             }
-            with open(DATA_PATHS["golden_labels"], "w") as f:
+            with open(self.data_paths["golden_labels"], "w") as f:
                 json.dump(output_data, f, indent=2)
-            print(f"\n💾 High-quality data saved to {DATA_PATHS['golden_labels']}")
+            print(
+                f"\n💾 Dataset '{self.dataset_name}' saved to {self.data_paths['golden_labels']}"
+            )
         except Exception as e:
             print(f"❌ Error saving data: {e}")
 
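The slug logic above is the subtle part of the collector: heading is kept as-is, pitch is negated, and the 1-based Google zoom is converted to MapCrunch's 0-based slug zoom (now clamped at 0). A standalone sketch, using the POV numbers from the first sample in the dataset below:

    def round_num(n, d):
        return f"{n:.{d}f}"

    lat, lng = 42.1322878067665, 26.787410093767097
    pov = {"heading": 240.93, "pitch": 5, "zoom": 1.0000051533649421}

    zoom_for_slug = max(0, round(pov["zoom"]) - 1)  # 1-based Google zoom -> 0-based slug zoom
    url_slug = (
        f"{round_num(lat, 6)}_{round_num(lng, 6)}_"
        f"{round_num(pov['heading'], 2)}_{round_num(pov['pitch'] * -1, 2)}_"  # pitch negated
        f"{zoom_for_slug}"
    )
    print(url_slug)  # 42.132288_26.787410_240.93_-5.00_0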
datasets/asia/golden_labels.json ADDED
@@ -0,0 +1,159 @@
+{
+  "metadata": {
+    "dataset_name": "asia",
+    "collection_date": "2025-06-11T21:13:45.005091",
+    "collection_options": {}
+  },
+  "samples": [
+    {
+      "id": "fdbb9997-c07c-4d4d-9095-82f162f0c27a",
+      "timestamp": "2025-06-11T21:13:15.310368",
+      "lat": 42.1322878067665,
+      "lng": 26.787410093767097,
+      "address": "Unknown",
+      "pano_id": "gsRLllGBndoh4EMBklXL9Q",
+      "pov": {
+        "heading": 240.93000000000006,
+        "pitch": 5,
+        "zoom": 1.0000051533649421
+      },
+      "url_slug": "42.132288_26.787410_240.93_-5.00_0",
+      "thumbnail_path": "fdbb9997-c07c-4d4d-9095-82f162f0c27a.jpg"
+    },
+    {
+      "id": "05ac7262-54b6-4b81-b9be-85d0830d7ed1",
+      "timestamp": "2025-06-11T21:13:18.720025",
+      "lat": 50.815127864126566,
+      "lng": 3.3295800788973042,
+      "address": "Unknown",
+      "pano_id": "lw4NuJ2I82JRsk5y8N7gGA",
+      "pov": {
+        "heading": -262.06999999999994,
+        "pitch": 5,
+        "zoom": 1.0000070740241276
+      },
+      "url_slug": "50.815128_3.329580_-262.07_-5.00_0",
+      "thumbnail_path": "05ac7262-54b6-4b81-b9be-85d0830d7ed1.jpg"
+    },
+    {
+      "id": "9fff9f32-e6f0-4f5f-a476-d019ec8b5bf2",
+      "timestamp": "2025-06-11T21:13:21.994062",
+      "lat": 45.43514041007389,
+      "lng": 21.364097624705536,
+      "address": "Unknown",
+      "pano_id": "XLiyeDvQ9SoaSaDBxn3GDA",
+      "pov": {
+        "heading": -168.06999999999994,
+        "pitch": 5,
+        "zoom": 1.000006919588194
+      },
+      "url_slug": "45.435140_21.364098_-168.07_-5.00_0",
+      "thumbnail_path": "9fff9f32-e6f0-4f5f-a476-d019ec8b5bf2.jpg"
+    },
+    {
+      "id": "2da4a9b9-e597-46e9-8c9d-8701f0c63462",
+      "timestamp": "2025-06-11T21:13:25.252476",
+      "lat": 42.30902518065906,
+      "lng": 77.8748629197877,
+      "address": "Unknown",
+      "pano_id": "t0HJFo38t3rh1U6W2OZ_VA",
+      "pov": {
+        "heading": 5.930000000000064,
+        "pitch": 5,
+        "zoom": 1.0000092331912114
+      },
+      "url_slug": "42.309025_77.874863_5.93_-5.00_0",
+      "thumbnail_path": "2da4a9b9-e597-46e9-8c9d-8701f0c63462.jpg"
+    },
+    {
+      "id": "b93858db-454e-4cc7-8f61-a3fe09cb0cab",
+      "timestamp": "2025-06-11T21:13:28.569143",
+      "lat": 14.647613688319248,
+      "lng": -16.980851505792,
+      "address": "Unknown",
+      "pano_id": "GE8DtAXvn2qZuSALopw8xA",
+      "pov": {
+        "heading": 270.93000000000006,
+        "pitch": 5,
+        "zoom": 1.0000015226193344
+      },
+      "url_slug": "14.647614_-16.980852_270.93_-5.00_0",
+      "thumbnail_path": "b93858db-454e-4cc7-8f61-a3fe09cb0cab.jpg"
+    },
+    {
+      "id": "1f0e20f1-3687-4939-be23-7c7b490cc707",
+      "timestamp": "2025-06-11T21:13:31.763851",
+      "lat": 11.208463091095442,
+      "lng": 105.72569729813453,
+      "address": "Unknown",
+      "pano_id": "2W3x5T-dMOrMJO57YtGq2Q",
+      "pov": {
+        "heading": -316.06999999999994,
+        "pitch": 5,
+        "zoom": 1.000004517085056
+      },
+      "url_slug": "11.208463_105.725697_-316.07_-5.00_0",
+      "thumbnail_path": "1f0e20f1-3687-4939-be23-7c7b490cc707.jpg"
+    },
+    {
+      "id": "7a2049cc-959c-4948-8574-0ffe9950e86e",
+      "timestamp": "2025-06-11T21:13:35.017287",
+      "lat": 40.668879231679576,
+      "lng": -8.21452809466328,
+      "address": "Unknown",
+      "pano_id": "bYHqbKkNgUUffaYf6fcKBQ",
+      "pov": {
+        "heading": -86.06999999999994,
+        "pitch": 5,
+        "zoom": 1.0000035631232127
+      },
+      "url_slug": "40.668879_-8.214528_-86.07_-5.00_0",
+      "thumbnail_path": "7a2049cc-959c-4948-8574-0ffe9950e86e.jpg"
+    },
+    {
+      "id": "fec89220-b130-49b3-8c19-f7e2e9551acf",
+      "timestamp": "2025-06-11T21:13:38.277525",
+      "lat": 5.459666786657994,
+      "lng": -2.1135681235966626,
+      "address": "Unknown",
+      "pano_id": "AaoI1zHJ4Pf18j94UXqksA",
+      "pov": {
+        "heading": 122.93000000000006,
+        "pitch": 5,
+        "zoom": 1.0000018586016313
+      },
+      "url_slug": "5.459667_-2.113568_122.93_-5.00_0",
+      "thumbnail_path": "fec89220-b130-49b3-8c19-f7e2e9551acf.jpg"
+    },
+    {
+      "id": "0fd8d569-c98a-4af3-b08c-173121c76043",
+      "timestamp": "2025-06-11T21:13:41.520505",
+      "lat": 1.3921814170475024,
+      "lng": 103.98320353936504,
+      "address": "Unknown",
+      "pano_id": "CAoSF0NJSE0wb2dLRUlDQWdJQ2t3T1h4bWdF",
+      "pov": {
+        "heading": 278.93000000000006,
+        "pitch": 5,
+        "zoom": 1.0000078101439185
+      },
+      "url_slug": "1.392181_103.983204_278.93_-5.00_0",
+      "thumbnail_path": "0fd8d569-c98a-4af3-b08c-173121c76043.jpg"
+    },
+    {
+      "id": "2426fad0-aeda-426a-96ad-f16724c360ce",
+      "timestamp": "2025-06-11T21:13:44.746699",
+      "lat": 51.89795854217673,
+      "lng": 4.96480321921333,
+      "address": "Unknown",
+      "pano_id": "bxBt_sZjG7ocUqPSmD1X0Q",
+      "pov": {
+        "heading": -175.06999999999994,
+        "pitch": 5,
+        "zoom": 1.0000093258133977
+      },
+      "url_slug": "51.897959_4.964803_-175.07_-5.00_0",
+      "thumbnail_path": "2426fad0-aeda-426a-96ad-f16724c360ce.jpg"
+    }
+  ]
+}
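A dataset file like this is plain JSON, so consumers outside the benchmark can read it directly; a small sketch against the file committed above:

    import json

    with open("datasets/asia/golden_labels.json") as f:
        data = json.load(f)

    print(data["metadata"]["dataset_name"])  # asia
    print(len(data["samples"]))              # 10
    print(data["samples"][0]["url_slug"])    # 42.132288_26.787410_240.93_-5.00_0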
list_datasets.py ADDED
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+"""
+Utility script to list available datasets.
+"""
+
+import json
+import os
+from pathlib import Path
+from config import get_data_paths
+
+
+def list_datasets():
+    """List all available datasets."""
+    datasets_dir = Path("datasets")
+    if not datasets_dir.exists():
+        print("No datasets directory found.")
+        return []
+
+    datasets = []
+    for dataset_dir in datasets_dir.iterdir():
+        if dataset_dir.is_dir():
+            dataset_name = dataset_dir.name
+            data_paths = get_data_paths(dataset_name)
+            golden_labels_path = data_paths["golden_labels"]
+
+            if os.path.exists(golden_labels_path):
+                try:
+                    with open(golden_labels_path, "r") as f:
+                        data = json.load(f)
+                    samples = data.get("samples", [])
+                    metadata = data.get("metadata", {})
+
+                    datasets.append(
+                        {
+                            "name": dataset_name,
+                            "samples": len(samples),
+                            "created": metadata.get("collection_date", "Unknown"),
+                            "path": golden_labels_path,
+                        }
+                    )
+                except Exception as e:
+                    print(f"❌ Error reading dataset '{dataset_name}': {e}")
+
+    return datasets
+
+
+def main():
+    print("📊 Available Datasets:")
+    print("=" * 50)
+
+    datasets = list_datasets()
+
+    if not datasets:
+        print("No datasets found.")
+        print("\nTo create a new dataset, run:")
+        print("python main.py --mode collect --dataset <name> --samples <count>")
+        return
+
+    for dataset in sorted(datasets, key=lambda x: x["name"]):
+        print(f"Dataset: {dataset['name']}")
+        print(f"  Samples: {dataset['samples']}")
+        print(f"  Created: {dataset['created']}")
+        print(f"  Path: {dataset['path']}")
+        print()
+
+    print("To use a dataset, run:")
+    print("python main.py --mode benchmark --dataset <name>")
+    print("python main.py --mode agent --dataset <name>")
+
+
+if __name__ == "__main__":
+    main()
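Run directly, the script prints one block per dataset directory; with only the "asia" dataset committed above, the output would look roughly like this (illustrative):

    $ python list_datasets.py
    📊 Available Datasets:
    ==================================================
    Dataset: asia
      Samples: 10
      Created: 2025-06-11T21:13:45.005091
      Path: datasets/asia/golden_labels.json

    To use a dataset, run:
    python main.py --mode benchmark --dataset <name>
    python main.py --mode agent --dataset <name>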
main.py CHANGED
@@ -9,49 +9,62 @@ from langchain_google_genai import ChatGoogleGenerativeAI
 
 from geo_bot import GeoBot
 from benchmark import MapGuesserBenchmark
-from config import MODELS_CONFIG, DATA_PATHS, SUCCESS_THRESHOLD_KM
-
-
-def agent_mode(model_name: str, steps: int, headless: bool, samples: int, temperature: float = 0.0):
+from data_collector import DataCollector
+from config import MODELS_CONFIG, get_data_paths, SUCCESS_THRESHOLD_KM
+
+
+def agent_mode(
+    model_name: str,
+    steps: int,
+    headless: bool,
+    samples: int,
+    dataset_name: str = "default",
+    temperature: float = 0.0,
+):
     """
     Runs the AI Agent in a benchmark loop over multiple samples,
     using multi-step exploration for each.
     """
     print(
-        f"Starting Agent Mode (as a benchmark): model={model_name}, steps={steps}, samples={samples}, temperature={temperature}"
+        f"Starting Agent Mode: model={model_name}, steps={steps}, samples={samples}, dataset={dataset_name}, temperature={temperature}"
     )
 
+    data_paths = get_data_paths(dataset_name)
     try:
-        with open(DATA_PATHS["golden_labels"], "r", encoding="utf-8") as f:
+        with open(data_paths["golden_labels"], "r", encoding="utf-8") as f:
             golden_labels = json.load(f).get("samples", [])
     except FileNotFoundError:
-        print(f"Error: Golden labels file not found at {DATA_PATHS['golden_labels']}.")
+        print(
+            f"Error: Dataset '{dataset_name}' not found at {data_paths['golden_labels']}."
+        )
         return
 
     if not golden_labels:
-        print("Error: No samples found in golden_labels.json.")
+        print(f"Error: No samples found in dataset '{dataset_name}'.")
         return
 
     num_to_test = min(samples, len(golden_labels))
     test_samples = golden_labels[:num_to_test]
-    print(f"Will run on {len(test_samples)} samples.")
+    print(f"Will run on {len(test_samples)} samples from dataset '{dataset_name}'.")
 
     config = MODELS_CONFIG.get(model_name)
     model_class = globals()[config["class"]]
     model_instance_name = config["model_name"]
 
-    benchmark_helper = MapGuesserBenchmark(headless=True)
+    benchmark_helper = MapGuesserBenchmark(dataset_name=dataset_name, headless=True)
     all_results = []
 
     with GeoBot(
-        model=model_class, model_name=model_instance_name, headless=headless, temperature=temperature
+        model=model_class,
+        model_name=model_instance_name,
+        headless=headless,
+        temperature=temperature,
     ) as bot:
         for i, sample in enumerate(test_samples):
             print(
                 f"\n--- Running Sample {i + 1}/{len(test_samples)} (ID: {sample.get('id')}) ---"
             )
 
-            # **FIXED**: correct sequence: Load Data -> Clean Environment -> Run Loop
             if not bot.controller.load_location_from_data(sample):
                 print(
                     f"   ❌ Failed to load location for sample {sample.get('id')}. Skipping."
@@ -98,36 +111,61 @@ def agent_mode(
 
     summary = benchmark_helper.generate_summary(all_results)
     if summary:
-        print("\n\n--- Agent Benchmark Complete! Summary ---")
+        print(
+            f"\n\n--- Agent Benchmark Complete for dataset '{dataset_name}'! Summary ---"
+        )
         for model, stats in summary.items():
             print(f"Model: {model}")
             print(f"  Success Rate: {stats['success_rate'] * 100:.1f}%")
             print(f"  Avg Distance: {stats['average_distance_km']:.1f} km")
 
-    print("\nAgent Mode finished.")
+    print("Agent Mode finished.")
 
 
-def benchmark_mode(models: list, samples: int, headless: bool, temperature: float = 0.0):
+def benchmark_mode(
+    models: list,
+    samples: int,
+    headless: bool,
+    dataset_name: str = "default",
+    temperature: float = 0.0,
+):
     """Runs the benchmark on pre-collected data."""
-    print(f"Starting Benchmark Mode: models={models}, samples={samples}, temperature={temperature}")
-    benchmark = MapGuesserBenchmark(headless=headless)
-    summary = benchmark.run_benchmark(models=models, max_samples=samples, temperature=temperature)
+    print(
+        f"Starting Benchmark Mode: models={models}, samples={samples}, dataset={dataset_name}, temperature={temperature}"
+    )
+    benchmark = MapGuesserBenchmark(dataset_name=dataset_name, headless=headless)
+    summary = benchmark.run_benchmark(
+        models=models, max_samples=samples, temperature=temperature
+    )
     if summary:
-        print("\n--- Benchmark Complete! Summary ---")
+        print(f"\n--- Benchmark Complete for dataset '{dataset_name}'! Summary ---")
         for model, stats in summary.items():
             print(f"Model: {model}")
             print(f"  Success Rate: {stats['success_rate'] * 100:.1f}%")
             print(f"  Avg Distance: {stats['average_distance_km']:.1f} km")
 
 
+def collect_mode(dataset_name: str, samples: int, headless: bool):
+    """Collects data for a new dataset."""
+    print(f"Starting Data Collection: dataset={dataset_name}, samples={samples}")
+    with DataCollector(dataset_name=dataset_name, headless=headless) as collector:
+        collector.collect_samples(num_samples=samples)
+    print(f"Data collection complete for dataset '{dataset_name}'.")
+
+
 def main():
     parser = argparse.ArgumentParser(description="MapCrunch AI Agent & Benchmark")
     parser.add_argument(
         "--mode",
-        choices=["agent", "benchmark"],
+        choices=["agent", "benchmark", "collect"],
         default="agent",
         help="Operation mode.",
     )
+    parser.add_argument(
+        "--dataset",
+        default="default",
+        help="Dataset name to use or create.",
+    )
     parser.add_argument(
         "--model",
         choices=list(MODELS_CONFIG.keys()),
@@ -161,12 +199,19 @@ def main():
 
     args = parser.parse_args()
 
-    if args.mode == "agent":
+    if args.mode == "collect":
+        collect_mode(
+            dataset_name=args.dataset,
+            samples=args.samples,
+            headless=args.headless,
+        )
+    elif args.mode == "agent":
         agent_mode(
             model_name=args.model,
             steps=args.steps,
             headless=args.headless,
             samples=args.samples,
+            dataset_name=args.dataset,
             temperature=args.temperature,
         )
     elif args.mode == "benchmark":
@@ -174,6 +219,7 @@ def main():
             models=args.models or [args.model],
             samples=args.samples,
             headless=args.headless,
+            dataset_name=args.dataset,
             temperature=args.temperature,
         )
 
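Taken together, the new --dataset flag ties all three modes to one named dataset; typical invocations (dataset name and counts illustrative):

    python main.py --mode collect --dataset asia --samples 10    # builds datasets/asia/
    python main.py --mode benchmark --dataset asia --model gpt-4o
    python main.py --mode agent --dataset asia --steps 5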