Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -4,6 +4,7 @@ import requests
|
|
4 |
import os
|
5 |
import pandas as pd
|
6 |
import folium
|
|
|
7 |
from geopy.geocoders import Nominatim
|
8 |
from geopy.exc import GeocoderTimedOut, GeocoderServiceError
|
9 |
import time
|
@@ -11,10 +12,6 @@ import random
|
|
11 |
from typing import List, Tuple, Optional
|
12 |
import io
|
13 |
|
14 |
-
# NuExtract API configuration
|
15 |
-
API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"
|
16 |
-
headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}"}
|
17 |
-
|
18 |
# Geocoding Service
|
19 |
class GeocodingService:
|
20 |
def __init__(self, user_agent: str = None, timeout: int = 10, rate_limit: float = 1.1):
|
@@ -92,55 +89,86 @@ class GeocodingService:
|
|
92 |
# Mapping Functions
|
93 |
def create_location_map(df: pd.DataFrame,
|
94 |
coordinates_col: str = 'coordinates',
|
95 |
-
places_col: str = '
|
96 |
title_col: Optional[str] = None) -> folium.Map:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
# Initialize the map
|
98 |
m = folium.Map(location=[0, 0], zoom_start=2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
all_coords = []
|
100 |
|
101 |
# Process each row in the DataFrame
|
102 |
for idx, row in df.iterrows():
|
103 |
coordinates = row[coordinates_col]
|
104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
title = row[title_col] if title_col and pd.notna(row[title_col]) else None
|
106 |
|
107 |
# Skip if no coordinates
|
108 |
if not coordinates:
|
109 |
continue
|
110 |
|
111 |
-
# Parse places into a list
|
112 |
-
try:
|
113 |
-
places = [p.strip() for p in places_text.split(',') if p.strip()]
|
114 |
-
except:
|
115 |
-
# Fall back to treating it as a single place if splitting fails
|
116 |
-
places = [places_text] if places_text else []
|
117 |
-
|
118 |
# Ensure places and coordinates have compatible lengths
|
119 |
-
# If places is shorter, add placeholder names
|
120 |
while len(places) < len(coordinates):
|
121 |
places.append(f"Location {len(places) + 1}")
|
122 |
-
|
123 |
-
# Add markers for each
|
124 |
for i, coord in enumerate(coordinates):
|
125 |
if coord is not None: # Skip None coordinates
|
126 |
lat, lon = coord
|
127 |
|
128 |
# Get place name safely
|
129 |
-
if i < len(places)
|
130 |
-
place_name = places[i]
|
131 |
-
else:
|
132 |
-
place_name = f"Location {i + 1}"
|
133 |
|
134 |
# Create popup content
|
135 |
popup_content = f"<b>{place_name}</b>"
|
136 |
if title:
|
137 |
popup_content += f"<br>{title}"
|
138 |
|
139 |
-
# Add marker to the map
|
140 |
folium.Marker(
|
141 |
location=[lat, lon],
|
142 |
popup=folium.Popup(popup_content, max_width=300),
|
143 |
tooltip=place_name,
|
|
|
|
|
144 |
).add_to(m)
|
145 |
|
146 |
all_coords.append([lat, lon])
|
@@ -148,6 +176,10 @@ def create_location_map(df: pd.DataFrame,
|
|
148 |
# If we have coordinates, fit the map bounds to include all points
|
149 |
if all_coords:
|
150 |
m.fit_bounds(all_coords)
|
|
|
|
|
|
|
|
|
151 |
|
152 |
return m
|
153 |
|
@@ -177,7 +209,7 @@ def process_excel(file, places_column):
|
|
177 |
return None, f"Column '{places_column}' not found in the Excel file. Available columns: {', '.join(df.columns)}", None
|
178 |
|
179 |
# Initialize the geocoding service
|
180 |
-
geocoder = GeocodingService(user_agent="
|
181 |
|
182 |
# Process locations and add coordinates
|
183 |
print(f"Processing locations from column: {places_column}")
|
@@ -213,140 +245,84 @@ def process_excel(file, places_column):
|
|
213 |
print(f"Error processing file: {e}\n{trace}")
|
214 |
return None, f"Error processing file: {str(e)}", None
|
215 |
|
216 |
-
#
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
"
|
225 |
-
"parameters": {
|
226 |
-
"max_new_tokens": 1000,
|
227 |
-
"do_sample": False
|
228 |
-
}
|
229 |
-
}
|
230 |
-
|
231 |
-
response = requests.post(API_URL, headers=headers, json=payload)
|
232 |
-
|
233 |
-
# If the model is loading, inform the user
|
234 |
-
if response.status_code == 503:
|
235 |
-
response_json = response.json()
|
236 |
-
if "error" in response_json and "loading" in response_json["error"]:
|
237 |
-
estimated_time = response_json.get("estimated_time", "unknown")
|
238 |
-
return f"⏳ Model is loading (ETA: {int(float(estimated_time)) if isinstance(estimated_time, (int, float, str)) else 'unknown'} seconds)", "Please try again in a few minutes"
|
239 |
-
|
240 |
-
if response.status_code != 200:
|
241 |
-
return f"❌ API Error: {response.status_code}", response.text
|
242 |
|
243 |
-
|
244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
245 |
|
246 |
-
# Handle different response formats
|
247 |
try:
|
248 |
-
|
249 |
-
if len(result) > 0:
|
250 |
-
result_text = result[0].get("generated_text", "")
|
251 |
-
else:
|
252 |
-
return "❌ Empty result list", "{}"
|
253 |
-
else:
|
254 |
-
result_text = str(result)
|
255 |
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
if len(parts) > 1:
|
260 |
-
json_text = parts[1].strip()
|
261 |
-
else:
|
262 |
-
json_text = result_text
|
263 |
-
else:
|
264 |
-
json_text = result_text
|
265 |
-
|
266 |
-
# Try to parse as JSON
|
267 |
-
try:
|
268 |
-
extracted = json.loads(json_text)
|
269 |
-
formatted = json.dumps(extracted, indent=2)
|
270 |
-
except json.JSONDecodeError:
|
271 |
-
return "❌ JSON parsing error", json_text
|
272 |
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
278 |
|
279 |
-
#
|
280 |
-
|
281 |
-
|
|
|
|
|
282 |
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
with gr.Row():
|
288 |
-
with gr.Column():
|
289 |
-
template = gr.Textbox(
|
290 |
-
label="JSON Template",
|
291 |
-
value='{"earthquake location": "", "dateline location": ""}',
|
292 |
-
lines=5
|
293 |
-
)
|
294 |
-
text = gr.Textbox(
|
295 |
-
label="Text to Extract From",
|
296 |
-
value="Neues Erdbeben in Japan. Aus Tokio wird berichtet, daß in Yokohama bei einem Erdbeben sechs Personen getötet und 22 verwundet, in Tokio vier getötet und 22 verwundet wurden. In Yokohama seien 6VV Häuser zerstört worden. Die telephonische und telegraphische Verbindung zwischen Tokio und Osaka ist unterbrochen worden. Der Trambahnverkehr in Tokio liegt still. Auch der Eisenbahnverkehr zwischen Tokio und Yokohama ist unterbrochen. In Sngamo, einer Vorstadt von Tokio sind Brände ausgebrochen. Ein Eisenbahnzug stürzte in den Vajugawafluß zwischen Gotemba und Tokio. Sechs Züge wurden umgeworfen. Mit dem letzten japanischen Erdbeben sind seit eineinhalb Jahrtausenden bis heute in Japan 229 größere Erdbeben zu verzeichnen gewesen.",
|
297 |
-
lines=8
|
298 |
-
)
|
299 |
-
extract_btn = gr.Button("Extract Information", variant="primary")
|
300 |
-
|
301 |
-
with gr.Column():
|
302 |
-
status = gr.Textbox(label="Status")
|
303 |
-
output = gr.Textbox(label="Output", lines=10)
|
304 |
-
|
305 |
-
extract_btn.click(
|
306 |
-
fn=extract_info,
|
307 |
-
inputs=[template, text],
|
308 |
-
outputs=[status, output]
|
309 |
-
)
|
310 |
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
else:
|
338 |
-
return None, stats, None
|
339 |
-
except Exception as e:
|
340 |
-
import traceback
|
341 |
-
trace = traceback.format_exc()
|
342 |
-
print(f"Error in process_and_map: {e}\n{trace}")
|
343 |
-
return None, f"Error: {str(e)}", None
|
344 |
-
|
345 |
-
process_btn.click(
|
346 |
-
fn=process_and_map,
|
347 |
-
inputs=[excel_file, places_column],
|
348 |
-
outputs=[map_output, stats_output, processed_file]
|
349 |
-
)
|
350 |
|
|
|
351 |
if __name__ == "__main__":
|
352 |
demo.launch()
|
|
|
4 |
import os
|
5 |
import pandas as pd
|
6 |
import folium
|
7 |
+
from folium import plugins
|
8 |
from geopy.geocoders import Nominatim
|
9 |
from geopy.exc import GeocoderTimedOut, GeocoderServiceError
|
10 |
import time
|
|
|
12 |
from typing import List, Tuple, Optional
|
13 |
import io
|
14 |
|
|
|
|
|
|
|
|
|
15 |
# Geocoding Service
|
16 |
class GeocodingService:
|
17 |
def __init__(self, user_agent: str = None, timeout: int = 10, rate_limit: float = 1.1):
|
|
|
89 |
# Mapping Functions
|
90 |
def create_location_map(df: pd.DataFrame,
                        coordinates_col: str = 'coordinates',
                        places_col: str = 'place_of_distribution',
                        title_col: Optional[str] = None) -> folium.Map:
    """
    Create an interactive map with one marker per geocoded location.

    Args:
        df: DataFrame containing coordinates and location names.
        coordinates_col: Name of the column holding, per row, a sequence of
            (lat, lon) pairs; entries that are None are skipped.
        places_col: Name of the column holding the location name(s); a
            comma-separated string is split into one name per coordinate.
        title_col: Optional column whose value is appended to each popup.

    Returns:
        folium.Map with every valid location marked individually, plus
        fullscreen, search, layer and measure controls.
    """
    # Local import so the block does not depend on the file's import header.
    import html

    # Initialize the map
    m = folium.Map(location=[0, 0], zoom_start=2)

    # Add fullscreen option
    plugins.Fullscreen().add_to(m)

    # All markers live in one FeatureGroup: the Search plugin can only index
    # a concrete layer (it rejects layer=None), and the group also gives
    # LayerControl something to toggle.
    marker_layer = folium.FeatureGroup(name="Locations")

    # Keep track of all valid coordinates for setting bounds
    all_coords = []

    # Process each row in the DataFrame
    for idx, row in df.iterrows():
        coordinates = row[coordinates_col]

        # Handle places column - best effort; a failure only loses the names
        try:
            raw_places = row[places_col]
            if pd.notna(raw_places):
                if isinstance(raw_places, str) and ',' in raw_places:
                    places = [p.strip() for p in raw_places.split(',')]
                else:
                    places = [str(raw_places)]
            else:
                places = []
        except Exception as e:
            print(f"Error processing places for row {idx}: {e}")
            places = []

        # Get optional title information
        title = row[title_col] if title_col and pd.notna(row[title_col]) else None

        # Skip rows without usable coordinates. A missing cell may surface as
        # NaN (a truthy float), which would otherwise crash on len() below.
        # NOTE(review): assumes the geocoder stores a list/tuple per row - confirm.
        if not isinstance(coordinates, (list, tuple)) or not coordinates:
            continue

        # Ensure there is a name for every coordinate
        while len(places) < len(coordinates):
            places.append(f"Location {len(places) + 1}")

        # Add individual markers for each location
        for place_name, coord in zip(places, coordinates):
            if coord is None:  # Skip locations that could not be geocoded
                continue

            lat, lon = coord

            # Spreadsheet text is untrusted: escape it before embedding in HTML
            safe_name = html.escape(str(place_name))
            popup_content = f"<b>{safe_name}</b>"
            if title:
                popup_content += f"<br>{html.escape(str(title))}"

            folium.Marker(
                location=[lat, lon],
                popup=folium.Popup(popup_content, max_width=300),
                tooltip=safe_name,
                # Extra marker option read by the Search control (search_label)
                name=str(place_name),
            ).add_to(marker_layer)

            all_coords.append([lat, lon])

    marker_layer.add_to(m)

    # Add search functionality over the marker layer; it must be added after
    # the layer so the generated script can reference it.
    plugins.Search(
        layer=marker_layer,
        geom_type="Point",
        placeholder="Search for a place...",
        collapsed=True,
        search_label="name"
    ).add_to(m)

    # If we have coordinates, fit the map bounds to include all points
    if all_coords:
        m.fit_bounds(all_coords)

    # Add layer control and measure tool
    folium.LayerControl().add_to(m)
    plugins.MeasureControl(position='topright', primary_length_unit='kilometers').add_to(m)

    return m
|
185 |
|
|
|
209 |
return None, f"Column '{places_column}' not found in the Excel file. Available columns: {', '.join(df.columns)}", None
|
210 |
|
211 |
# Initialize the geocoding service
|
212 |
+
geocoder = GeocodingService(user_agent="map_visualization_app")
|
213 |
|
214 |
# Process locations and add coordinates
|
215 |
print(f"Processing locations from column: {places_column}")
|
|
|
245 |
print(f"Error processing file: {e}\n{trace}")
|
246 |
return None, f"Error processing file: {str(e)}", None
|
247 |
|
248 |
+
# Create the Gradio interface
|
249 |
+
with gr.Blocks() as demo:
    gr.Markdown("# Location Mapping Tool")

    with gr.Row():
        # Left column: inputs
        with gr.Column():
            excel_file = gr.File(label="Upload Excel File")
            places_column = gr.Textbox(label="Places Column Name", value="place_of_distribution")
            process_btn = gr.Button("Process and Map", variant="primary")

        # Right column: outputs
        with gr.Column():
            map_output = gr.HTML(label="Map Visualization")
            stats_output = gr.Textbox(label="Statistics", lines=3)
            processed_file = gr.File(label="Processed Data", visible=True, interactive=False)

    def process_and_map(file, column):
        """
        Run process_excel on the uploaded file and adapt its result for the UI.

        Args:
            file: Uploaded Excel file from the File component, or None.
            column: Name of the column containing location names.

        Returns:
            Tuple (map_html, status_text, processed_path); map_html and
            processed_path are None when nothing could be produced.
        """
        if file is None:
            return None, "Please upload an Excel file", None

        try:
            map_path, stats, processed_path = process_excel(file, column)

            if map_path and processed_path:
                # Read with an explicit encoding instead of the locale default
                # so non-ASCII place names survive on any platform.
                # NOTE(review): assumes process_excel writes the map as UTF-8 - confirm.
                with open(map_path, "r", encoding="utf-8") as f:
                    map_html = f.read()

                return map_html, stats, processed_path
            else:
                return None, stats, None
        except Exception as e:
            # UI boundary: report the failure in the status box, never crash.
            import traceback
            trace = traceback.format_exc()
            print(f"Error in process_and_map: {e}\n{trace}")
            return None, f"Error: {str(e)}", None

    process_btn.click(
        fn=process_and_map,
        inputs=[excel_file, places_column],
        outputs=[map_output, stats_output, processed_file]
    )
|
288 |
|
289 |
+
# For direct use in Jupyter or standalone Python scripts
|
290 |
+
def create_map_from_excel(file_path, places_column='place_of_distribution'):
    """
    Create a map directly from an Excel file without using Gradio.

    Useful for Jupyter notebooks or standalone scripts.

    Args:
        file_path: Path to the Excel file
        places_column: Name of the column containing location names

    Returns:
        folium.Map object, or None if the column is missing or any step
        fails (the reason is printed).
    """
    try:
        # Read Excel file
        df = pd.read_excel(file_path)

        # Check if column exists
        if places_column not in df.columns:
            # Column labels may be non-strings (e.g. numeric headers), so
            # convert before joining; otherwise join() raises TypeError and
            # this message is replaced by the generic one below.
            available = ', '.join(str(col) for col in df.columns)
            print(f"Column '{places_column}' not found. Available columns: {available}")
            return None

        # Initialize geocoder
        geocoder = GeocodingService(user_agent="jupyter_map_app")

        # Process locations: one geocoding result per row
        df['coordinates'] = df[places_column].apply(geocoder.process_locations)

        # Create map
        return create_location_map(df, coordinates_col='coordinates', places_col=places_column)

    except Exception as e:
        # Best-effort helper: report the problem and signal failure with None.
        print(f"Error creating map: {e}")
        return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
325 |
|
326 |
+
# For use in Hugging Face Spaces
|
327 |
if __name__ == "__main__":
|
328 |
demo.launch()
|