oberbics committed on
Commit
35509b3
·
verified ·
1 Parent(s): 47c367a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -119
app.py CHANGED
@@ -1,36 +1,47 @@
1
  import gradio as gr
2
  import pandas as pd
3
  import folium
4
- import requests
5
- import os
6
- import tempfile
7
- import time
8
- import json
9
  from geopy.geocoders import Nominatim
10
  from geopy.extra.rate_limiter import RateLimiter
 
11
  from typing import Optional, Tuple
12
  import warnings
13
 
14
  # Suppress warnings
15
  warnings.filterwarnings("ignore")
16
 
17
- # --- Configuration ---
18
- API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"
19
- HF_TOKEN = os.environ.get("HF_TOKEN", "")
20
- HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- # --- Geocoding Service ---
23
  class Geocoder:
24
  def __init__(self):
25
- self.geolocator = Nominatim(
26
- user_agent="historical_data_mapper_v2",
27
- timeout=10
28
- )
29
- self.geocode = RateLimiter(
30
- self.geolocator.geocode,
31
- min_delay_seconds=1,
32
- max_retries=2
33
- )
34
  self.cache = {}
35
 
36
  def get_coords(self, location: str) -> Optional[Tuple[float, float]]:
@@ -51,55 +62,85 @@ class Geocoder:
51
  self.cache[location] = None
52
  return None
53
 
54
- # --- Map Generation ---
55
- def create_map(df: pd.DataFrame, location_col: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
56
  geocoder = Geocoder()
57
- m = folium.Map(tiles="CartoDB positron", control_scale=True)
58
- coords_list = []
59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  for loc in df[location_col].dropna().unique():
61
  coords = geocoder.get_coords(str(loc))
62
  if coords:
63
  folium.Marker(
64
  location=coords,
65
- popup=loc,
66
- icon=folium.Icon(color="blue")
 
 
 
 
67
  ).add_to(m)
68
  coords_list.append(coords)
69
 
 
 
70
  if coords_list:
71
  m.fit_bounds(coords_list)
72
- else:
73
- m.location = [20, 0]
74
- return "<div style='color:red;text-align:center'>No valid locations found</div>"
75
 
76
  return m._repr_html_()
77
 
78
- # --- File Processing ---
79
- def process_file(file_obj, location_col: str):
80
  try:
81
- # Read file
82
  df = pd.read_excel(file_obj.name)
83
 
84
- # Validate column
85
  if location_col not in df.columns:
86
  return None, f"Column '{location_col}' not found", None
87
 
88
- # Generate map
89
- map_html = create_map(df, location_col)
90
 
91
  # Save processed data
92
  with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
93
  df.to_excel(tmp.name, index=False)
94
  processed_path = tmp.name
95
 
 
96
  stats = (
97
- f"Total rows: {len(df)}\n"
98
- f"Unique locations: {df[location_col].nunique()}"
 
99
  )
100
 
101
  return (
102
- f"<div style='width:100%; height:65vh'>{map_html}</div>",
103
  stats,
104
  processed_path
105
  )
@@ -107,92 +148,45 @@ def process_file(file_obj, location_col: str):
107
  except Exception as e:
108
  return None, f"Error: {str(e)}", None
109
 
110
- # --- NuExtract API ---
111
- def extract_info(template: str, text: str):
112
- try:
113
- prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
114
- response = requests.post(
115
- API_URL,
116
- headers=HEADERS,
117
- json={"inputs": prompt, "parameters": {"max_new_tokens": 1000}}
118
- )
119
-
120
- if response.status_code == 503:
121
- return "⏳ Model is loading...", "Try again later"
122
-
123
- result = response.json()
124
- if isinstance(result, list):
125
- output = result[0].get("generated_text", "").split("<|output|>")[-1].strip()
126
- try:
127
- json.loads(output) # Validate JSON
128
- return "✅ Success", output
129
- except:
130
- return "⚠️ Partial Output", output
131
- return "❌ Unexpected Response", str(result)
132
-
133
- except Exception as e:
134
- return f"❌ Error: {str(e)}", ""
135
-
136
- # --- Gradio Interface ---
137
- with gr.Blocks(title="Historical Data Tools", theme=gr.themes.Soft()) as app:
138
- gr.Markdown("# Historical Data Analysis Tools")
139
 
140
- with gr.Tabs():
141
- with gr.Tab("Text Extraction"):
142
- gr.Markdown("## NuExtract-1.5 Structured Data Extraction")
143
- with gr.Row():
144
- with gr.Column():
145
- template = gr.Textbox(
146
- label="JSON Template",
147
- value='{"location": "", "date": ""}',
148
- lines=5
149
- )
150
- input_text = gr.Textbox(
151
- label="Input Text",
152
- value="The earthquake occurred in San Francisco on April 18, 1906.",
153
- lines=5
154
- )
155
- extract_btn = gr.Button("Extract", variant="primary")
156
-
157
- with gr.Column():
158
- status = gr.Textbox(label="Status")
159
- output = gr.Textbox(label="Extracted Data", lines=10)
160
-
161
- extract_btn.click(
162
- extract_info,
163
- inputs=[template, input_text],
164
- outputs=[status, output]
165
  )
 
 
 
 
 
 
 
 
166
 
167
- with gr.Tab("Location Mapping"):
168
- gr.Markdown("## Geocode and Map Locations")
169
- with gr.Row():
170
- with gr.Column():
171
- file_input = gr.File(
172
- label="Upload Excel File",
173
- file_types=[".xlsx", ".xls"]
174
- )
175
- location_col = gr.Textbox(
176
- label="Location Column Name",
177
- value="locations",
178
- placeholder="Enter exact column name"
179
- )
180
- map_btn = gr.Button("Generate Map", variant="primary")
181
-
182
- with gr.Column():
183
- map_display = gr.HTML(
184
- label="Interactive Map",
185
- value="<div style='text-align:center;padding:20px;'>"
186
- "Map will appear here</div>"
187
- )
188
- stats = gr.Textbox(label="Statistics")
189
- download = gr.File(label="Processed Data", visible=False)
190
-
191
- map_btn.click(
192
- process_file,
193
- inputs=[file_input, location_col],
194
- outputs=[map_display, stats, download]
195
  )
 
 
 
 
 
 
 
 
196
 
197
  if __name__ == "__main__":
198
  app.launch()
 
1
  import gradio as gr
2
  import pandas as pd
3
  import folium
 
 
 
 
 
4
  from geopy.geocoders import Nominatim
5
  from geopy.extra.rate_limiter import RateLimiter
6
+ import tempfile
7
  from typing import Optional, Tuple
8
  import warnings
9
 
10
  # Suppress warnings
11
  warnings.filterwarnings("ignore")
12
 
13
# Historical tile providers, keyed by the display name shown for the layer.
# Each entry carries the tile URL template, its attribution string and the
# inclusive [min_year, max_year] range the layer is meant to represent.
# Insertion order is preserved, so iteration visits the entries top to bottom.
HISTORICAL_TILES = {
    "David Rumsey (1790)": dict(
        url="https://map1.davidrumsey.com/tiles/rumsey/SDSC1790/{z}/{x}/{y}.png",
        attr="David Rumsey Map Collection",
        min_year=1700,
        max_year=1800,
    ),
    "David Rumsey (1860)": dict(
        url="https://map1.davidrumsey.com/tiles/rumsey/SDSC1860/{z}/{x}/{y}.png",
        attr="David Rumsey Map Collection",
        min_year=1801,
        max_year=1900,
    ),
    "Stamen (1915)": dict(
        url="https://stamen-tiles.a.ssl.fastly.net/toner-lite/{z}/{x}/{y}.png",
        attr="Stamen Maps",
        min_year=1901,
        max_year=1920,
    ),
    "OpenHistoricalMap": dict(
        url="https://tile.openhistoricalmap.org/{z}/{x}/{y}.png",
        attr="OpenHistoricalMap",
        min_year=1700,
        max_year=2023,
    ),
}
40
 
 
41
  class Geocoder:
42
  def __init__(self):
43
+ self.geolocator = Nominatim(user_agent="historical_mapper", timeout=10)
44
+ self.geocode = RateLimiter(self.geolocator.geocode, min_delay_seconds=1)
 
 
 
 
 
 
 
45
  self.cache = {}
46
 
47
  def get_coords(self, location: str) -> Optional[Tuple[float, float]]:
 
62
  self.cache[location] = None
63
  return None
64
 
65
+ def get_tile_layer(year: int):
66
+ """Select the most appropriate tile layer for the given year"""
67
+ for name, config in HISTORICAL_TILES.items():
68
+ if config["min_year"] <= year <= config["max_year"]:
69
+ return folium.TileLayer(
70
+ tiles=config["url"],
71
+ attr=config["attr"],
72
+ name=name,
73
+ overlay=False
74
+ )
75
+ return folium.TileLayer("OpenStreetMap")
76
+
77
def create_historical_map(df: pd.DataFrame, location_col: str, year: int = 1900) -> str:
    """Build a folium map of the geocoded locations and return it as HTML.

    Args:
        df: Table holding the location names.
        location_col: Name of the column in *df* that contains the locations.
        year: Year used to pick the initially displayed base layer and shown
            in every marker popup.

    Returns:
        The map's HTML representation (``folium.Map._repr_html_()``).
    """
    # Local import so this function stays self-contained.
    from html import escape

    geocoder = Geocoder()

    # Create the map WITHOUT folium's default OpenStreetMap tiles: otherwise
    # that default layer is registered first and competes with the
    # year-matched layer for being the one displayed on open.
    base_layer = get_tile_layer(year)
    m = folium.Map(
        location=[40, -10],
        zoom_start=2,
        control_scale=True,
        tiles=None,
    )
    base_layer.add_to(m)

    # Offer every other historical layer as a selectable alternative.
    # show=False keeps them from being drawn over the year-matched base layer.
    for name, config in HISTORICAL_TILES.items():
        if config["url"] != base_layer.tiles:
            folium.TileLayer(
                tiles=config["url"],
                attr=config["attr"],
                name=f"{name} ({config['min_year']}-{config['max_year']})",
                overlay=False,
                show=False,
            ).add_to(m)

    # One marker per distinct, non-null location that could be geocoded.
    coords_list = []
    for loc in df[location_col].dropna().unique():
        coords = geocoder.get_coords(str(loc))
        if coords:
            # Cell text comes from an uploaded spreadsheet; escape it before
            # embedding it in the popup's HTML.
            label = escape(str(loc))
            folium.Marker(
                location=coords,
                popup=f"<b>{label}</b><br>Year: {year}",
                icon=folium.Icon(
                    color="red",
                    icon="info-sign",
                    # "info-sign" is a Glyphicon name, so it needs the
                    # glyphicon prefix (it does not exist under "fa").
                    prefix="glyphicon",
                ),
            ).add_to(m)
            coords_list.append(coords)

    # Add layer control and zoom to the markers when there are any.
    folium.LayerControl().add_to(m)
    if coords_list:
        m.fit_bounds(coords_list)

    return m._repr_html_()
117
 
118
def process_file(file_obj, location_col, year):
    """Read an uploaded Excel file, map its locations and export the data.

    Args:
        file_obj: The upload, either as a filesystem path string (what a file
            component configured with ``type="filepath"`` delivers) or as an
            object exposing the path through a ``.name`` attribute.
        location_col: Name of the column that holds the location names.
        year: Year forwarded to the map builder and echoed in the statistics.

    Returns:
        A 3-tuple ``(map_html, stats, processed_path)``. On any failure the
        tuple is ``(None, message, None)`` where *message* describes the
        problem.
    """
    if file_obj is None:
        return None, "Error: no file uploaded", None

    try:
        # Accept both a plain path and a file-like wrapper with `.name`.
        path = file_obj if isinstance(file_obj, str) else file_obj.name

        # Read input file
        df = pd.read_excel(path)

        # Validate column exists
        if location_col not in df.columns:
            return None, f"Column '{location_col}' not found", None

        # The year arrives from a UI slider; normalise it to an int.
        year = int(year)

        # Create historical map
        map_html = create_historical_map(df, location_col, year)

        # Save processed data. Only reserve the file name inside the context
        # manager and write after the handle is closed, so the path is not
        # held open while pandas writes to it.
        with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
            processed_path = tmp.name
        df.to_excel(processed_path, index=False)

        # Generate stats
        stats = (
            f"Total locations: {len(df)}\n"
            f"Unique places: {df[location_col].nunique()}\n"
            f"Map year: {year}"
        )

        return (
            f"<div style='width:100%; height:70vh'>{map_html}</div>",
            stats,
            processed_path,
        )

    except Exception as e:
        # UI callback boundary: report the failure instead of raising.
        return None, f"Error: {str(e)}", None
150
 
151
# Gradio Interface: inputs on the left, generated map and outputs on the right.
with gr.Blocks(title="Historical Map Explorer", theme=gr.themes.Soft()) as app:
    gr.Markdown("# Historical Location Mapper")

    with gr.Row():
        # Input column: spreadsheet, column name, target year, trigger button.
        with gr.Column():
            file_input = gr.File(
                type="filepath",
                file_types=[".xlsx", ".xls"],
                label="Upload Excel File",
            )
            location_col = gr.Textbox(
                value="locations",
                placeholder="Enter exact column name with locations",
                label="Location Column Name",
            )
            year = gr.Slider(
                label="Map Year",
                minimum=1700,
                maximum=2023,
                step=1,
                value=1900,
            )
            map_btn = gr.Button("Generate Historical Map", variant="primary")

        # Output column: rendered map, text statistics, downloadable file.
        with gr.Column():
            map_display = gr.HTML(
                value=(
                    "<div style='text-align:center;padding:20px;'>"
                    "Map will appear here after processing</div>"
                ),
                label="Historical Map",
            )
            stats_output = gr.Textbox(label="Statistics")
            download_output = gr.File(label="Download Processed Data")

    # Wire the button to the processing callback.
    map_btn.click(
        fn=process_file,
        inputs=[file_input, location_col, year],
        outputs=[map_display, stats_output, download_output],
    )

if __name__ == "__main__":
    app.launch()