oberbics committed on
Commit
bb19da4
·
verified ·
1 Parent(s): 70748ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -158
app.py CHANGED
@@ -65,13 +65,10 @@ class SafeGeocoder:
65
  self.cache[location] = None
66
  return None
67
 
68
- # NuExtract Functions
69
  def extract_info(template, text):
70
  try:
71
- # Format prompt according to NuExtract-1.5 requirements
72
  prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
73
 
74
- # Call API
75
  payload = {
76
  "inputs": prompt,
77
  "parameters": {
@@ -82,7 +79,6 @@ def extract_info(template, text):
82
 
83
  response = requests.post(API_URL, headers=headers, json=payload)
84
 
85
- # If the model is loading, inform the user
86
  if response.status_code == 503:
87
  response_json = response.json()
88
  if "error" in response_json and "loading" in response_json["error"]:
@@ -92,22 +88,18 @@ def extract_info(template, text):
92
  if response.status_code != 200:
93
  return f"❌ API Error: {response.status_code}", response.text
94
 
95
- # Process result
96
  result = response.json()
97
 
98
- # Handle different response formats
99
  if isinstance(result, list) and len(result) > 0:
100
  result_text = result[0].get("generated_text", "")
101
  else:
102
  result_text = str(result)
103
 
104
- # Split at output marker if present
105
  if "<|output|>" in result_text:
106
  json_text = result_text.split("<|output|>")[1].strip()
107
  else:
108
  json_text = result_text
109
 
110
- # Try to parse as JSON
111
  try:
112
  extracted = json.loads(json_text)
113
  formatted = json.dumps(extracted, indent=2)
@@ -119,16 +111,12 @@ def extract_info(template, text):
119
  return f"❌ Error: {str(e)}", "{}"
120
 
121
  def create_map(df, location_col):
122
- # Initialize map with satellite imagery which shows greenery
123
  m = folium.Map(
124
  location=[20, 0],
125
  zoom_start=2,
126
- control_scale=True,
127
- width='100%',
128
- height='100%'
129
  )
130
 
131
- # Add the satellite tile layer - this will show green areas
132
  folium.TileLayer(
133
  tiles=MAP_TILES["GreenMap"]["url"],
134
  attr=MAP_TILES["GreenMap"]["attr"],
@@ -137,11 +125,9 @@ def create_map(df, location_col):
137
  control=False
138
  ).add_to(m)
139
 
140
- # Add plugins
141
  Fullscreen().add_to(m)
142
  MeasureControl(position='topright', primary_length_unit='kilometers').add_to(m)
143
 
144
- # Process markers
145
  geocoder = SafeGeocoder()
146
  coords = []
147
  marker_cluster = MarkerCluster(name="Locations").add_to(m)
@@ -153,13 +139,11 @@ def create_map(df, location_col):
153
 
154
  location = str(row[location_col]).strip()
155
 
156
- # Get additional info
157
  additional_info = ""
158
  for col in df.columns:
159
  if col != location_col and not pd.isna(row[col]):
160
  additional_info += f"<br><b>{col}:</b> {row[col]}"
161
 
162
- # Parse locations
163
  try:
164
  locations = [loc.strip() for loc in location.split(',') if loc.strip()]
165
  if not locations:
@@ -167,7 +151,6 @@ def create_map(df, location_col):
167
  except:
168
  locations = [location]
169
 
170
- # Process each location
171
  for loc in locations:
172
  point = geocoder.get_coords(loc)
173
  if point:
@@ -190,11 +173,9 @@ def create_map(df, location_col):
190
  coords.append(point)
191
  processed_count += 1
192
 
193
- # Set bounds
194
  if coords:
195
  m.fit_bounds(coords)
196
 
197
- # Add custom font CSS and ensure proper styling
198
  custom_css = """
199
  <style>
200
  @import url('https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600&display=swap');
@@ -219,7 +200,6 @@ def process_excel(file, places_column):
219
  return None, "No file uploaded", None
220
 
221
  try:
222
- # Handle file
223
  if hasattr(file, 'name'):
224
  df = pd.read_excel(file.name)
225
  elif isinstance(file, bytes):
@@ -232,15 +212,12 @@ def process_excel(file, places_column):
232
  if places_column not in df.columns:
233
  return None, f"Column '{places_column}' not found in the Excel file. Available columns: {', '.join(df.columns)}", None
234
 
235
- # Create map
236
  map_html, processed_count = create_map(df, places_column)
237
 
238
- # Save processed data
239
  with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
240
  processed_path = tmp.name
241
  df.to_excel(processed_path, index=False)
242
 
243
- # Stats
244
  total_locations = df[places_column].count()
245
  success_rate = (processed_count / total_locations * 100) if total_locations > 0 else 0
246
 
@@ -253,9 +230,6 @@ def process_excel(file, places_column):
253
  print(f"Error processing file: {e}\n{trace}")
254
  return None, f"Error processing file: {str(e)}", None
255
 
256
- # Create separate interfaces for each tab to avoid conflicts
257
-
258
- # CSS for improved styling
259
  custom_css = """
260
  <style>
261
  @import url('https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@300;400;600;700&display=swap');
@@ -299,135 +273,30 @@ h2 {
299
  text-align: center;
300
  transition: all 0.3s ease;
301
  }
302
- </style>
303
- """
304
 
305
- # Text Extraction tab as a separate Blocks interface
306
- with gr.Blocks(css=custom_css) as extraction_interface:
307
- gr.HTML("""
308
- <div class="info-box">
309
- <h3 style="margin-top: 0;">Extract Structured Data from Text</h3>
310
- <p>Use NuExtract-1.5 to automatically extract structured information from historical texts. Define the JSON template for the data you want to extract.</p>
311
- </div>
312
- """)
313
-
314
- with gr.Row():
315
- with gr.Column():
316
- template = gr.Textbox(
317
- label="JSON Template",
318
- value='{"earthquake location": "", "dateline location": ""}',
319
- lines=5
320
- )
321
- text = gr.Textbox(
322
- label="Text to Extract From",
323
- value="Neues Erdbeben in Japan. Aus Tokio wird berichtet, daß in Yokohama bei einem Erdbeben sechs Personen getâtet und 22 verwundet, in Tokio vier getâtet und 22 verwundet wurden. In Yokohama seien 6VV HÀuser zerstârt worden. Die telephonische und telegraphische Verbindung zwischen Tokio und Osaka ist unterbrochen worden. Der Trambahnverkehr in Tokio liegt still. Auch der Eisenbahnverkehr zwischen Tokio und Yokohama ist unterbrochen. In Sngamo, einer Vorstadt von Tokio sind BrÀnde ausgebrochen. Ein Eisenbahnzug stürzte in den Vajugawafluß zwischen Gotemba und Tokio. Sechs Züge wurden umgeworfen. Mit dem letzten japanischen Erdbeben sind seit eineinhalb Jahrtausenden bis heute in Japan 229 grâßere Erdbeben zu verzeichnen gewesen.",
324
- lines=8
325
- )
326
- extract_btn = gr.Button("Extract Information", variant="primary")
327
-
328
- with gr.Column():
329
- status = gr.Textbox(label="Status")
330
- output = gr.Textbox(label="Output", lines=10)
331
-
332
- extract_btn.click(
333
- fn=extract_info,
334
- inputs=[template, text],
335
- outputs=[status, output]
336
- )
337
 
338
- # Mapping tab as a separate Blocks interface
339
- with gr.Blocks(css=custom_css) as mapping_interface:
340
- gr.HTML("""
341
- <div class="info-box">
342
- <h3 style="margin-top: 0;">Map Your Historical Locations</h3>
343
- <p>Upload an Excel file containing location data to create an interactive map visualization. The tool will geocode your locations and display them on a map.</p>
344
- </div>
345
- """)
346
-
347
- with gr.Row():
348
- with gr.Column():
349
- excel_file = gr.File(
350
- label="Upload Excel File",
351
- file_types=[".xlsx", ".xls"],
352
- elem_classes="file-upload-box"
353
- )
354
- places_column = gr.Textbox(
355
- label="Location Column Name",
356
- value="dateline_locations",
357
- placeholder="e.g., 'dateline_locations', 'earthquake_locations', or 'place_of_distribution'"
358
- )
359
- process_btn = gr.Button("Generate Map", variant="primary")
360
-
361
- with gr.Column():
362
- map_output = gr.HTML(
363
- label="Interactive Map",
364
- value="""
365
- <div style="text-align:center; height:70vh; width:100%; display:flex; align-items:center; justify-content:center;
366
- background-color:#f5f5f5; border:1px solid #e0e0e0; border-radius:8px;">
367
- <div>
368
- <img src="https://cdn-icons-png.flaticon.com/512/854/854878.png" width="100">
369
- <p style="margin-top:20px; color:#666;">Your map will appear here after processing</p>
370
- </div>
371
- </div>
372
- """,
373
- elem_id="map_container"
374
- )
375
- stats_output = gr.Textbox(
376
- label="Location Statistics",
377
- lines=2
378
- )
379
- processed_file = gr.File(
380
- label="Download Processed Data",
381
- visible=True,
382
- interactive=False
383
- )
384
-
385
- def process_and_map(file, column):
386
- if file is None:
387
- return None, "Please upload an Excel file", None
388
-
389
- try:
390
- map_html, stats, processed_path = process_excel(file, column)
391
-
392
- if map_html and processed_path:
393
- # Create responsive container for the map that fills the available space
394
- # Remove extra height that was causing empty space below
395
- responsive_html = f"""
396
- <div style="width:100%; height:60vh; margin:0 0 -150px 0; padding:0; border:1px solid #e0e0e0; border-radius:8px; overflow:hidden; position:relative;">
397
- <style>
398
- /* Fix map sizing issues */
399
- .leaflet-container {{
400
- width: 100% !important;
401
- height: 100% !important;
402
- position: absolute !important;
403
- top: 0;
404
- left: 0;
405
- }}
406
- /* Hide any overflow that might cause extra space */
407
- #map_container {{
408
- overflow: hidden !important;
409
- margin-bottom: 0 !important;
410
- }}
411
- </style>
412
- {map_html}
413
- </div>
414
- """
415
- return responsive_html, stats, processed_path
416
- else:
417
- return None, stats, None
418
- except Exception as e:
419
- import traceback
420
- trace = traceback.format_exc()
421
- print(f"Error in process_and_map: {e}\n{trace}")
422
- return None, f"Error: {str(e)}", None
423
-
424
- process_btn.click(
425
- fn=process_and_map,
426
- inputs=[excel_file, places_column],
427
- outputs=[map_output, stats_output, processed_file]
428
- )
429
 
430
- # Main app with proper tab separation
431
  with gr.Blocks(css=custom_css, title="Historical Data Analysis") as demo:
432
  gr.HTML("""
433
  <div style="text-align: center; margin-bottom: 1rem">
@@ -438,16 +307,115 @@ with gr.Blocks(css=custom_css, title="Historical Data Analysis") as demo:
438
 
439
  with gr.Tabs() as tabs:
440
  with gr.TabItem("πŸ” Text Extraction"):
441
- # Instead of duplicating content, use the interface
442
- extraction_interface.render()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
443
 
 
 
 
 
 
 
444
  with gr.TabItem("πŸ“ Location Mapping"):
445
- # Instead of duplicating content, use the interface
446
- mapping_interface.render()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
447
 
448
  gr.HTML("""
449
  <div style="text-align: center; margin-top: 2rem; padding-top: 1rem; border-top: 1px solid #eee; font-size: 0.9rem; color: #666;">
450
- <p>Made with <span style="color: #e25555;">❀</span> for historical data research</p>
451
  </div>
452
  """)
453
 
 
65
  self.cache[location] = None
66
  return None
67
 
 
68
  def extract_info(template, text):
69
  try:
 
70
  prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
71
 
 
72
  payload = {
73
  "inputs": prompt,
74
  "parameters": {
 
79
 
80
  response = requests.post(API_URL, headers=headers, json=payload)
81
 
 
82
  if response.status_code == 503:
83
  response_json = response.json()
84
  if "error" in response_json and "loading" in response_json["error"]:
 
88
  if response.status_code != 200:
89
  return f"❌ API Error: {response.status_code}", response.text
90
 
 
91
  result = response.json()
92
 
 
93
  if isinstance(result, list) and len(result) > 0:
94
  result_text = result[0].get("generated_text", "")
95
  else:
96
  result_text = str(result)
97
 
 
98
  if "<|output|>" in result_text:
99
  json_text = result_text.split("<|output|>")[1].strip()
100
  else:
101
  json_text = result_text
102
 
 
103
  try:
104
  extracted = json.loads(json_text)
105
  formatted = json.dumps(extracted, indent=2)
 
111
  return f"❌ Error: {str(e)}", "{}"
112
 
113
  def create_map(df, location_col):
 
114
  m = folium.Map(
115
  location=[20, 0],
116
  zoom_start=2,
117
+ control_scale=True
 
 
118
  )
119
 
 
120
  folium.TileLayer(
121
  tiles=MAP_TILES["GreenMap"]["url"],
122
  attr=MAP_TILES["GreenMap"]["attr"],
 
125
  control=False
126
  ).add_to(m)
127
 
 
128
  Fullscreen().add_to(m)
129
  MeasureControl(position='topright', primary_length_unit='kilometers').add_to(m)
130
 
 
131
  geocoder = SafeGeocoder()
132
  coords = []
133
  marker_cluster = MarkerCluster(name="Locations").add_to(m)
 
139
 
140
  location = str(row[location_col]).strip()
141
 
 
142
  additional_info = ""
143
  for col in df.columns:
144
  if col != location_col and not pd.isna(row[col]):
145
  additional_info += f"<br><b>{col}:</b> {row[col]}"
146
 
 
147
  try:
148
  locations = [loc.strip() for loc in location.split(',') if loc.strip()]
149
  if not locations:
 
151
  except:
152
  locations = [location]
153
 
 
154
  for loc in locations:
155
  point = geocoder.get_coords(loc)
156
  if point:
 
173
  coords.append(point)
174
  processed_count += 1
175
 
 
176
  if coords:
177
  m.fit_bounds(coords)
178
 
 
179
  custom_css = """
180
  <style>
181
  @import url('https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600&display=swap');
 
200
  return None, "No file uploaded", None
201
 
202
  try:
 
203
  if hasattr(file, 'name'):
204
  df = pd.read_excel(file.name)
205
  elif isinstance(file, bytes):
 
212
  if places_column not in df.columns:
213
  return None, f"Column '{places_column}' not found in the Excel file. Available columns: {', '.join(df.columns)}", None
214
 
 
215
  map_html, processed_count = create_map(df, places_column)
216
 
 
217
  with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
218
  processed_path = tmp.name
219
  df.to_excel(processed_path, index=False)
220
 
 
221
  total_locations = df[places_column].count()
222
  success_rate = (processed_count / total_locations * 100) if total_locations > 0 else 0
223
 
 
230
  print(f"Error processing file: {e}\n{trace}")
231
  return None, f"Error processing file: {str(e)}", None
232
 
 
 
 
233
  custom_css = """
234
  <style>
235
  @import url('https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@300;400;600;700&display=swap');
 
273
  text-align: center;
274
  transition: all 0.3s ease;
275
  }
 
 
276
 
277
+ /* Fix for map container spacing */
278
+ #map-container {
279
+ height: 65vh !important;
280
+ margin-bottom: 0 !important;
281
+ padding-bottom: 0 !important;
282
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
 
284
+ /* Stats box styling */
285
+ .stats-box {
286
+ margin-top: 10px !important;
287
+ margin-bottom: 0 !important;
288
+ padding: 10px;
289
+ background: #f8f9fa;
290
+ border-radius: 4px;
291
+ }
292
+
293
+ /* Remove extra space around components */
294
+ .gr-box {
295
+ margin-bottom: 0 !important;
296
+ }
297
+ </style>
298
+ """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
 
 
300
  with gr.Blocks(css=custom_css, title="Historical Data Analysis") as demo:
301
  gr.HTML("""
302
  <div style="text-align: center; margin-bottom: 1rem">
 
307
 
308
  with gr.Tabs() as tabs:
309
  with gr.TabItem("πŸ” Text Extraction"):
310
+ gr.HTML("""
311
+ <div class="info-box">
312
+ <h3 style="margin-top: 0;">Extract Structured Data from Text</h3>
313
+ <p>Use NuExtract-1.5 to automatically extract structured information from historical texts.</p>
314
+ </div>
315
+ """)
316
+
317
+ with gr.Row():
318
+ with gr.Column():
319
+ template = gr.Textbox(
320
+ label="JSON Template",
321
+ value='{"earthquake location": "", "dateline location": ""}',
322
+ lines=5
323
+ )
324
+ text = gr.Textbox(
325
+ label="Text to Extract From",
326
+ value="Neues Erdbeben in Japan. Aus Tokio wird berichtet...",
327
+ lines=8
328
+ )
329
+ extract_btn = gr.Button("Extract Information", variant="primary")
330
+
331
+ with gr.Column():
332
+ status = gr.Textbox(label="Status")
333
+ output = gr.Textbox(label="Output", lines=10)
334
 
335
+ extract_btn.click(
336
+ fn=extract_info,
337
+ inputs=[template, text],
338
+ outputs=[status, output]
339
+ )
340
+
341
  with gr.TabItem("πŸ“ Location Mapping"):
342
+ gr.HTML("""
343
+ <div class="info-box">
344
+ <h3 style="margin-top: 0;">Map Your Historical Locations</h3>
345
+ <p>Upload an Excel file containing location data to create an interactive map visualization.</p>
346
+ </div>
347
+ """)
348
+
349
+ with gr.Row():
350
+ with gr.Column():
351
+ excel_file = gr.File(
352
+ label="Upload Excel File",
353
+ file_types=[".xlsx", ".xls"],
354
+ elem_classes="file-upload-box"
355
+ )
356
+ places_column = gr.Textbox(
357
+ label="Location Column Name",
358
+ value="dateline_locations",
359
+ placeholder="Enter the column containing locations"
360
+ )
361
+ process_btn = gr.Button("Generate Map", variant="primary")
362
+
363
+ with gr.Column():
364
+ map_output = gr.HTML(
365
+ label="Interactive Map",
366
+ value="""
367
+ <div style="text-align:center; height:65vh; width:100%; display:flex; align-items:center; justify-content:center;
368
+ background-color:#f5f5f5; border:1px solid #e0e0e0; border-radius:8px;">
369
+ <div>
370
+ <img src="https://cdn-icons-png.flaticon.com/512/854/854878.png" width="100">
371
+ <p style="margin-top:20px; color:#666;">Your map will appear here after processing</p>
372
+ </div>
373
+ </div>
374
+ """,
375
+ elem_id="map-container"
376
+ )
377
+ stats_output = gr.Textbox(
378
+ label="Location Statistics",
379
+ lines=2,
380
+ elem_classes="stats-box"
381
+ )
382
+ processed_file = gr.File(
383
+ label="Download Processed Data",
384
+ visible=True,
385
+ interactive=False
386
+ )
387
+
388
+ def process_and_map(file, column):
389
+ if file is None:
390
+ return None, "Please upload an Excel file", None
391
+
392
+ try:
393
+ map_html, stats, processed_path = process_excel(file, column)
394
+
395
+ if map_html and processed_path:
396
+ responsive_html = f"""
397
+ <div style="width:100%; height:65vh; margin:0; padding:0; border:1px solid #e0e0e0; border-radius:8px; overflow:hidden;">
398
+ {map_html}
399
+ </div>
400
+ """
401
+ return responsive_html, stats, processed_path
402
+ else:
403
+ return None, stats, None
404
+ except Exception as e:
405
+ import traceback
406
+ trace = traceback.format_exc()
407
+ print(f"Error in process_and_map: {e}\n{trace}")
408
+ return None, f"Error: {str(e)}", None
409
+
410
+ process_btn.click(
411
+ fn=process_and_map,
412
+ inputs=[excel_file, places_column],
413
+ outputs=[map_output, stats_output, processed_file]
414
+ )
415
 
416
  gr.HTML("""
417
  <div style="text-align: center; margin-top: 2rem; padding-top: 1rem; border-top: 1px solid #eee; font-size: 0.9rem; color: #666;">
418
+ <p>Made with <span style="color: #e25555;">❀</span> for historical research</p>
419
  </div>
420
  """)
421