Ashwin V. Mohanan committed on
Commit
ee45a15
·
1 Parent(s): 9606423

Allow file upload

Browse files
Files changed (3) hide show
  1. .gitignore +2 -0
  2. app/tabs/submit.py +106 -30
  3. uv.lock +0 -0
.gitignore CHANGED
@@ -11,3 +11,5 @@ wheels/
11
  dawsonia.log
12
  output/*/
13
  .gradio_cache
 
 
 
11
  dawsonia.log
12
  output/*/
13
  .gradio_cache
14
+ data/
15
+ dawsonia.log.*
app/tabs/submit.py CHANGED
@@ -2,7 +2,7 @@ import json
2
  import logging
3
  import os
4
  from pathlib import Path
5
- import time
6
  import warnings
7
 
8
  from PIL import Image
@@ -16,7 +16,6 @@ import numpy as np
16
  from numpy.typing import NDArray
17
  import pandas as pd
18
  import pooch
19
- import yaml
20
 
21
  from .visualizer import Page, TableCell
22
 
@@ -50,7 +49,8 @@ def run_dawsonia(
50
  first_page,
51
  last_page,
52
  prob_thresh,
53
- book,
 
54
  gallery,
55
  progress=gr.Progress(),
56
  ):
@@ -61,11 +61,13 @@ def run_dawsonia(
61
 
62
  model_path = Path("data/models/dawsonia/2024-07-02")
63
  output_path = Path("output")
 
64
 
65
  print("Dawsonia: digitizing", book)
66
  table_fmt = book.table_format
67
 
68
- output_path_book = output_path / book.station_name
 
69
  output_path_book.mkdir(exist_ok=True, parents=True)
70
  (output_path_book / "probablities").mkdir(exist_ok=True)
71
 
@@ -111,6 +113,7 @@ def run_dawsonia(
111
  prob_thresh,
112
  progress,
113
  progress_value,
 
114
  ): # , im_from_gallery[0])
115
  page, im = results
116
  collection.append(page)
@@ -118,6 +121,10 @@ def run_dawsonia(
118
  else:
119
  gr.Info(f"No tables detected in {page_number = }")
120
 
 
 
 
 
121
  gr.Info("Pages were succesfully digitized ✨")
122
 
123
  # yield collection, images
@@ -130,6 +137,7 @@ def read_page(
130
  prob_thresh: float,
131
  progress,
132
  progress_value,
 
133
  im_path_from_gallery: str = "",
134
  ):
135
  stats = digitize.Statistics.from_json(
@@ -153,7 +161,9 @@ def read_page(
153
 
154
  values_array = values_df.values.flatten()
155
  prob_array = prob_df.values.flatten()
156
- bbox_array = np.hstack(table_meta["table_positions"]).reshape(-1, 4)
 
 
157
  cells = [
158
  make_cell(value, bbox)
159
  for value, prob, bbox in zip(values_array, prob_array, bbox_array)
@@ -183,12 +193,13 @@ def all_example_images() -> list[str]:
183
 
184
  def get_selected_example_image(
185
  first_page, last_page, event: gr.SelectData
186
- ) -> tuple[str, io.Book, str] | None:
187
  """
188
  Get the name of the pipeline that corresponds to the selected image.
189
  """
 
190
  # for name, details in PIPELINES.items():
191
- name, _ext = event.value["image"]["orig_name"].split(".")
192
 
193
  station_tf = Path("table_formats", name).with_suffix(".toml")
194
 
@@ -204,10 +215,29 @@ def get_selected_example_image(
204
  [book.read_image(pg) for pg in range(first_page, last_page)],
205
  book,
206
  book_path,
 
207
  station_tf.read_text(),
208
  )
209
 
210
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  def overwrite_table_format_file(book: io.Book, book_path, table_fmt: str):
212
  name = book.station_name
213
  table_fmt_dir = Path("table_formats")
@@ -231,24 +261,55 @@ with gr.Blocks() as submit:
231
  with gr.Column(scale=5):
232
  batch_image_gallery = gr.Gallery(
233
  # file_types=[".pdf", ".zarr.zip"],
234
- label="Book to digitize (should be a .pdf or .zarr.zip file)",
235
- interactive=True,
236
- object_fit="scale-down",
237
- scale=1.0,
238
  )
239
 
240
  with gr.Column(scale=2):
241
- first_page = gr.Number(3, label="First page of the book", precision=0)
242
- last_page = gr.Number(5, label="Last page of the book", precision=0)
 
 
 
 
 
 
243
  examples = gr.Gallery(
244
  all_example_images(),
245
- label="Examples",
246
  interactive=False,
247
  allow_preview=False,
248
  object_fit="scale-down",
249
  min_width=250,
 
 
 
 
 
 
250
  )
251
- upload_button = gr.UploadButton(min_width=200)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
 
253
  with Modal(visible=False) as edit_table_fmt_modal:
254
  with gr.Column():
@@ -295,26 +356,40 @@ with gr.Blocks() as submit:
295
  batch_image_gallery,
296
  batch_book_state,
297
  batch_book_path_state,
 
298
  table_fmt_config_override,
299
  ),
300
  trigger_mode="always_last",
301
  )
302
 
303
- @batch_image_gallery.upload(
304
- inputs=batch_image_gallery,
305
- outputs=[batch_image_gallery],
 
 
 
 
 
 
 
 
306
  )
307
- def validate_images(images):
308
- print(images)
309
- if len(images) > MAX_IMAGES:
310
- gr.Warning(f"Maximum images you can upload is set to: {MAX_IMAGES}")
311
- return gr.update(value=None)
312
-
313
- gr.Warning(
314
- "Digitizing uploaded images is not implemented yet! Work in progress!"
315
- )
316
- raise NotImplementedError("WIP")
317
- return images
 
 
 
 
 
318
 
319
  run_button.click(
320
  fn=run_dawsonia,
@@ -324,6 +399,7 @@ with gr.Blocks() as submit:
324
  last_page,
325
  prob_thresh,
326
  batch_book_state,
 
327
  batch_image_gallery,
328
  ),
329
  outputs=(collection_submit_state, batch_image_gallery),
 
2
  import logging
3
  import os
4
  from pathlib import Path
5
+ import shutil
6
  import warnings
7
 
8
  from PIL import Image
 
16
  from numpy.typing import NDArray
17
  import pandas as pd
18
  import pooch
 
19
 
20
  from .visualizer import Page, TableCell
21
 
 
49
  first_page,
50
  last_page,
51
  prob_thresh,
52
+ book: io.Book,
53
+ book_path,
54
  gallery,
55
  progress=gr.Progress(),
56
  ):
 
61
 
62
  model_path = Path("data/models/dawsonia/2024-07-02")
63
  output_path = Path("output")
64
+ output_path.mkdir(exist_ok=True)
65
 
66
  print("Dawsonia: digitizing", book)
67
  table_fmt = book.table_format
68
 
69
+ final_output_path_book = output_path / book.station_name
70
+ output_path_book = Path(book_path).parent / "output"
71
  output_path_book.mkdir(exist_ok=True, parents=True)
72
  (output_path_book / "probablities").mkdir(exist_ok=True)
73
 
 
113
  prob_thresh,
114
  progress,
115
  progress_value,
116
+ table_fmt.preproc.idx_tables_size_verify,
117
  ): # , im_from_gallery[0])
118
  page, im = results
119
  collection.append(page)
 
121
  else:
122
  gr.Info(f"No tables detected in {page_number = }")
123
 
124
+ if final_output_path_book.exists():
125
+ shutil.rmtree(final_output_path_book)
126
+
127
+ shutil.copytree(output_path_book, final_output_path_book)
128
  gr.Info("Pages were succesfully digitized ✨")
129
 
130
  # yield collection, images
 
137
  prob_thresh: float,
138
  progress,
139
  progress_value,
140
+ idx_tables_size_verify: list[int],
141
  im_path_from_gallery: str = "",
142
  ):
143
  stats = digitize.Statistics.from_json(
 
161
 
162
  values_array = values_df.values.flatten()
163
  prob_array = prob_df.values.flatten()
164
+ # FIXME: hardcoded. Use idx_tables_size_verify and reconstruct bbox_array
165
+ bbox_array = np.hstack(table_meta["table_positions"][:2]).reshape(-1, 4)\
166
+
167
  cells = [
168
  make_cell(value, bbox)
169
  for value, prob, bbox in zip(values_array, prob_array, bbox_array)
 
193
 
194
  def get_selected_example_image(
195
  first_page, last_page, event: gr.SelectData
196
+ ) -> tuple[list[Image.Image], io.Book, str, str, str] | None:
197
  """
198
  Get the name of the pipeline that corresponds to the selected image.
199
  """
200
+ orig_name = event.value["image"]["orig_name"]
201
  # for name, details in PIPELINES.items():
202
+ name, _ext = orig_name.split(".")
203
 
204
  station_tf = Path("table_formats", name).with_suffix(".toml")
205
 
 
215
  [book.read_image(pg) for pg in range(first_page, last_page)],
216
  book,
217
  book_path,
218
+ station_tf.name,
219
  station_tf.read_text(),
220
  )
221
 
222
+ def get_uploaded_image(
223
+ first_page:int, last_page:int, table_fmt_filename:str, filename: str
224
+ ) -> tuple[list[NDArray], io.Book, str, str] | None:
225
+
226
+ name, _ext = filename.split(".")
227
+ station_tf = Path("table_formats", table_fmt_filename)
228
+ if not station_tf.exists():
229
+ station_tf = Path("table_formats", "bjuröklubb.toml")
230
+
231
+ first, last, book = io.read_book(Path(filename))
232
+ book._name = name
233
+ book.size_cell = [1.0, 1.0, 1.0, 1.0]
234
+ return (
235
+ [book.read_page(pg) for pg in range(first_page, last_page)],
236
+ book,
237
+ filename,
238
+ station_tf.read_text(),
239
+ )
240
+
241
  def overwrite_table_format_file(book: io.Book, book_path, table_fmt: str):
242
  name = book.station_name
243
  table_fmt_dir = Path("table_formats")
 
261
  with gr.Column(scale=5):
262
  batch_image_gallery = gr.Gallery(
263
  # file_types=[".pdf", ".zarr.zip"],
264
+ label="Preview",
265
+ interactive=False,
266
+ object_fit="contain",
267
+ # scale=0.8,
268
  )
269
 
270
  with gr.Column(scale=2):
271
+ first_page = gr.Number(3, label="First page", precision=0,)
272
+ last_page = gr.Number(5, label="Last page", precision=0,)
273
+ table_fmt_filename = gr.Dropdown(
274
+ [f.name for f in Path("table_formats").iterdir()],
275
+ interactive=True,
276
+ label="Select Table Format",
277
+ )
278
+
279
  examples = gr.Gallery(
280
  all_example_images(),
281
+ label="1a. Choose from the examples below, or",
282
  interactive=False,
283
  allow_preview=False,
284
  object_fit="scale-down",
285
  min_width=250,
286
+ height=160,
287
+ )
288
+
289
+ upload_file = gr.File(
290
+ label="1b. Upload a .pdf or .zarr.zip file",
291
+ file_types=[".pdf", ".zarr.zip"],
292
  )
293
+
294
+ # upload_file_true_path = gr.Textbox(visible=False)
295
+
296
+ def move_uploaded_file(uploaded, table_fmt_filename):
297
+ current_directory = Path(uploaded).parent
298
+
299
+ # Define the target directory where you want to save the uploaded files
300
+ target_directory = current_directory / table_fmt_filename.removesuffix(".toml")
301
+ os.makedirs(target_directory, exist_ok=True)
302
+
303
+ # Move the uploaded file to the target directory
304
+ true_path = Path(target_directory / Path(uploaded).name)
305
+ # if true_path.exists():
306
+ # true_path.unlink()
307
+
308
+ shutil.copy2(uploaded, true_path)
309
+ print(f"Copy created", true_path)
310
+ return str(true_path)
311
+
312
+ upload_button = gr.Button(value="Upload", min_width=200)
313
 
314
  with Modal(visible=False) as edit_table_fmt_modal:
315
  with gr.Column():
 
356
  batch_image_gallery,
357
  batch_book_state,
358
  batch_book_path_state,
359
+ table_fmt_filename,
360
  table_fmt_config_override,
361
  ),
362
  trigger_mode="always_last",
363
  )
364
 
365
+ upload_file.upload(move_uploaded_file, inputs=[upload_file, table_fmt_filename], outputs=batch_book_path_state)
366
+
367
+ upload_button.click(
368
+ get_uploaded_image,
369
+ (first_page, last_page, table_fmt_filename, batch_book_path_state),
370
+ (
371
+ batch_image_gallery,
372
+ batch_book_state,
373
+ batch_book_path_state,
374
+ table_fmt_config_override,
375
+ ),
376
  )
377
+
378
+ # @batch_image_gallery.upload(
379
+ # inputs=batch_image_gallery,
380
+ # outputs=[batch_image_gallery],
381
+ # )
382
+ # def validate_images(images):
383
+ # print(images)
384
+ # if len(images) > MAX_IMAGES:
385
+ # gr.Warning(f"Maximum images you can upload is set to: {MAX_IMAGES}")
386
+ # return gr.update(value=None)
387
+
388
+ # gr.Warning(
389
+ # "Digitizing uploaded images is not implemented yet! Work in progress!"
390
+ # )
391
+ # raise NotImplementedError("WIP")
392
+ # return images
393
 
394
  run_button.click(
395
  fn=run_dawsonia,
 
399
  last_page,
400
  prob_thresh,
401
  batch_book_state,
402
+ batch_book_path_state,
403
  batch_image_gallery,
404
  ),
405
  outputs=(collection_submit_state, batch_image_gallery),
uv.lock CHANGED
The diff for this file is too large to render. See raw diff