Spaces:
Running
Running
Ashwin V. Mohanan
committed on
Commit
·
ee45a15
1
Parent(s):
9606423
Allow file upload
Browse files- .gitignore +2 -0
- app/tabs/submit.py +106 -30
- uv.lock +0 -0
.gitignore
CHANGED
@@ -11,3 +11,5 @@ wheels/
|
|
11 |
dawsonia.log
|
12 |
output/*/
|
13 |
.gradio_cache
|
|
|
|
|
|
11 |
dawsonia.log
|
12 |
output/*/
|
13 |
.gradio_cache
|
14 |
+
data/
|
15 |
+
dawsonia.log.*
|
app/tabs/submit.py
CHANGED
@@ -2,7 +2,7 @@ import json
|
|
2 |
import logging
|
3 |
import os
|
4 |
from pathlib import Path
|
5 |
-
import
|
6 |
import warnings
|
7 |
|
8 |
from PIL import Image
|
@@ -16,7 +16,6 @@ import numpy as np
|
|
16 |
from numpy.typing import NDArray
|
17 |
import pandas as pd
|
18 |
import pooch
|
19 |
-
import yaml
|
20 |
|
21 |
from .visualizer import Page, TableCell
|
22 |
|
@@ -50,7 +49,8 @@ def run_dawsonia(
|
|
50 |
first_page,
|
51 |
last_page,
|
52 |
prob_thresh,
|
53 |
-
book,
|
|
|
54 |
gallery,
|
55 |
progress=gr.Progress(),
|
56 |
):
|
@@ -61,11 +61,13 @@ def run_dawsonia(
|
|
61 |
|
62 |
model_path = Path("data/models/dawsonia/2024-07-02")
|
63 |
output_path = Path("output")
|
|
|
64 |
|
65 |
print("Dawsonia: digitizing", book)
|
66 |
table_fmt = book.table_format
|
67 |
|
68 |
-
|
|
|
69 |
output_path_book.mkdir(exist_ok=True, parents=True)
|
70 |
(output_path_book / "probablities").mkdir(exist_ok=True)
|
71 |
|
@@ -111,6 +113,7 @@ def run_dawsonia(
|
|
111 |
prob_thresh,
|
112 |
progress,
|
113 |
progress_value,
|
|
|
114 |
): # , im_from_gallery[0])
|
115 |
page, im = results
|
116 |
collection.append(page)
|
@@ -118,6 +121,10 @@ def run_dawsonia(
|
|
118 |
else:
|
119 |
gr.Info(f"No tables detected in {page_number = }")
|
120 |
|
|
|
|
|
|
|
|
|
121 |
gr.Info("Pages were succesfully digitized ✨")
|
122 |
|
123 |
# yield collection, images
|
@@ -130,6 +137,7 @@ def read_page(
|
|
130 |
prob_thresh: float,
|
131 |
progress,
|
132 |
progress_value,
|
|
|
133 |
im_path_from_gallery: str = "",
|
134 |
):
|
135 |
stats = digitize.Statistics.from_json(
|
@@ -153,7 +161,9 @@ def read_page(
|
|
153 |
|
154 |
values_array = values_df.values.flatten()
|
155 |
prob_array = prob_df.values.flatten()
|
156 |
-
|
|
|
|
|
157 |
cells = [
|
158 |
make_cell(value, bbox)
|
159 |
for value, prob, bbox in zip(values_array, prob_array, bbox_array)
|
@@ -183,12 +193,13 @@ def all_example_images() -> list[str]:
|
|
183 |
|
184 |
def get_selected_example_image(
|
185 |
first_page, last_page, event: gr.SelectData
|
186 |
-
) -> tuple[
|
187 |
"""
|
188 |
Get the name of the pipeline that corresponds to the selected image.
|
189 |
"""
|
|
|
190 |
# for name, details in PIPELINES.items():
|
191 |
-
name, _ext =
|
192 |
|
193 |
station_tf = Path("table_formats", name).with_suffix(".toml")
|
194 |
|
@@ -204,10 +215,29 @@ def get_selected_example_image(
|
|
204 |
[book.read_image(pg) for pg in range(first_page, last_page)],
|
205 |
book,
|
206 |
book_path,
|
|
|
207 |
station_tf.read_text(),
|
208 |
)
|
209 |
|
210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
211 |
def overwrite_table_format_file(book: io.Book, book_path, table_fmt: str):
|
212 |
name = book.station_name
|
213 |
table_fmt_dir = Path("table_formats")
|
@@ -231,24 +261,55 @@ with gr.Blocks() as submit:
|
|
231 |
with gr.Column(scale=5):
|
232 |
batch_image_gallery = gr.Gallery(
|
233 |
# file_types=[".pdf", ".zarr.zip"],
|
234 |
-
label="
|
235 |
-
interactive=
|
236 |
-
object_fit="
|
237 |
-
scale=
|
238 |
)
|
239 |
|
240 |
with gr.Column(scale=2):
|
241 |
-
first_page = gr.Number(3, label="First page
|
242 |
-
last_page = gr.Number(5, label="Last page
|
|
|
|
|
|
|
|
|
|
|
|
|
243 |
examples = gr.Gallery(
|
244 |
all_example_images(),
|
245 |
-
label="
|
246 |
interactive=False,
|
247 |
allow_preview=False,
|
248 |
object_fit="scale-down",
|
249 |
min_width=250,
|
|
|
|
|
|
|
|
|
|
|
|
|
250 |
)
|
251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
252 |
|
253 |
with Modal(visible=False) as edit_table_fmt_modal:
|
254 |
with gr.Column():
|
@@ -295,26 +356,40 @@ with gr.Blocks() as submit:
|
|
295 |
batch_image_gallery,
|
296 |
batch_book_state,
|
297 |
batch_book_path_state,
|
|
|
298 |
table_fmt_config_override,
|
299 |
),
|
300 |
trigger_mode="always_last",
|
301 |
)
|
302 |
|
303 |
-
|
304 |
-
|
305 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
306 |
)
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
|
|
|
|
|
|
|
|
|
|
318 |
|
319 |
run_button.click(
|
320 |
fn=run_dawsonia,
|
@@ -324,6 +399,7 @@ with gr.Blocks() as submit:
|
|
324 |
last_page,
|
325 |
prob_thresh,
|
326 |
batch_book_state,
|
|
|
327 |
batch_image_gallery,
|
328 |
),
|
329 |
outputs=(collection_submit_state, batch_image_gallery),
|
|
|
2 |
import logging
|
3 |
import os
|
4 |
from pathlib import Path
|
5 |
+
import shutil
|
6 |
import warnings
|
7 |
|
8 |
from PIL import Image
|
|
|
16 |
from numpy.typing import NDArray
|
17 |
import pandas as pd
|
18 |
import pooch
|
|
|
19 |
|
20 |
from .visualizer import Page, TableCell
|
21 |
|
|
|
49 |
first_page,
|
50 |
last_page,
|
51 |
prob_thresh,
|
52 |
+
book: io.Book,
|
53 |
+
book_path,
|
54 |
gallery,
|
55 |
progress=gr.Progress(),
|
56 |
):
|
|
|
61 |
|
62 |
model_path = Path("data/models/dawsonia/2024-07-02")
|
63 |
output_path = Path("output")
|
64 |
+
output_path.mkdir(exist_ok=True)
|
65 |
|
66 |
print("Dawsonia: digitizing", book)
|
67 |
table_fmt = book.table_format
|
68 |
|
69 |
+
final_output_path_book = output_path / book.station_name
|
70 |
+
output_path_book = Path(book_path).parent / "output"
|
71 |
output_path_book.mkdir(exist_ok=True, parents=True)
|
72 |
(output_path_book / "probablities").mkdir(exist_ok=True)
|
73 |
|
|
|
113 |
prob_thresh,
|
114 |
progress,
|
115 |
progress_value,
|
116 |
+
table_fmt.preproc.idx_tables_size_verify,
|
117 |
): # , im_from_gallery[0])
|
118 |
page, im = results
|
119 |
collection.append(page)
|
|
|
121 |
else:
|
122 |
gr.Info(f"No tables detected in {page_number = }")
|
123 |
|
124 |
+
if final_output_path_book.exists():
|
125 |
+
shutil.rmtree(final_output_path_book)
|
126 |
+
|
127 |
+
shutil.copytree(output_path_book, final_output_path_book)
|
128 |
gr.Info("Pages were succesfully digitized ✨")
|
129 |
|
130 |
# yield collection, images
|
|
|
137 |
prob_thresh: float,
|
138 |
progress,
|
139 |
progress_value,
|
140 |
+
idx_tables_size_verify: list[int],
|
141 |
im_path_from_gallery: str = "",
|
142 |
):
|
143 |
stats = digitize.Statistics.from_json(
|
|
|
161 |
|
162 |
values_array = values_df.values.flatten()
|
163 |
prob_array = prob_df.values.flatten()
|
164 |
+
# FIXME: hardcoded. Use idx_tables_size_verify and reconstruct bbox_array
|
165 |
+
bbox_array = np.hstack(table_meta["table_positions"][:2]).reshape(-1, 4)\
|
166 |
+
|
167 |
cells = [
|
168 |
make_cell(value, bbox)
|
169 |
for value, prob, bbox in zip(values_array, prob_array, bbox_array)
|
|
|
193 |
|
194 |
def get_selected_example_image(
|
195 |
first_page, last_page, event: gr.SelectData
|
196 |
+
) -> tuple[list[Image.Image], io.Book, str, str, str] | None:
|
197 |
"""
|
198 |
Get the name of the pipeline that corresponds to the selected image.
|
199 |
"""
|
200 |
+
orig_name = event.value["image"]["orig_name"]
|
201 |
# for name, details in PIPELINES.items():
|
202 |
+
name, _ext = orig_name.split(".")
|
203 |
|
204 |
station_tf = Path("table_formats", name).with_suffix(".toml")
|
205 |
|
|
|
215 |
[book.read_image(pg) for pg in range(first_page, last_page)],
|
216 |
book,
|
217 |
book_path,
|
218 |
+
station_tf.name,
|
219 |
station_tf.read_text(),
|
220 |
)
|
221 |
|
222 |
+
def get_uploaded_image(
    first_page: int, last_page: int, table_fmt_filename: str, filename: str
) -> tuple[list[NDArray], io.Book, str, str] | None:
    """
    Open an uploaded book and return its page previews with the table format.

    Parameters
    ----------
    first_page, last_page
        Half-open page range ``[first_page, last_page)`` passed to
        ``book.read_page``.
    table_fmt_filename
        File name of a table format inside the ``table_formats`` directory.
        When it is empty, ``None`` or does not name an existing file, the
        ``bjuröklubb.toml`` format is used instead.
    filename
        Path of the uploaded book; it is handed to ``io.read_book``.

    Returns
    -------
    tuple
        ``(pages, book, filename, table_format_text)`` where ``pages`` is the
        list of values returned by ``book.read_page`` for the requested range
        and ``table_format_text`` is the content of the chosen TOML file.
    """
    book_file = Path(filename)

    # Strip every suffix of the last path component.  Unpacking
    # ``filename.split(".")`` into two names raised ValueError for
    # multi-suffix uploads such as ``*.zarr.zip`` and for any path containing
    # a dot in a directory name.
    # NOTE(review): the directory part is kept on purpose so that the value is
    # unchanged for inputs that worked before -- confirm whether ``book._name``
    # should rather be the bare file stem.
    all_suffixes = "".join(book_file.suffixes)
    name = filename[: len(filename) - len(all_suffixes)] if all_suffixes else filename

    fallback_tf = Path("table_formats", "bjuröklubb.toml")
    # ``is_file`` (instead of ``exists``) also rejects an empty selection,
    # which would otherwise resolve to the ``table_formats`` directory itself.
    station_tf = Path("table_formats", table_fmt_filename) if table_fmt_filename else fallback_tf
    if not station_tf.is_file():
        station_tf = fallback_tf

    # Only the book object of the returned triple is needed here.
    _first, _last, book = io.read_book(book_file)
    book._name = name
    # NOTE(review): presumably resets the per-cell size scaling -- confirm
    # against the Book implementation.
    book.size_cell = [1.0, 1.0, 1.0, 1.0]

    pages = [book.read_page(pg) for pg in range(first_page, last_page)]
    # TOML documents are UTF-8 by specification; do not rely on the locale.
    return (
        pages,
        book,
        filename,
        station_tf.read_text(encoding="utf-8"),
    )
|
240 |
+
|
241 |
def overwrite_table_format_file(book: io.Book, book_path, table_fmt: str):
|
242 |
name = book.station_name
|
243 |
table_fmt_dir = Path("table_formats")
|
|
|
261 |
with gr.Column(scale=5):
|
262 |
batch_image_gallery = gr.Gallery(
|
263 |
# file_types=[".pdf", ".zarr.zip"],
|
264 |
+
label="Preview",
|
265 |
+
interactive=False,
|
266 |
+
object_fit="contain",
|
267 |
+
# scale=0.8,
|
268 |
)
|
269 |
|
270 |
with gr.Column(scale=2):
|
271 |
+
first_page = gr.Number(3, label="First page", precision=0,)
|
272 |
+
last_page = gr.Number(5, label="Last page", precision=0,)
|
273 |
+
table_fmt_filename = gr.Dropdown(
|
274 |
+
[f.name for f in Path("table_formats").iterdir()],
|
275 |
+
interactive=True,
|
276 |
+
label="Select Table Format",
|
277 |
+
)
|
278 |
+
|
279 |
examples = gr.Gallery(
|
280 |
all_example_images(),
|
281 |
+
label="1a. Choose from the examples below, or",
|
282 |
interactive=False,
|
283 |
allow_preview=False,
|
284 |
object_fit="scale-down",
|
285 |
min_width=250,
|
286 |
+
height=160,
|
287 |
+
)
|
288 |
+
|
289 |
+
upload_file = gr.File(
|
290 |
+
label="1b. Upload a .pdf or .zarr.zip file",
|
291 |
+
file_types=[".pdf", ".zarr.zip"],
|
292 |
)
|
293 |
+
|
294 |
+
# upload_file_true_path = gr.Textbox(visible=False)
|
295 |
+
|
296 |
+
def move_uploaded_file(uploaded: str, table_fmt_filename: str) -> str:
    """
    Copy an uploaded file into a sub-directory named after the table format.

    The sub-directory is created next to the uploaded file and is named like
    ``table_fmt_filename`` without its ``.toml`` suffix.  The original upload
    is left in place; an existing copy at the destination is overwritten.

    Parameters
    ----------
    uploaded
        Path of the uploaded file.
    table_fmt_filename
        File name of the selected table format, e.g. ``"station.toml"``.

    Returns
    -------
    str
        Path of the copy inside the new sub-directory.
    """
    source = Path(uploaded)

    # Target directory: <upload dir>/<table format name without ".toml">
    target_directory = source.parent / table_fmt_filename.removesuffix(".toml")
    target_directory.mkdir(parents=True, exist_ok=True)

    # Copy (not move) so the file handed over by the upload widget stays valid.
    true_path = target_directory / source.name
    shutil.copy2(source, true_path)
    print("Copy created", true_path)
    return str(true_path)
|
311 |
+
|
312 |
+
upload_button = gr.Button(value="Upload", min_width=200)
|
313 |
|
314 |
with Modal(visible=False) as edit_table_fmt_modal:
|
315 |
with gr.Column():
|
|
|
356 |
batch_image_gallery,
|
357 |
batch_book_state,
|
358 |
batch_book_path_state,
|
359 |
+
table_fmt_filename,
|
360 |
table_fmt_config_override,
|
361 |
),
|
362 |
trigger_mode="always_last",
|
363 |
)
|
364 |
|
365 |
+
upload_file.upload(move_uploaded_file, inputs=[upload_file, table_fmt_filename], outputs=batch_book_path_state)
|
366 |
+
|
367 |
+
upload_button.click(
|
368 |
+
get_uploaded_image,
|
369 |
+
(first_page, last_page, table_fmt_filename, batch_book_path_state),
|
370 |
+
(
|
371 |
+
batch_image_gallery,
|
372 |
+
batch_book_state,
|
373 |
+
batch_book_path_state,
|
374 |
+
table_fmt_config_override,
|
375 |
+
),
|
376 |
)
|
377 |
+
|
378 |
+
# @batch_image_gallery.upload(
|
379 |
+
# inputs=batch_image_gallery,
|
380 |
+
# outputs=[batch_image_gallery],
|
381 |
+
# )
|
382 |
+
# def validate_images(images):
|
383 |
+
# print(images)
|
384 |
+
# if len(images) > MAX_IMAGES:
|
385 |
+
# gr.Warning(f"Maximum images you can upload is set to: {MAX_IMAGES}")
|
386 |
+
# return gr.update(value=None)
|
387 |
+
|
388 |
+
# gr.Warning(
|
389 |
+
# "Digitizing uploaded images is not implemented yet! Work in progress!"
|
390 |
+
# )
|
391 |
+
# raise NotImplementedError("WIP")
|
392 |
+
# return images
|
393 |
|
394 |
run_button.click(
|
395 |
fn=run_dawsonia,
|
|
|
399 |
last_page,
|
400 |
prob_thresh,
|
401 |
batch_book_state,
|
402 |
+
batch_book_path_state,
|
403 |
batch_image_gallery,
|
404 |
),
|
405 |
outputs=(collection_submit_state, batch_image_gallery),
|
uv.lock
CHANGED
The diff for this file is too large to render.
See raw diff
|
|