Spaces:
Sleeping
Sleeping
Improved feedback to the user
Browse files
- gradio_app.py +15 -7
- src/translate_any_doc.py +19 -7
gradio_app.py
CHANGED
@@ -13,17 +13,22 @@ translator = SalamandraTA7bTranslator(hf_token)
|
|
13 |
|
14 |
def upload_file(filepath, source_lang, target_lang):
|
15 |
aligner = Aligner(config_folder, source_lang, target_lang, temp_folder)
|
16 |
-
translated_file_name
|
17 |
-
|
18 |
-
gr.
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
|
22 |
def before_processing():
|
23 |
return [
|
24 |
gr.UploadButton(visible=False),
|
25 |
-
gr.
|
26 |
-
# Keep download hidden until processing finishes
|
27 |
]
|
28 |
|
29 |
|
@@ -42,8 +47,11 @@ with gr.Blocks() as demo:
|
|
42 |
with gr.Row():
|
43 |
u = gr.UploadButton("Upload a file", file_count="single")
|
44 |
d = gr.DownloadButton("Download the file", visible=False)
|
|
|
45 |
|
46 |
-
u.upload(fn=before_processing, inputs=None, outputs=[u,
|
|
|
|
|
47 |
d.click(download_file, None, [u, d])
|
48 |
if __name__ == "__main__":
|
49 |
demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
13 |
|
14 |
def upload_file(filepath, source_lang, target_lang):
    """Translate an uploaded document, streaming UI updates while it runs.

    This is a generator. Every ``(status, translated_file_name)`` pair
    produced by ``translate_document`` is converted into one list of three
    component updates, ordered as: upload button, download button, status
    textbox.

    Args:
        filepath: Path of the uploaded file, forwarded to
            ``translate_document``.
        source_lang: Source language, passed to the aligner and to
            ``translate_document``.
        target_lang: Target language, passed to the aligner and to
            ``translate_document``.

    Yields:
        A three-element list of Gradio components. While no file name has
        been reported, the download button stays hidden and the textbox
        shows the current status. Once a file name is reported, the download
        button becomes visible and points at that file, and the textbox is
        hidden. The upload button is hidden in both cases.
    """
    aligner = Aligner(config_folder, source_lang, target_lang, temp_folder)
    progress = translate_document(filepath, source_lang, target_lang, translator, aligner)

    for status, translated_file_name in progress:
        if not translated_file_name:
            # No output file yet: surface the progress message only.
            yield [
                gr.UploadButton(visible=False),
                gr.DownloadButton(visible=False),
                gr.Textbox(value=status, visible=True),
            ]
            continue

        # A truthy file name signals that the translated document is ready.
        yield [
            gr.UploadButton(visible=False),
            gr.DownloadButton(
                label=f"Download {translated_file_name}",
                value=translated_file_name,
                visible=True,
                interactive=True,
            ),
            gr.Textbox(visible=False),
        ]
|
26 |
|
27 |
|
28 |
def before_processing():
    """Build the UI updates shown before translation begins.

    Returns:
        A two-element list: the upload button hidden, followed by the status
        textbox made visible with the text "Processing...".
    """
    hidden_upload = gr.UploadButton(visible=False)
    processing_notice = gr.Textbox(value="Processing...", visible=True)
    return [hidden_upload, processing_notice]
|
33 |
|
34 |
|
|
|
47 |
with gr.Row():
|
48 |
u = gr.UploadButton("Upload a file", file_count="single")
|
49 |
d = gr.DownloadButton("Download the file", visible=False)
|
50 |
+
status_text = gr.Textbox(label="Status", visible=False)
|
51 |
|
52 |
+
u.upload(fn=before_processing, inputs=None, outputs=[u, status_text]).then(upload_file,
|
53 |
+
[u, dropdown1, dropdown2],
|
54 |
+
[u, d, status_text])
|
55 |
d.click(download_file, None, [u, d])
|
56 |
if __name__ == "__main__":
|
57 |
demo.launch(server_name="0.0.0.0", server_port=7860)
|
src/translate_any_doc.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import shutil
|
2 |
import string
|
|
|
3 |
import time
|
4 |
import os
|
5 |
from itertools import groupby
|
@@ -12,6 +13,8 @@ import glob
|
|
12 |
import spacy
|
13 |
from spacy.tokens import Doc
|
14 |
|
|
|
|
|
15 |
import tqdm
|
16 |
|
17 |
# Load multilingual model to use as sentence tokenizer
|
@@ -366,7 +369,8 @@ def translate_document(input_file: str, source_lang: str, target_lang: str,
|
|
366 |
translator,
|
367 |
aligner: Aligner,
|
368 |
temp_folder: str = "tmp",
|
369 |
-
tikal_folder: str = "okapi-apps_gtk2-linux-x86_64_1.47.0", with_format: bool = True) -> str
|
|
|
370 |
input_filename = input_file.split("/")[-1]
|
371 |
os.makedirs(temp_folder, exist_ok=True)
|
372 |
|
@@ -390,20 +394,28 @@ def translate_document(input_file: str, source_lang: str, target_lang: str,
|
|
390 |
original_spacing += spaces
|
391 |
|
392 |
translated_sentences = []
|
393 |
-
|
394 |
-
|
395 |
-
|
|
|
|
|
396 |
text = Doc(spacy_nlp.vocab, words=[token["text"] for token in sentence], spaces=spacing).text
|
397 |
|
398 |
while True:
|
399 |
try:
|
400 |
translated_sentences.append(translator.translate(text, source_lang, target_lang))
|
401 |
break
|
402 |
-
except:
|
403 |
-
|
|
|
|
|
|
|
|
|
|
|
404 |
|
405 |
# time to align the translation with the original
|
406 |
print("Generating alignments...")
|
|
|
407 |
start_time = time.time()
|
408 |
translated_sentences_with_style, translated_sentences_spacing = generate_alignments(
|
409 |
original_tokenized_sentences_with_style,
|
@@ -468,4 +480,4 @@ def translate_document(input_file: str, source_lang: str, target_lang: str,
|
|
468 |
translated_file_path = re.search(r'(?<=Output:\s)(.*)', output)[0]
|
469 |
|
470 |
print(f"Saved file in {translated_file_path}")
|
471 |
-
|
|
|
1 |
import shutil
|
2 |
import string
|
3 |
+
import sys
|
4 |
import time
|
5 |
import os
|
6 |
from itertools import groupby
|
|
|
13 |
import spacy
|
14 |
from spacy.tokens import Doc
|
15 |
|
16 |
+
from gradio_client.exceptions import AppError
|
17 |
+
|
18 |
import tqdm
|
19 |
|
20 |
# Load multilingual model to use as sentence tokenizer
|
|
|
369 |
translator,
|
370 |
aligner: Aligner,
|
371 |
temp_folder: str = "tmp",
|
372 |
+
tikal_folder: str = "okapi-apps_gtk2-linux-x86_64_1.47.0", with_format: bool = True) -> (str,
|
373 |
+
str):
|
374 |
input_filename = input_file.split("/")[-1]
|
375 |
os.makedirs(temp_folder, exist_ok=True)
|
376 |
|
|
|
394 |
original_spacing += spaces
|
395 |
|
396 |
translated_sentences = []
|
397 |
+
yield "Translating 0%...", None
|
398 |
+
total = len(original_tokenized_sentences_with_style)
|
399 |
+
pbar = tqdm.tqdm(desc="Translating paragraphs...", total=total)
|
400 |
+
|
401 |
+
for i, (sentence, spacing) in enumerate(zip(original_tokenized_sentences_with_style, original_spacing)):
|
402 |
text = Doc(spacy_nlp.vocab, words=[token["text"] for token in sentence], spaces=spacing).text
|
403 |
|
404 |
while True:
|
405 |
try:
|
406 |
translated_sentences.append(translator.translate(text, source_lang, target_lang))
|
407 |
break
|
408 |
+
except AppError as e:
|
409 |
+
print(e)
|
410 |
+
sys.exit()
|
411 |
+
|
412 |
+
pbar.update(1)
|
413 |
+
percent_complete = int(((i + 1) / total) * 100)
|
414 |
+
yield f"Translating {percent_complete}%...", None
|
415 |
|
416 |
# time to align the translation with the original
|
417 |
print("Generating alignments...")
|
418 |
+
yield "Aligning...", None
|
419 |
start_time = time.time()
|
420 |
translated_sentences_with_style, translated_sentences_spacing = generate_alignments(
|
421 |
original_tokenized_sentences_with_style,
|
|
|
480 |
translated_file_path = re.search(r'(?<=Output:\s)(.*)', output)[0]
|
481 |
|
482 |
print(f"Saved file in {translated_file_path}")
|
483 |
+
yield "", translated_file_path
|