mjuvilla committed on
Commit
acd3a4d
·
1 Parent(s): 02be53b

Improved feedback to the user

Browse files
Files changed (2) hide show
  1. gradio_app.py +15 -7
  2. src/translate_any_doc.py +19 -7
gradio_app.py CHANGED
@@ -13,17 +13,22 @@ translator = SalamandraTA7bTranslator(hf_token)
13
 
14
  def upload_file(filepath, source_lang, target_lang):
15
  aligner = Aligner(config_folder, source_lang, target_lang, temp_folder)
16
- translated_file_name = translate_document(filepath, source_lang, target_lang, translator, aligner)
17
- return [gr.UploadButton(visible=False),
18
- gr.DownloadButton(label=f"Download {translated_file_name}", value=translated_file_name, visible=True,
19
- interactive=True)]
 
 
 
 
 
 
20
 
21
 
22
  def before_processing():
23
  return [
24
  gr.UploadButton(visible=False),
25
- gr.DownloadButton("Processing...", visible=True, interactive=False)
26
- # Keep download hidden until processing finishes
27
  ]
28
 
29
 
@@ -42,8 +47,11 @@ with gr.Blocks() as demo:
42
  with gr.Row():
43
  u = gr.UploadButton("Upload a file", file_count="single")
44
  d = gr.DownloadButton("Download the file", visible=False)
 
45
 
46
- u.upload(fn=before_processing, inputs=None, outputs=[u, d]).then(upload_file, [u, dropdown1, dropdown2], [u, d])
 
 
47
  d.click(download_file, None, [u, d])
48
  if __name__ == "__main__":
49
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
13
 
14
  def upload_file(filepath, source_lang, target_lang):
15
  aligner = Aligner(config_folder, source_lang, target_lang, temp_folder)
16
+ for status, translated_file_name in translate_document(filepath, source_lang, target_lang, translator, aligner):
17
+ if translated_file_name: # finished
18
+ yield [gr.UploadButton(visible=False),
19
+ gr.DownloadButton(label=f"Download {translated_file_name}", value=translated_file_name,
20
+ visible=True, interactive=True),
21
+ gr.Textbox(visible=False)]
22
+ else:
23
+ yield [gr.UploadButton(visible=False),
24
+ gr.DownloadButton(visible=False),
25
+ gr.Textbox(value=status, visible=True)]
26
 
27
 
28
  def before_processing():
29
  return [
30
  gr.UploadButton(visible=False),
31
+ gr.Textbox(value="Processing...", visible=True),
 
32
  ]
33
 
34
 
 
47
  with gr.Row():
48
  u = gr.UploadButton("Upload a file", file_count="single")
49
  d = gr.DownloadButton("Download the file", visible=False)
50
+ status_text = gr.Textbox(label="Status", visible=False)
51
 
52
+ u.upload(fn=before_processing, inputs=None, outputs=[u, status_text]).then(upload_file,
53
+ [u, dropdown1, dropdown2],
54
+ [u, d, status_text])
55
  d.click(download_file, None, [u, d])
56
  if __name__ == "__main__":
57
  demo.launch(server_name="0.0.0.0", server_port=7860)
src/translate_any_doc.py CHANGED
@@ -1,5 +1,6 @@
1
  import shutil
2
  import string
 
3
  import time
4
  import os
5
  from itertools import groupby
@@ -12,6 +13,8 @@ import glob
12
  import spacy
13
  from spacy.tokens import Doc
14
 
 
 
15
  import tqdm
16
 
17
  # Load multilingual model to use as sentence tokenizer
@@ -366,7 +369,8 @@ def translate_document(input_file: str, source_lang: str, target_lang: str,
366
  translator,
367
  aligner: Aligner,
368
  temp_folder: str = "tmp",
369
- tikal_folder: str = "okapi-apps_gtk2-linux-x86_64_1.47.0", with_format: bool = True) -> str:
 
370
  input_filename = input_file.split("/")[-1]
371
  os.makedirs(temp_folder, exist_ok=True)
372
 
@@ -390,20 +394,28 @@ def translate_document(input_file: str, source_lang: str, target_lang: str,
390
  original_spacing += spaces
391
 
392
  translated_sentences = []
393
- for sentence, spacing in tqdm.tqdm(zip(original_tokenized_sentences_with_style, original_spacing),
394
- desc="Translating paragraphs...",
395
- total=len(original_tokenized_sentences_with_style)):
 
 
396
  text = Doc(spacy_nlp.vocab, words=[token["text"] for token in sentence], spaces=spacing).text
397
 
398
  while True:
399
  try:
400
  translated_sentences.append(translator.translate(text, source_lang, target_lang))
401
  break
402
- except:
403
- continue
 
 
 
 
 
404
 
405
  # time to align the translation with the original
406
  print("Generating alignments...")
 
407
  start_time = time.time()
408
  translated_sentences_with_style, translated_sentences_spacing = generate_alignments(
409
  original_tokenized_sentences_with_style,
@@ -468,4 +480,4 @@ def translate_document(input_file: str, source_lang: str, target_lang: str,
468
  translated_file_path = re.search(r'(?<=Output:\s)(.*)', output)[0]
469
 
470
  print(f"Saved file in {translated_file_path}")
471
- return translated_file_path
 
1
  import shutil
2
  import string
3
+ import sys
4
  import time
5
  import os
6
  from itertools import groupby
 
13
  import spacy
14
  from spacy.tokens import Doc
15
 
16
+ from gradio_client.exceptions import AppError
17
+
18
  import tqdm
19
 
20
  # Load multilingual model to use as sentence tokenizer
 
369
  translator,
370
  aligner: Aligner,
371
  temp_folder: str = "tmp",
372
+ tikal_folder: str = "okapi-apps_gtk2-linux-x86_64_1.47.0", with_format: bool = True) -> (str,
373
+ str):
374
  input_filename = input_file.split("/")[-1]
375
  os.makedirs(temp_folder, exist_ok=True)
376
 
 
394
  original_spacing += spaces
395
 
396
  translated_sentences = []
397
+ yield "Translating 0%...", None
398
+ total = len(original_tokenized_sentences_with_style)
399
+ pbar = tqdm.tqdm(desc="Translating paragraphs...", total=total)
400
+
401
+ for i, (sentence, spacing) in enumerate(zip(original_tokenized_sentences_with_style, original_spacing)):
402
  text = Doc(spacy_nlp.vocab, words=[token["text"] for token in sentence], spaces=spacing).text
403
 
404
  while True:
405
  try:
406
  translated_sentences.append(translator.translate(text, source_lang, target_lang))
407
  break
408
+ except AppError as e:
409
+ print(e)
410
+ sys.exit()
411
+
412
+ pbar.update(1)
413
+ percent_complete = int(((i + 1) / total) * 100)
414
+ yield f"Translating {percent_complete}%...", None
415
 
416
  # time to align the translation with the original
417
  print("Generating alignments...")
418
+ yield "Aligning...", None
419
  start_time = time.time()
420
  translated_sentences_with_style, translated_sentences_spacing = generate_alignments(
421
  original_tokenized_sentences_with_style,
 
480
  translated_file_path = re.search(r'(?<=Output:\s)(.*)', output)[0]
481
 
482
  print(f"Saved file in {translated_file_path}")
483
+ yield "", translated_file_path