Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline
|
3 |
-
import tempfile, os, zipfile
|
4 |
|
5 |
translator_cache = {}
|
6 |
|
@@ -26,13 +26,12 @@ def safe_translate(text, src, tgt):
|
|
26 |
translator = get_translator(src, tgt)
|
27 |
return translator(text, max_length=512)[0]["translation_text"]
|
28 |
elif (src, tgt) == ("ja", "zh") or (src, tgt) == ("zh", "ja"):
|
29 |
-
# double translate via English
|
30 |
mid = safe_translate(text, src, "en")
|
31 |
return safe_translate(mid, "en", tgt)
|
32 |
else:
|
33 |
return f"[Unsupported: {src}->{tgt}]"
|
34 |
except Exception as e:
|
35 |
-
return f"[Translation error: {e}]"
|
36 |
|
37 |
def parse_srt(srt_text):
|
38 |
blocks = srt_text.strip().split("\n\n")
|
@@ -49,35 +48,51 @@ def parse_srt(srt_text):
|
|
49 |
def reassemble_srt(subtitles):
    """Render subtitle triples back into a single SRT-style string.

    Args:
        subtitles: iterable of ``(index, timestamp, text)`` triples.

    Returns:
        Each triple rendered as three newline-separated fields, with a
        blank line between consecutive entries.
    """
    entries = []
    for number, timing, body in subtitles:
        entries.append(f"{number}\n{timing}\n{body}")
    return "\n\n".join(entries)
|
51 |
|
52 |
-
def process_file(file_obj, src_lang, tgt_lang, output_dir):
|
53 |
try:
|
54 |
-
raw_text = file_obj.read().decode("utf-8")
|
55 |
-
except:
|
56 |
raw_text = file_obj.read().decode("utf-8", errors="ignore")
|
|
|
|
|
57 |
|
58 |
-
|
59 |
-
|
|
|
|
|
60 |
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
output_path = os.path.join(output_dir, os.path.basename(file_obj.name))
|
67 |
-
with open(output_path, "w", encoding="utf-8") as f:
|
68 |
-
f.write(reassemble_srt(translated_subs))
|
69 |
|
70 |
def batch_translate(files, src_lang, tgt_lang):
|
71 |
tmp_dir = tempfile.mkdtemp()
|
|
|
|
|
72 |
for file_obj in files:
|
73 |
-
process_file(file_obj, src_lang, tgt_lang, tmp_dir)
|
74 |
|
75 |
zip_path = os.path.join(tmp_dir, "translated_srt.zip")
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
gr.Interface(
|
83 |
fn=batch_translate,
|
@@ -88,5 +103,5 @@ gr.Interface(
|
|
88 |
],
|
89 |
outputs=gr.File(label="Download Translated ZIP"),
|
90 |
title="Batch SRT Translator (EN-ZH-JA)",
|
91 |
-
description="Upload .srt subtitle files and translate between English, Chinese, and Japanese. Dual-language output with original + translation. ZIP output.",
|
92 |
).launch()
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline
|
3 |
+
import tempfile, os, zipfile, traceback
|
4 |
|
5 |
translator_cache = {}
|
6 |
|
|
|
26 |
translator = get_translator(src, tgt)
|
27 |
return translator(text, max_length=512)[0]["translation_text"]
|
28 |
elif (src, tgt) == ("ja", "zh") or (src, tgt) == ("zh", "ja"):
|
|
|
29 |
mid = safe_translate(text, src, "en")
|
30 |
return safe_translate(mid, "en", tgt)
|
31 |
else:
|
32 |
return f"[Unsupported: {src}->{tgt}]"
|
33 |
except Exception as e:
|
34 |
+
return f"[Translation error: {str(e)}]"
|
35 |
|
36 |
def parse_srt(srt_text):
|
37 |
blocks = srt_text.strip().split("\n\n")
|
|
|
48 |
def reassemble_srt(subtitles):
    """Render subtitle triples back into a single SRT-style string.

    Args:
        subtitles: iterable of ``(index, timestamp, text)`` triples.

    Returns:
        Each triple rendered as three newline-separated fields, with a
        blank line between consecutive entries.
    """
    entries = []
    for number, timing, body in subtitles:
        entries.append(f"{number}\n{timing}\n{body}")
    return "\n\n".join(entries)
|
50 |
|
51 |
+
def process_file(file_obj, src_lang, tgt_lang, output_dir, error_log):
    """Translate one uploaded subtitle file and write a bilingual copy.

    The file content is read, decoded as UTF-8 (undecodable bytes are
    dropped), parsed with ``parse_srt``, and each subtitle text is passed
    through ``safe_translate``. Every output entry keeps the original text
    followed by its translation on the next line. The result is written to
    ``output_dir`` under the base name of ``file_obj.name``.

    Args:
        file_obj: object exposing ``read()`` (returning bytes) and ``name``.
        src_lang: source language code forwarded to ``safe_translate``.
        tgt_lang: target language code forwarded to ``safe_translate``.
        output_dir: directory that receives the translated file.
        error_log: list that collects a message plus traceback on failure.

    Returns:
        None. Failures are recorded in ``error_log`` instead of being raised.
    """
    try:
        payload = file_obj.read()
        source_text = payload.decode("utf-8", errors="ignore")

        bilingual_entries = []
        for number, timing, original in parse_srt(source_text):
            rendered = safe_translate(original, src_lang, tgt_lang)
            # Original text first, translation directly underneath.
            bilingual_entries.append((number, timing, f"{original}\n{rendered}"))

        target_name = os.path.basename(file_obj.name)
        target_path = os.path.join(output_dir, target_name)
        with open(target_path, "w", encoding="utf-8") as out:
            out.write(reassemble_srt(bilingual_entries))
    except Exception as exc:
        # Keep the batch going: record the failure for the caller's log.
        details = traceback.format_exc()
        error_log.append(f"File {file_obj.name} failed: {str(exc)}\n{details}")
|
|
|
|
|
|
|
67 |
|
68 |
def batch_translate(files, src_lang, tgt_lang):
    """Translate a batch of subtitle files and bundle the results in a ZIP.

    Each file is handed to ``process_file``, which writes its output into a
    fresh temporary directory and appends any failure to a shared error log.
    All ``.srt`` files found in that directory are then zipped; if any file
    failed, a ``log.txt`` with the collected errors is added to the archive.

    Args:
        files: iterable of uploaded file objects; ``None`` is treated as an
            empty batch (e.g. when nothing was uploaded).
        src_lang: source language code forwarded to ``process_file``.
        tgt_lang: target language code forwarded to ``process_file``.

    Returns:
        Path to ``translated_srt.zip`` on success, or to ``fail.zip``
        (containing only ``log.txt``) if building the main archive raised.
    """
    tmp_dir = tempfile.mkdtemp()
    error_log = []

    # Iterating None would raise TypeError before any log could be produced.
    for file_obj in files or []:
        process_file(file_obj, src_lang, tgt_lang, tmp_dir, error_log)

    zip_path = os.path.join(tmp_dir, "translated_srt.zip")
    log_path = os.path.join(tmp_dir, "log.txt")
    try:
        with zipfile.ZipFile(zip_path, "w") as zipf:
            for name in os.listdir(tmp_dir):
                path = os.path.join(tmp_dir, name)
                # Case-insensitive match so uploads named "X.SRT" are not
                # silently left out of the archive.
                if os.path.isfile(path) and name.lower().endswith(".srt"):
                    zipf.write(path, arcname=name)
            if error_log:
                # Explicit UTF-8: messages may contain CJK file names or text
                # that the platform default encoding cannot represent.
                with open(log_path, "w", encoding="utf-8") as logf:
                    logf.write("\n".join(error_log))
                # Add the log only after it is closed so its content is flushed.
                zipf.write(log_path, arcname="log.txt")
        return zip_path
    except Exception as e:
        # Fallback: ship an archive that contains only the error report.
        fail_zip = os.path.join(tmp_dir, "fail.zip")
        with open(log_path, "w", encoding="utf-8") as logf:
            logf.write(f"ZIP error: {str(e)}\n\n{traceback.format_exc()}")
        with zipfile.ZipFile(fail_zip, "w") as zipf:
            zipf.write(log_path, arcname="log.txt")
        return fail_zip
|
96 |
|
97 |
gr.Interface(
|
98 |
fn=batch_translate,
|
|
|
103 |
],
|
104 |
outputs=gr.File(label="Download Translated ZIP"),
|
105 |
title="Batch SRT Translator (EN-ZH-JA)",
|
106 |
+
description="Upload .srt subtitle files and translate between English, Chinese, and Japanese. Dual-language output with original + translation. ZIP output. Errors will be logged.",
|
107 |
).launch()
|