Win19840531 commited on
Commit
e8d1ade
·
verified ·
1 Parent(s): 94cc371

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -0
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import tempfile, os, zipfile, shutil
4
+
5
+ translator_cache = {}
6
+
7
+ MODEL_MAP = {
8
+ ("en", "zh"): "Helsinki-NLP/opus-mt-en-zh",
9
+ ("zh", "en"): "Helsinki-NLP/opus-mt-zh-en",
10
+ ("en", "ja"): "Helsinki-NLP/opus-mt-en-ja",
11
+ ("ja", "en"): "Helsinki-NLP/opus-mt-ja-en",
12
+ }
13
+
14
+ def get_translator(src_lang, tgt_lang):
15
+ key = (src_lang, tgt_lang)
16
+ if key not in translator_cache:
17
+ if key in MODEL_MAP:
18
+ translator_cache[key] = pipeline("translation", model=MODEL_MAP[key])
19
+ else:
20
+ raise ValueError(f"No direct model for {src_lang} to {tgt_lang}")
21
+ return translator_cache[key]
22
+
23
+ def translate_text(text, src, tgt):
24
+ if (src, tgt) in MODEL_MAP:
25
+ translator = get_translator(src, tgt)
26
+ return translator(text, max_length=512)[0]["translation_text"]
27
+ elif (src == "zh" and tgt == "ja") or (src == "ja" and tgt == "zh"):
28
+ mid = translate_text(text, src, "en")
29
+ return translate_text(mid, "en", tgt)
30
+ else:
31
+ raise ValueError("Unsupported language pair")
32
+
33
+ def parse_srt(srt_text):
34
+ blocks = srt_text.strip().split("\n\n")
35
+ subtitles = []
36
+
37
+ for block in blocks:
38
+ lines = block.splitlines()
39
+ if len(lines) >= 3:
40
+ idx = lines[0]
41
+ timestamp = lines[1]
42
+ text = " ".join(lines[2:])
43
+ subtitles.append((idx, timestamp, text))
44
+ return subtitles
45
+
46
+ def reassemble_srt(subtitles):
47
+ return "\n\n".join(f"{idx}\n{ts}\n{txt}" for idx, ts, txt in subtitles)
48
+
49
+ def process_file(file_obj, src_lang, tgt_lang, output_dir):
50
+ raw_text = file_obj.read().decode("utf-8")
51
+ subtitles = parse_srt(raw_text)
52
+
53
+ translated_subs = []
54
+ for idx, ts, txt in subtitles:
55
+ try:
56
+ translated = translate_text(txt, src_lang, tgt_lang)
57
+ except Exception as e:
58
+ translated = f"[Error: {e}]"
59
+ translated_subs.append((idx, ts, translated))
60
+
61
+ output_path = os.path.join(output_dir, os.path.basename(file_obj.name))
62
+ with open(output_path, "w", encoding="utf-8") as f:
63
+ f.write(reassemble_srt(translated_subs))
64
+
65
+ def batch_translate(files, src_lang, tgt_lang):
66
+ tmp_dir = tempfile.mkdtemp()
67
+ for file_obj in files:
68
+ process_file(file_obj, src_lang, tgt_lang, tmp_dir)
69
+
70
+ zip_path = os.path.join(tmp_dir, "translated_srt.zip")
71
+ with zipfile.ZipFile(zip_path, 'w') as zipf:
72
+ for name in os.listdir(tmp_dir):
73
+ if name.endswith(".srt"):
74
+ zipf.write(os.path.join(tmp_dir, name), arcname=name)
75
+ return zip_path
76
+
77
+ gr.Interface(
78
+ fn=batch_translate,
79
+ inputs=[
80
+ gr.File(file_types=[".srt"], label="Upload SRT files", file_count="multiple"),
81
+ gr.Dropdown(["en", "zh", "ja"], label="Source Language", value="en"),
82
+ gr.Dropdown(["en", "zh", "ja"], label="Target Language", value="zh"),
83
+ ],
84
+ outputs=gr.File(label="Download Translated ZIP"),
85
+ title="Batch SRT Translator (EN-ZH-JA)",
86
+ description="Upload .srt subtitle files and translate between English, Chinese, and Japanese. Timecodes are preserved. All translated files will be zipped.",
87
+ ).launch()