Spaces:

Win19840531
/

gpt-srt-translate

Running

App Files Files Community

Win19840531 committed on May 4

Commit

e8d1ade

verified ·

1 Parent(s): 94cc371

Create app.py

Browse files

Files changed (1) hide show

app.py +87 -0

app.py ADDED Viewed

	@@ -0,0 +1,87 @@

+import gradio as gr
+from transformers import pipeline
+import tempfile, os, zipfile, shutil
+# Loaded translation pipelines keyed by (source, target) language codes.
+# get_translator() fills this on first use of a pair and reuses the entry after.
+translator_cache = {}
+# Model id for every language pair that has a direct translation model.
+# zh<->ja has no entry here; translate_text() covers it by pivoting via "en".
+MODEL_MAP = {
+    ("en", "zh"): "Helsinki-NLP/opus-mt-en-zh",
+    ("zh", "en"): "Helsinki-NLP/opus-mt-zh-en",
+    ("en", "ja"): "Helsinki-NLP/opus-mt-en-ja",
+    ("ja", "en"): "Helsinki-NLP/opus-mt-ja-en",
+}
+def get_translator(src_lang, tgt_lang):
+    """Return the translation pipeline for ``src_lang`` -> ``tgt_lang``.
+
+    The pipeline is built on the first request for a pair and stored in
+    ``translator_cache``; later requests return the stored object.
+
+    Raises:
+        ValueError: If ``MODEL_MAP`` has no entry for the pair.
+    """
+    pair = (src_lang, tgt_lang)
+    # Fast path: the pipeline for this pair has already been built.
+    try:
+        return translator_cache[pair]
+    except KeyError:
+        pass
+    if pair not in MODEL_MAP:
+        raise ValueError(f"No direct model for {src_lang} to {tgt_lang}")
+    loaded = pipeline("translation", model=MODEL_MAP[pair])
+    translator_cache[pair] = loaded
+    return loaded
+def translate_text(text, src, tgt):
+    """Translate ``text`` from language ``src`` to language ``tgt``.
+
+    Pairs listed in ``MODEL_MAP`` are translated directly. Chinese <-> Japanese
+    has no direct model, so it is translated in two hops through English.
+
+    Args:
+        text: The string to translate.
+        src: Source language code (e.g. ``"en"``).
+        tgt: Target language code.
+
+    Returns:
+        The translated string. ``text`` is returned unchanged when ``src``
+        equals ``tgt`` or when ``text`` is empty / whitespace only.
+
+    Raises:
+        ValueError: If the pair is neither direct nor zh<->ja.
+    """
+    # The UI lets the user pick the same language twice; treat that as a
+    # no-op instead of failing every cue with "Unsupported language pair".
+    if src == tgt:
+        return text
+    # Nothing to translate, so do not spend a model call on it.
+    if not text.strip():
+        return text
+    if (src, tgt) in MODEL_MAP:
+        translator = get_translator(src, tgt)
+        return translator(text, max_length=512)[0]["translation_text"]
+    if {src, tgt} == {"zh", "ja"}:
+        # Pivot through English: src -> en -> tgt.
+        mid = translate_text(text, src, "en")
+        return translate_text(mid, "en", tgt)
+    raise ValueError("Unsupported language pair")
+def parse_srt(srt_text):
+    """Split SRT text into ``(index, timestamp, text)`` tuples.
+
+    Cues are separated by blank lines. Within a cue the first line is taken
+    as the index, the second as the timestamp, and all remaining lines are
+    joined with single spaces so the cue is translated as one piece of text.
+    Blocks with fewer than three lines are skipped.
+
+    Args:
+        srt_text: Full contents of an SRT file as a string.
+
+    Returns:
+        A list of ``(index, timestamp, text)`` string tuples in file order.
+    """
+    # Drop a leading byte-order mark so it does not end up glued to the
+    # first cue's index.
+    cleaned = srt_text.lstrip("\ufeff")
+    # Normalise CRLF / bare CR to LF. Without this, a CRLF file never matches
+    # the blank-line separator and the whole file collapses into one cue.
+    cleaned = cleaned.replace("\r\n", "\n").replace("\r", "\n").strip()
+    subtitles = []
+    for block in cleaned.split("\n\n"):
+        # Runs of 3+ newlines leave stray leading newlines (or empty blocks);
+        # trim them so the index stays on the block's first line.
+        lines = block.strip("\n").splitlines()
+        if len(lines) >= 3:
+            idx, timestamp = lines[0], lines[1]
+            subtitles.append((idx, timestamp, " ".join(lines[2:])))
+    return subtitles
+def reassemble_srt(subtitles):
+    """Serialise ``(index, timestamp, text)`` cues back into SRT text.
+
+    Each cue becomes three newline-separated lines; cues are separated by
+    one blank line. No trailing newline is appended.
+    """
+    rendered = []
+    for number, timing, caption in subtitles:
+        rendered.append("{}\n{}\n{}".format(number, timing, caption))
+    return "\n\n".join(rendered)
+def process_file(file_obj, src_lang, tgt_lang, output_dir):
+    """Translate one uploaded SRT file and write the result into ``output_dir``.
+
+    Args:
+        file_obj: The upload, either a filesystem path (``str`` or
+            ``os.PathLike``) or an object exposing ``read()`` and ``name``.
+            NOTE(review): which of the two Gradio hands over depends on the
+            Gradio version / ``gr.File`` ``type`` setting -- confirm.
+        src_lang: Source language code.
+        tgt_lang: Target language code.
+        output_dir: Existing directory; the output keeps the upload's base
+            file name.
+
+    A cue whose translation raises is kept, with ``[Error: ...]`` as its
+    text, so one bad cue does not abort the whole file.
+    """
+    if isinstance(file_obj, (str, os.PathLike)):
+        source_name = os.fspath(file_obj)
+        with open(source_name, "rb") as src:
+            raw = src.read()
+    else:
+        source_name = file_obj.name
+        raw = file_obj.read()
+    if isinstance(raw, (bytes, bytearray)):
+        # "utf-8-sig" decodes plain UTF-8 too, but also drops a leading BOM
+        # that would otherwise be glued to the first cue's index.
+        raw_text = raw.decode("utf-8-sig")
+    else:
+        # Handle opened in text mode: already a str, only strip the BOM.
+        raw_text = raw.lstrip("\ufeff")
+    translated_subs = []
+    for idx, ts, txt in parse_srt(raw_text):
+        try:
+            translated = translate_text(txt, src_lang, tgt_lang)
+        except Exception as e:
+            # Deliberate best effort: surface the failure inside the cue.
+            translated = f"[Error: {e}]"
+        translated_subs.append((idx, ts, translated))
+    output_path = os.path.join(output_dir, os.path.basename(source_name))
+    with open(output_path, "w", encoding="utf-8") as f:
+        f.write(reassemble_srt(translated_subs))
+def batch_translate(files, src_lang, tgt_lang):
+    """Translate every uploaded SRT file and bundle the results into a ZIP.
+
+    Args:
+        files: Iterable of uploads as accepted by ``process_file``; ``None``
+            (nothing uploaded) is treated as an empty list.
+        src_lang: Source language code.
+        tgt_lang: Target language code.
+
+    Returns:
+        Path to ``translated_srt.zip`` inside a fresh temporary directory.
+        That directory is intentionally not removed here because the caller
+        still has to read the archive.
+    """
+    # The archive gets its own directory so it survives the cleanup of the
+    # working directory that holds the intermediate .srt files.
+    out_dir = tempfile.mkdtemp()
+    zip_path = os.path.join(out_dir, "translated_srt.zip")
+    with tempfile.TemporaryDirectory() as work_dir:
+        for file_obj in files or []:
+            process_file(file_obj, src_lang, tgt_lang, work_dir)
+        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
+            # sorted() makes the archive order independent of the filesystem.
+            for name in sorted(os.listdir(work_dir)):
+                # Case-insensitive so uploads named "*.SRT" are not dropped.
+                if name.lower().endswith(".srt"):
+                    zipf.write(os.path.join(work_dir, name), arcname=name)
+    return zip_path
+# Components are created inputs first, then the output, then the interface.
+srt_upload = gr.File(file_types=[".srt"], label="Upload SRT files", file_count="multiple")
+source_choice = gr.Dropdown(["en", "zh", "ja"], label="Source Language", value="en")
+target_choice = gr.Dropdown(["en", "zh", "ja"], label="Target Language", value="zh")
+zip_download = gr.File(label="Download Translated ZIP")
+# Wire batch_translate to the components and start the app.
+demo = gr.Interface(
+    fn=batch_translate,
+    inputs=[srt_upload, source_choice, target_choice],
+    outputs=zip_download,
+    title="Batch SRT Translator (EN-ZH-JA)",
+    description="Upload .srt subtitle files and translate between English, Chinese, and Japanese. Timecodes are preserved. All translated files will be zipped.",
+)
+demo.launch()