"""Gradio app that batch-translates SRT subtitle files between EN, ZH and JA.

Each uploaded ``.srt`` file is parsed into cues, every cue is translated, and a
bilingual file (original text followed by its translation) is written. All
results are returned to the user as a single ZIP archive; failures are
collected into a ``log.txt`` inside that archive.
"""

import os
import re
import tempfile
import traceback
import zipfile

import gradio as gr
from transformers import pipeline

# Loaded translation pipelines, keyed by (source, target) language pair, so
# each model is only instantiated once per process.
translator_cache = {}

# Language pairs that have a dedicated translation model.
# NOTE(review): confirm every model id below resolves on the Hugging Face Hub.
MODEL_MAP = {
    ("en", "zh"): "Helsinki-NLP/opus-mt-en-zh",
    ("zh", "en"): "Helsinki-NLP/opus-mt-zh-en",
    ("en", "ja"): "Helsinki-NLP/opus-mt-en-ja",
    ("ja", "en"): "Helsinki-NLP/opus-mt-ja-en",
}

# Pairs without a direct model; these are translated in two hops via English.
_PIVOT_PAIRS = frozenset({("ja", "zh"), ("zh", "ja")})

# SRT cues are separated by one or more blank (possibly whitespace-only) lines.
_BLOCK_SEPARATOR = re.compile(r"\n(?:[ \t]*\n)+")


def get_translator(src_lang, tgt_lang):
    """Return the cached translation pipeline for a language pair.

    The pipeline is created on first use and stored in ``translator_cache``.

    Args:
        src_lang: Source language code (e.g. ``"en"``).
        tgt_lang: Target language code (e.g. ``"zh"``).

    Returns:
        The ``transformers`` translation pipeline for the pair.

    Raises:
        ValueError: If ``MODEL_MAP`` has no model for the pair.
    """
    key = (src_lang, tgt_lang)
    if key in translator_cache:
        return translator_cache[key]

    model_name = MODEL_MAP.get(key)
    if model_name is None:
        raise ValueError(f"No model for {src_lang} to {tgt_lang}")

    translator_cache[key] = pipeline("translation", model=model_name)
    return translator_cache[key]


def _translate(text, src, tgt):
    """Translate ``text`` and let any failure propagate as an exception."""
    pair = (src, tgt)
    if pair not in MODEL_MAP and pair in _PIVOT_PAIRS:
        # Both hops run inside this raising helper so that a failure in the
        # first hop aborts the pivot instead of having its error message
        # translated by the second hop.
        intermediate = _translate(text, src, "en")
        return _translate(intermediate, "en", tgt)

    translator = get_translator(src, tgt)
    return translator(text, max_length=512)[0]["translation_text"]


def safe_translate(text, src, tgt):
    """Translate ``text`` from ``src`` to ``tgt`` without ever raising.

    Pairs listed in ``MODEL_MAP`` are translated directly; ja<->zh is
    translated in two steps through English.

    Args:
        text: The text to translate.
        src: Source language code.
        tgt: Target language code.

    Returns:
        The translated text. For an unsupported pair the marker
        ``"[Unsupported: src->tgt]"`` is returned; if translation fails the
        marker ``"[Translation error: ...]"`` is returned. Whitespace-only
        text of a supported pair is returned unchanged.
    """
    try:
        pair = (src, tgt)
        if pair not in MODEL_MAP and pair not in _PIVOT_PAIRS:
            return f"[Unsupported: {src}->{tgt}]"
        if isinstance(text, str) and not text.strip():
            # Nothing to translate; avoid sending empty input to the model.
            return text
        return _translate(text, src, tgt)
    except Exception as e:
        # Deliberate catch-all: one bad cue must not abort the whole file.
        return f"[Translation error: {str(e)}]"


def parse_srt(srt_text):
    """Split SRT content into ``(index, timestamp, text)`` tuples.

    Handles LF, CRLF and CR line endings, a leading byte-order mark, and cues
    separated by several blank or whitespace-only lines. Blocks with fewer
    than three lines (index, timestamp, text) are skipped. Multi-line cue text
    is joined with single spaces.

    Args:
        srt_text: The full content of an SRT file.

    Returns:
        A list of ``(index, timestamp, text)`` string tuples in file order.
    """
    normalized = (
        srt_text.lstrip("\ufeff").replace("\r\n", "\n").replace("\r", "\n").strip()
    )

    subtitles = []
    for block in _BLOCK_SEPARATOR.split(normalized):
        lines = block.splitlines()
        if len(lines) < 3:
            continue
        index, timestamp, *text_lines = lines
        subtitles.append((index, timestamp, " ".join(text_lines)))
    return subtitles


def reassemble_srt(subtitles):
    """Render ``(index, timestamp, text)`` tuples back into SRT text.

    Args:
        subtitles: Iterable of ``(index, timestamp, text)`` tuples.

    Returns:
        The cues joined by blank lines, without a trailing newline.
    """
    blocks = [f"{idx}\n{ts}\n{txt}" for idx, ts, txt in subtitles]
    return "\n\n".join(blocks)


def _unique_path(directory, filename):
    """Return a path in ``directory`` for ``filename`` that does not exist yet."""
    candidate = os.path.join(directory, filename)
    stem, ext = os.path.splitext(filename)
    counter = 1
    while os.path.exists(candidate):
        candidate = os.path.join(directory, f"{stem}_{counter}{ext}")
        counter += 1
    return candidate


def process_file(file_obj, src_lang, tgt_lang, output_dir, error_log):
    """Translate one SRT file and write a bilingual copy into ``output_dir``.

    Each output cue contains the original text followed by its translation on
    the next line. Any exception is appended to ``error_log`` (message plus
    traceback) instead of being raised.

    Args:
        file_obj: Path string, or an object whose ``name`` attribute is the
            path of the uploaded file.
        src_lang: Source language code.
        tgt_lang: Target language code.
        output_dir: Directory that receives the translated file.
        error_log: List that failure descriptions are appended to.
    """
    # Accept both plain path strings and file-like objects exposing ``.name``.
    source_path = getattr(file_obj, "name", file_obj)
    try:
        # "utf-8-sig" drops a leading BOM that would otherwise end up in the
        # first cue's index line.
        with open(source_path, "r", encoding="utf-8-sig", errors="ignore") as f:
            raw_text = f.read()

        translated_subs = []
        for idx, ts, txt in parse_srt(raw_text):
            translated = safe_translate(txt, src_lang, tgt_lang)
            translated_subs.append((idx, ts, f"{txt}\n{translated}"))

        # Uploads may share a basename; never overwrite an earlier result.
        output_path = _unique_path(output_dir, os.path.basename(source_path))
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(reassemble_srt(translated_subs))
    except Exception as e:
        error_log.append(
            f"File {source_path} failed: {str(e)}\n{traceback.format_exc()}"
        )


def batch_translate(files, src_lang, tgt_lang):
    """Translate all uploaded SRT files and bundle the results into a ZIP.

    Args:
        files: Uploaded files (or ``None`` when nothing was uploaded).
        src_lang: Source language code.
        tgt_lang: Target language code.

    Returns:
        Path of ``translated_srt.zip`` containing every translated ``.srt``
        file plus ``log.txt`` when any file failed. If building that archive
        fails, the path of ``fail.zip`` containing only ``log.txt`` with the
        error is returned instead.
    """
    # The directory is intentionally not removed here: the returned path
    # points into it.
    tmp_dir = tempfile.mkdtemp()
    error_log = []
    for file_obj in files or []:
        process_file(file_obj, src_lang, tgt_lang, tmp_dir, error_log)

    zip_path = os.path.join(tmp_dir, "translated_srt.zip")
    log_path = os.path.join(tmp_dir, "log.txt")
    try:
        with zipfile.ZipFile(zip_path, "w") as zipf:
            # The archive itself lives in tmp_dir; the extension filter keeps
            # it (and log.txt) out of the subtitle listing.
            for name in sorted(os.listdir(tmp_dir)):
                path = os.path.join(tmp_dir, name)
                if os.path.isfile(path) and name.lower().endswith(".srt"):
                    zipf.write(path, arcname=name)
            if error_log:
                with open(log_path, "w", encoding="utf-8") as logf:
                    logf.write("\n".join(error_log))
                zipf.write(log_path, arcname="log.txt")
        return zip_path
    except Exception as e:
        fail_zip = os.path.join(tmp_dir, "fail.zip")
        with open(log_path, "w", encoding="utf-8") as logf:
            logf.write(f"ZIP error: {str(e)}\n\n{traceback.format_exc()}")
        with zipfile.ZipFile(fail_zip, "w") as zipf:
            zipf.write(log_path, arcname="log.txt")
        return fail_zip


demo = gr.Interface(
    fn=batch_translate,
    inputs=[
        gr.File(file_types=[".srt"], label="Upload SRT files", file_count="multiple"),
        gr.Dropdown(["en", "zh", "ja"], label="Source Language", value="ja"),
        gr.Dropdown(["en", "zh", "ja"], label="Target Language", value="zh"),
    ],
    outputs=gr.File(label="Download Translated ZIP"),
    title="Batch SRT Translator (EN-ZH-JA)",
    description="Upload .srt subtitle files and translate between English, Chinese, and Japanese. Dual-language output with original + translation. ZIP output. Errors will be logged.",
)

if __name__ == "__main__":
    demo.launch()