Spaces:

Athspi-ai
/

AutoSubGen

Running

App Files Files Community

Athspi committed on Jun 20

Commit

a8524a9

verified ·

1 Parent(s): a1976c9

Update app.py

Browse files

Files changed (1) hide show

app.py +589 -281

app.py CHANGED Viewed

@@ -1,317 +1,625 @@
 import os
 import google.generativeai as genai
-from moviepy.editor import VideoFileClip, AudioFileClip, CompositeVideoClip, TextClip
 from moviepy.video.tools.subtitles import SubtitlesClip
 import tempfile
 import logging
-import srt
-import datetime
 from gtts import gTTS
-from flask import Flask, request, render_template, send_from_directory, url_for, flash, session
-from werkzeug.utils import secure_filename
-import uuid
-# --- Configuration ---
 # Suppress moviepy logs
 logging.getLogger("moviepy").setLevel(logging.ERROR)
-# Configure Gemini API
-# IMPORTANT: Set your GEMINI_API_KEY as an environment variable
-# For example, in your terminal: export GEMINI_API_KEY="YOUR_API_KEY"
-try:
-    genai.configure(api_key=os.environ["GEMINI_API_KEY"])
-except KeyError:
-    raise Exception("GEMINI_API_KEY environment variable not set. Please set it before running the app.")
-# --- Flask App Initialization ---
-app = Flask(__name__)
-app.config['SECRET_KEY'] = os.urandom(24)
-app.config['UPLOAD_FOLDER'] = os.path.join(os.getcwd(), 'temp_uploads')
-# Ensure the upload folder exists
-os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
-# --- Model and Language Configuration ---
 # Create the Gemini model
 generation_config = {
-    "temperature": 0.7,
-    "top_p": 0.9,
-    "top_k": 40,
-    "max_output_tokens": 8192,
-    "response_mime_type": "text/plain",
 }
 model = genai.GenerativeModel(
-    model_name="gemini-1.5-pro-latest", # Using a stable and capable model
-    generation_config=generation_config,
 )
 # List of all supported languages
 SUPPORTED_LANGUAGES = [
-    "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
-    "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan", "Dutch",
-    "Arabic", "Swedish", "Italian", "Indonesian", "Hindi", "Finnish", "Vietnamese",
-    "Hebrew", "Ukrainian", "Greek", "Malay", "Czech", "Romanian", "Danish",
-    "Hungarian", "Tamil", "Norwegian", "Thai", "Urdu", "Croatian", "Bulgarian",
-    "Lithuanian", "Latin", "Maori", "Malayalam", "Welsh", "Slovak", "Telugu",
-    "Persian", "Latvian", "Bengali", "Serbian", "Azerbaijani", "Slovenian",
-    "Kannada", "Estonian", "Macedonian", "Breton", "Basque", "Icelandic",
-    "Armenian", "Nepali", "Mongolian", "Bosnian", "Kazakh", "Albanian",
-    "Swahili", "Galician", "Marathi", "Punjabi", "Sinhala", "Khmer", "Shona",
-    "Yoruba", "Somali", "Afrikaans", "Occitan", "Georgian", "Belarusian",
-    "Tajik", "Sindhi", "Gujarati", "Amharic", "Yiddish", "Lao", "Uzbek",
-    "Faroese", "Haitian Creole", "Pashto", "Turkmen", "Nynorsk", "Maltese",
-    "Sanskrit", "Luxembourgish", "Burmese", "Tibetan", "Tagalog", "Malagasy",
-    "Assamese", "Tatar", "Hawaiian", "Lingala", "Hausa", "Bashkir", "Javanese",
-    "Sundanese"
 ]
 # Language code mapping for gTTS
 LANGUAGE_CODES = {
-    "English": "en", "Chinese": "zh", "German": "de", "Spanish": "es",
-    "Russian": "ru", "Korean": "ko", "French": "fr", "Japanese": "ja",
-    "Portuguese": "pt", "Turkish": "tr", "Polish": "pl", "Catalan": "ca",
-    "Dutch": "nl", "Arabic": "ar", "Swedish": "sv", "Italian": "it",
-    "Indonesian": "id", "Hindi": "hi", "Finnish": "fi", "Vietnamese": "vi",
-    "Hebrew": "he", "Ukrainian": "uk", "Greek": "el", "Malay": "ms",
-    "Czech": "cs", "Romanian": "ro", "Danish": "da", "Hungarian": "hu",
-    "Tamil": "ta", "Norwegian": "no", "Thai": "th", "Urdu": "ur",
-    "Croatian": "hr", "Bulgarian": "bg", "Lithuanian": "lt", "Latin": "la",
-    "Maori": "mi", "Malayalam": "ml", "Welsh": "cy", "Slovak": "sk",
-    "Telugu": "te", "Persian": "fa", "Latvian": "lv", "Bengali": "bn",
-    "Serbian": "sr", "Azerbaijani": "az", "Slovenian": "sl", "Kannada": "kn",
-    "Estonian": "et", "Macedonian": "mk", "Breton": "br", "Basque": "eu",
-    "Icelandic": "is", "Armenian": "hy", "Nepali": "ne", "Mongolian": "mn",
-    "Bosnian": "bs", "Kazakh": "kk", "Albanian": "sq", "Swahili": "sw",
-    "Galician": "gl", "Marathi": "mr", "Punjabi": "pa", "Sinhala": "si",
-    "Khmer": "km", "Shona": "sn", "Yoruba": "yo", "Somali": "so",
-    "Afrikaans": "af", "Occitan": "oc", "Georgian": "ka", "Belarusian": "be",
-    "Tajik": "tg", "Sindhi": "sd", "Gujarati": "gu", "Amharic": "am",
-    "Yiddish": "yi", "Lao": "lo", "Uzbek": "uz", "Faroese": "fo",
-    "Haitian Creole": "ht", "Pashto": "ps", "Turkmen": "tk", "Nynorsk": "nn",
-    "Maltese": "mt", "Sanskrit": "sa", "Luxembourgish": "lb", "Burmese": "my",
-    "Tibetan": "bo", "Tagalog": "tl", "Malagasy": "mg", "Assamese": "as",
-    "Tatar": "tt", "Hawaiian": "haw", "Lingala": "ln", "Hausa": "ha",
-    "Bashkir": "ba", "Javanese": "jv", "Sundanese": "su"
 }
-# --- Core Processing Functions ---
-def time_to_seconds(time_obj):
-    """Convert datetime.time object to seconds."""
-    return time_obj.hour * 3600 + time_obj.minute * 60 + time_obj.second + time_obj.microsecond / 1e6
-def extract_audio_from_video(video_path, audio_path):
-    """Extract audio from a video file and save it as a WAV file."""
-    try:
-        video = VideoFileClip(video_path)
-        video.audio.write_audiofile(audio_path, fps=16000, logger=None)
-        return audio_path
-    except Exception as e:
-        logging.error(f"Error extracting audio: {e}")
-        return None
-def transcribe_audio_with_gemini(audio_path, source_language):
-    """Transcribe audio using Gemini with a prompt for accurate timestamps."""
-    try:
-        audio_file = genai.upload_file(path=audio_path)
-        language_prompt = f"in {source_language}" if source_language != "Auto Detect" else ""
-        prompt = f"""
-        You are a professional transcriber. Transcribe this audio accurately and verbatim {language_prompt}.
-        Include timestamps for each sentence in the SRT (SubRip) format.
-        Example:
-        1
-        00:00:01,234 --> 00:00:05,678
-        This is the first sentence.
-        2
-        00:00:06,123 --> 00:00:09,456
-        This is the second sentence.
-        Ensure the timestamps are precise and correspond to the start and end of each spoken sentence.
-        Respond ONLY with the transcription in the SRT format. Do not add explanations or any extra text.
-        """
-        response = model.generate_content([prompt, audio_file])
-        genai.delete_file(audio_file.name) # Clean up the uploaded file
-        return response.text.strip()
-    except Exception as e:
-        logging.error(f"Error during Gemini transcription: {e}")
-        return None
 def translate_srt(srt_text, target_language):
-    """Translate an SRT file using Gemini while preserving timestamps."""
-    try:
-        prompt = f"""
-        Translate the following SRT subtitles into {target_language}.
-        Preserve the SRT format perfectly (index numbers, timestamps, and structure).
-        Translate only the subtitle text on the lines after the timestamps.
-        Do not add any explanations or extra text. Your output must be a valid SRT file.
-        Here is the SRT file content:
-        {srt_text}
-        """
-        response = model.generate_content(prompt)
-        return response.text.strip()
-    except Exception as e:
-        logging.error(f"Error during translation: {e}")
-        return None
-def generate_tts_audio(srt_text, language, tts_audio_path):
-    """Generate TTS audio from SRT text."""
-    try:
-        subtitles = list(srt.parse(srt_text))
-        all_text = " ".join([sub.content for sub in subtitles])
-        lang_code = LANGUAGE_CODES.get(language, "en")
-        tts = gTTS(text=all_text, lang=lang_code, slow=False)
-        tts.save(tts_audio_path)
-        return tts_audio_path
-    except Exception as e:
-        logging.error(f"Error generating TTS audio: {e}")
-        return None
-def add_subtitles_to_video(video_path, srt_text, output_video_path):
-    """Add subtitles to video and return the path to the new video."""
-    try:
-        def generator(txt):
-             return TextClip(txt, font='Arial-Bold', fontsize=24, color='white',
-                            stroke_color='black', stroke_width=1)
-        # MoviePy's SubtitlesClip requires a file path
-        with tempfile.NamedTemporaryFile(mode='w', suffix='.srt', delete=False, encoding='utf-8') as temp_srt:
-            temp_srt.write(srt_text)
-            srt_path = temp_srt.name
-        video = VideoFileClip(video_path)
-        subtitles = SubtitlesClip(srt_path, generator)
-        result = CompositeVideoClip([video, subtitles.set_position(('center', 'bottom'))])
-        # Write output with original audio
-        result.write_videofile(output_video_path, codec='libx264', audio_codec='aac', threads=4, logger=None)
-        os.remove(srt_path) # Clean up temp srt file
-        return output_video_path
-    except Exception as e:
-        logging.error(f"Error adding subtitles to video: {e}")
-        return None
-# --- Flask Routes ---
-@app.route('/')
-def index():
-    """Render the main page."""
-    session.clear() # Clear any old data
-    return render_template('index.html', supported_languages=SUPPORTED_LANGUAGES)
-@app.route('/process', methods=['POST'])
-def process():
-    """Handle the video processing request."""
-    if 'video' not in request.files:
-        flash('No video file selected. Please upload a video.', 'error')
-        return render_template('index.html', supported_languages=SUPPORTED_LANGUAGES)
-    video_file = request.files['video']
-    if video_file.filename == '':
-        flash('No video file selected. Please upload a video.', 'error')
-        return render_template('index.html', supported_languages=SUPPORTED_LANGUAGES)
-    # --- Get form options ---
-    source_language = request.form.get('source_language', 'Auto Detect')
-    translate_to = request.form.get('translate_to', 'None')
-    add_tts = 'add_tts' in request.form
-    add_subtitles = 'add_subtitles' in request.form
-    # --- Setup a unique session directory for this request ---
-    session_id = str(uuid.uuid4())
-    session['session_id'] = session_id
-    session_dir = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
-    os.makedirs(session_dir, exist_ok=True)
-    filename = secure_filename(video_file.filename)
-    video_path = os.path.join(session_dir, filename)
-    video_file.save(video_path)
-    results = {}
-    # 1. Extract Audio
-    audio_path = os.path.join(session_dir, "extracted_audio.wav")
-    if not extract_audio_from_video(video_path, audio_path):
-        flash('Failed to extract audio from the video.', 'error')
-        return render_template('index.html', supported_languages=SUPPORTED_LANGUAGES)
-    # 2. Transcribe Audio
-    original_srt_text = transcribe_audio_with_gemini(audio_path, source_language)
-    if not original_srt_text:
-        flash('Failed to transcribe the audio. The API call might have failed.', 'error')
-        return render_template('index.html', supported_languages=SUPPORTED_LANGUAGES)
-    original_srt_path = os.path.join(session_dir, "original_subtitles.srt")
-    with open(original_srt_path, "w", encoding="utf-8") as f:
-        f.write(original_srt_text)
-    results['original_srt_file'] = "original_subtitles.srt"
-    # Keep track of the final SRT to use for TTS and video burn-in
-    final_srt_text = original_srt_text
-    # 3. Translate Subtitles (if requested)
-    if translate_to and translate_to != "None":
-        translated_srt_text = translate_srt(original_srt_text, translate_to)
-        if translated_srt_text:
-            translated_srt_path = os.path.join(session_dir, "translated_subtitles.srt")
-            with open(translated_srt_path, "w", encoding="utf-8") as f:
-                f.write(translated_srt_text)
-            results['translated_srt_file'] = "translated_subtitles.srt"
-            final_srt_text = translated_srt_text # Use translated text for next steps
-        else:
-            flash(f'Failed to translate subtitles to {translate_to}.', 'warning')
-    # 4. Generate TTS Audio (if requested)
-    if add_tts:
-        tts_lang = translate_to if translate_to and translate_to != "None" else source_language
-        # If source was auto-detect, we can't reliably guess the TTS language. Default to English.
-        if tts_lang == 'Auto Detect':
-             flash('TTS language cannot be "Auto Detect". Defaulting to English. For better results, please specify the source language.', 'warning')
-             tts_lang = 'English'
-        tts_audio_path = os.path.join(session_dir, "tts_audio.mp3")
-        if generate_tts_audio(final_srt_text, tts_lang, tts_audio_path):
-             results['tts_audio_file'] = "tts_audio.mp3"
-        else:
-             flash('Failed to generate Text-to-Speech audio.', 'warning')
-    # 5. Add Subtitles to Video (if requested)
-    if add_subtitles:
-        output_video_path = os.path.join(session_dir, "output_video.mp4")
-        if add_subtitles_to_video(video_path, final_srt_text, output_video_path):
-            results['output_video_file'] = "output_video.mp4"
-        else:
-            flash('Failed to add subtitles to the video.', 'warning')
-    # Clean up original extracted audio
-    os.remove(audio_path)
-    return render_template('index.html',
-                           supported_languages=SUPPORTED_LANGUAGES,
-                           results=results,
-                           session_id=session_id)
-@app.route('/download/<session_id>/<path:filename>')
-def download_file(session_id, filename):
-    """Serve files from the session directory for download."""
-    directory = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
-    return send_from_directory(directory, filename, as_attachment=True)
-# --- Run the App ---
-if __name__ == '__main__':
-    app.run(host="0.0.0.0", port=7860)

 import os
 import google.generativeai as genai
+from moviepy.video.io.VideoFileClip import VideoFileClip
+from moviepy.audio.io.AudioFileClip import AudioFileClip
+from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip
 from moviepy.video.tools.subtitles import SubtitlesClip
 import tempfile
 import logging
+import gradio as gr
 from gtts import gTTS
+import srt
 # Suppress moviepy logs
 logging.getLogger("moviepy").setLevel(logging.ERROR)
+# Configure Gemini API
+genai.configure(api_key=os.environ["GEMINI_API_KEY"])
 # Create the Gemini model
 generation_config = {
+    "temperature": 0.7,
+    "top_p": 0.9,
+    "top_k": 40,
+    "max_output_tokens": 8192,
+    "response_mime_type": "text/plain",
 }
 model = genai.GenerativeModel(
+    model_name="gemini-2.0-pro-exp-02-05",
+    generation_config=generation_config,
 )
 # List of all supported languages
 SUPPORTED_LANGUAGES = [
+    "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
+    "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan", "Dutch",
+    "Arabic", "Swedish", "Italian", "Indonesian", "Hindi", "Finnish", "Vietnamese",
+    "Hebrew", "Ukrainian", "Greek", "Malay", "Czech", "Romanian", "Danish",
+    "Hungarian", "Tamil", "Norwegian", "Thai", "Urdu", "Croatian", "Bulgarian",
+    "Lithuanian", "Latin", "Maori", "Malayalam", "Welsh", "Slovak", "Telugu",
+    "Persian", "Latvian", "Bengali", "Serbian", "Azerbaijani", "Slovenian",
+    "Kannada", "Estonian", "Macedonian", "Breton", "Basque", "Icelandic",
+    "Armenian", "Nepali", "Mongolian", "Bosnian", "Kazakh", "Albanian",
+    "Swahili", "Galician", "Marathi", "Punjabi", "Sinhala", "Khmer", "Shona",
+    "Yoruba", "Somali", "Afrikaans", "Occitan", "Georgian", "Belarusian",
+    "Tajik", "Sindhi", "Gujarati", "Amharic", "Yiddish", "Lao", "Uzbek",
+    "Faroese", "Haitian Creole", "Pashto", "Turkmen", "Nynorsk", "Maltese",
+    "Sanskrit", "Luxembourgish", "Burmese", "Tibetan", "Tagalog", "Malagasy",
+    "Assamese", "Tatar", "Hawaiian", "Lingala", "Hausa", "Bashkir", "Javanese",
+    "Sundanese"
 ]
 # Language code mapping for gTTS
 LANGUAGE_CODES = {
+    "English": "en", "Chinese": "zh", "German": "de", "Spanish": "es",
+    "Russian": "ru", "Korean": "ko", "French": "fr", "Japanese": "ja",
+    "Portuguese": "pt", "Turkish": "tr", "Polish": "pl", "Catalan": "ca",
+    "Dutch": "nl", "Arabic": "ar", "Swedish": "sv", "Italian": "it",
+    "Indonesian": "id", "Hindi": "hi", "Finnish": "fi", "Vietnamese": "vi",
+    "Hebrew": "he", "Ukrainian": "uk", "Greek": "el", "Malay": "ms",
+    "Czech": "cs", "Romanian": "ro", "Danish": "da", "Hungarian": "hu",
+    "Tamil": "ta", "Norwegian": "no", "Thai": "th", "Urdu": "ur",
+    "Croatian": "hr", "Bulgarian": "bg", "Lithuanian": "lt", "Latin": "la",
+    "Maori": "mi", "Malayalam": "ml", "Welsh": "cy", "Slovak": "sk",
+    "Telugu": "te", "Persian": "fa", "Latvian": "lv", "Bengali": "bn",
+    "Serbian": "sr", "Azerbaijani": "az", "Slovenian": "sl", "Kannada": "kn",
+    "Estonian": "et", "Macedonian": "mk", "Breton": "br", "Basque": "eu",
+    "Icelandic": "is", "Armenian": "hy", "Nepali": "ne", "Mongolian": "mn",
+    "Bosnian": "bs", "Kazakh": "kk", "Albanian": "sq", "Swahili": "sw",
+    "Galician": "gl", "Marathi": "mr", "Punjabi": "pa", "Sinhala": "si",
+    "Khmer": "km", "Shona": "sn", "Yoruba": "yo", "Somali": "so",
+    "Afrikaans": "af", "Occitan": "oc", "Georgian": "ka", "Belarusian": "be",
+    "Tajik": "tg", "Sindhi": "sd", "Gujarati": "gu", "Amharic": "am",
+    "Yiddish": "yi", "Lao": "lo", "Uzbek": "uz", "Faroese": "fo",
+    "Haitian Creole": "ht", "Pashto": "ps", "Turkmen": "tk", "Nynorsk": "nn",
+    "Maltese": "mt", "Sanskrit": "sa", "Luxembourgish": "lb", "Burmese": "my",
+    "Tibetan": "bo", "Tagalog": "tl", "Malagasy": "mg", "Assamese": "as",
+    "Tatar": "tt", "Hawaiian": "haw", "Lingala": "ln", "Hausa": "ha",
+    "Bashkir": "ba", "Javanese": "jv", "Sundanese": "su"
 }
def extract_audio_from_video(video_file):
    """Extract the audio track of a video into a WAV file.

    Args:
        video_file: Path to the input video.

    Returns:
        Path to a newly created WAV file (written with ``fps=16000``) in the
        system temp directory. The caller is responsible for deleting it.

    Raises:
        ValueError: If the video has no audio track.
    """
    video = VideoFileClip(video_file)
    try:
        if video.audio is None:
            raise ValueError("The video has no audio track to transcribe.")
        # A unique name per call keeps concurrent requests from overwriting
        # each other's audio, which a single fixed filename would allow.
        handle, audio_file = tempfile.mkstemp(prefix="extracted_audio_", suffix=".wav")
        os.close(handle)
        try:
            video.audio.write_audiofile(audio_file, fps=16000, logger=None)
        except Exception:
            # Do not leave a partial file behind when the export fails.
            os.remove(audio_file)
            raise
    finally:
        # Release the reader resources held by the clip.
        video.close()
    return audio_file
def transcribe_audio_with_gemini(audio_file):
    """Transcribe a WAV file with Gemini, asking for per-sentence timestamps.

    Args:
        audio_file: Path to the WAV file to transcribe.

    Returns:
        The model's reply with surrounding whitespace removed. The prompt asks
        for one ``[HH:MM:SS] sentence`` entry per line.
    """
    with open(audio_file, "rb") as f:
        audio_data = f.read()
    audio_blob = {
        'mime_type': 'audio/wav',
        'data': audio_data
    }
    prompt = """
    You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language.
    Include timestamps for each sentence in the following format:
    [HH:MM:SS] Sentence 1
    [HH:MM:SS] Sentence 2
    ...
    Ensure the timestamps are accurate and correspond to the start of each sentence.
    Respond only with the transcription and timestamps. Do not add explanations or extra text.
    """
    # Send the instructions and the audio in ONE request. Sending the prompt
    # as its own chat turn makes the model answer before it has any audio,
    # which costs an extra API call and pollutes the conversation context.
    response = model.generate_content([prompt, audio_blob])
    return response.text.strip()
def generate_subtitles(transcription):
    """Build SRT text from a transcription made of ``[timestamp] text`` lines.

    Blank lines are skipped. A line without a leading, parseable
    ``[timestamp]`` is kept in full and placed at 0 seconds.

    Args:
        transcription: Multi-line string, ideally one ``[HH:MM:SS] sentence``
            entry per line.

    Returns:
        The subtitles serialized by ``srt.compose``.
    """
    # Imported locally: the function needs it and the module's import block
    # does not provide it.
    import datetime

    default_duration = 5  # seconds a cue stays up when nothing else bounds it

    cues = []
    for raw_line in transcription.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        start_seconds, text = 0, line
        if line.startswith("["):
            # partition() keeps any later "]" characters inside the text.
            stamp, closing, remainder = line[1:].partition("]")
            if closing:
                try:
                    start_seconds = time_to_seconds(stamp)
                except ValueError:
                    # Bracketed non-timestamp such as "[Music]": keep the
                    # whole line as subtitle text instead of crashing.
                    pass
                else:
                    text = remainder.strip()
        cues.append((start_seconds, text))

    srt_subtitles = []
    for index, (start_seconds, text) in enumerate(cues, start=1):
        end_seconds = start_seconds + default_duration
        if index < len(cues):
            next_start = cues[index][0]
            # End early when the next cue begins first, so consecutive
            # subtitles do not overlap on screen.
            if start_seconds < next_start < end_seconds:
                end_seconds = next_start
        srt_subtitles.append(
            srt.Subtitle(
                index=index,
                start=datetime.timedelta(seconds=start_seconds),
                end=datetime.timedelta(seconds=end_seconds),
                content=text,
            )
        )
    return srt.compose(srt_subtitles)
def time_to_seconds(time_str):
    """Convert a clock-style timestamp to a number of seconds.

    Accepts ``HH:MM:SS`` and ``MM:SS``; the seconds field may carry a
    fraction written with ``.`` or ``,`` (as in SRT files).

    Args:
        time_str: Timestamp text such as ``"01:02:03"`` or ``"02:05.5"``.

    Returns:
        An ``int`` for whole-second timestamps, otherwise a ``float``.

    Raises:
        ValueError: If the text is not a supported timestamp.
    """
    parts = time_str.strip().split(":")
    if len(parts) == 2:
        # MM:SS form: treat the hour field as zero.
        parts = ["0", *parts]
    if len(parts) != 3:
        raise ValueError(f"Unsupported timestamp format: {time_str!r}")

    hh, mm = int(parts[0]), int(parts[1])

    # Parse the fraction by hand so whole-second input keeps returning int.
    whole, separator, fraction = parts[2].strip().replace(",", ".").partition(".")
    ss = int(whole)
    if separator:
        if not fraction.isdigit():
            raise ValueError(f"Unsupported timestamp format: {time_str!r}")
        ss += int(fraction) / 10 ** len(fraction)

    return hh * 3600 + mm * 60 + ss
def seconds_to_time(seconds):
    """Format a number of seconds as ``HH:MM:SS``.

    Args:
        seconds: Duration in seconds. Fractions are truncated so the result
            always consists of whole, zero-padded fields.

    Returns:
        The formatted string; negative durations get a leading ``-``. The hour
        field grows beyond two digits when needed.
    """
    total = int(seconds)  # float input would otherwise render as "0.0:1.0:..."
    sign = "-" if total < 0 else ""
    hh, remainder = divmod(abs(total), 3600)
    mm, ss = divmod(remainder, 60)
    return f"{sign}{hh:02}:{mm:02}:{ss:02}"
 def translate_srt(srt_text, target_language):
+    """Translate an SRT file while preserving timestamps."""
+    prompt = f"""
+    Translate the following SRT subtitles into {target_language}.
+    Preserve the SRT format (timestamps and structure).
+    Translate only the text after the timestamp.
+    Do not add explanations or extra text.
+    Ensure the translation is accurate and culturally appropriate.
+    Here is the SRT file:
+    {srt_text}
+    """
+    response = model.generate_content(prompt)
+    return response.text
def generate_tts_audio(srt_text, language):
    """Synthesize one speech track from all subtitle text in an SRT document.

    Args:
        srt_text: Subtitle file content in SRT format.
        language: Language name looked up in ``LANGUAGE_CODES``; unknown names
            (including "Auto Detect") fall back to English.

    Returns:
        Path to a newly created MP3 file in the system temp directory.

    Raises:
        ValueError: If the subtitles contain no text to speak.
    """
    # Collapse line breaks inside multi-line cues so the text reads as one
    # continuous passage.
    spoken_parts = (" ".join(sub.content.split()) for sub in srt.parse(srt_text))
    all_text = " ".join(part for part in spoken_parts if part)
    if not all_text:
        raise ValueError("No subtitle text available for text-to-speech.")

    lang_code = LANGUAGE_CODES.get(language, "en")
    tts = gTTS(text=all_text, lang=lang_code, slow=False)

    # A unique name per call keeps concurrent requests from overwriting each
    # other's audio.
    handle, audio_file = tempfile.mkstemp(prefix="tts_audio_", suffix=".mp3")
    os.close(handle)
    tts.save(audio_file)
    return audio_file
def add_subtitles_to_video(video_file, srt_file, output_file):
    """Render a copy of a video with the subtitles from an SRT file drawn on it.

    Args:
        video_file: Path to the source video.
        srt_file: Path to the SRT file to overlay.
        output_file: Path where the rendered video is written.

    Returns:
        ``output_file``, once the video has been written.
    """
    # Imported locally: the module's import block does not bring TextClip
    # into scope, so the subtitle generator would fail with NameError.
    from moviepy.video.VideoClip import TextClip

    def make_text_clip(txt):
        return TextClip(txt, font='Arial', fontsize=24, color='white')

    subtitles = SubtitlesClip(srt_file, make_text_clip)
    video = VideoFileClip(video_file)
    try:
        result = CompositeVideoClip([
            video,
            subtitles.set_position(('center', 'bottom')),
        ])
        result.write_videofile(output_file, codec='libx264', audio_codec='aac', threads=4)
    finally:
        # Release the reader resources held by the source clip, even when
        # rendering fails.
        video.close()
    return output_file
def process_video(video_file, language="Auto Detect", translate_to=None, add_tts=False, add_subtitles=False):
    """Run the full pipeline: transcribe, optionally translate, speak and burn in.

    Args:
        video_file: Path to the uploaded video.
        language: Source language chosen in the UI. It selects the TTS voice
            when no translation is requested; it is not passed to the
            transcription step.
        translate_to: Target language name, or ``None``/``"None"`` to skip
            translation.
        add_tts: Whether to generate a text-to-speech audio file.
        add_subtitles: Whether to render a video with subtitles overlaid.

    Returns:
        A 5-tuple ``(original_srt_file, translated_srt_file, tts_audio_file,
        output_video_file, status_text)``. Entries for steps that were not
        requested are ``None``.

    Raises:
        gr.Error: If no video was provided.
    """
    if not video_file:
        raise gr.Error("Please upload a video file before processing.")

    wants_translation = bool(translate_to) and translate_to != "None"

    # Each request writes into its own directory so simultaneous users cannot
    # overwrite one another's results.
    work_dir = tempfile.mkdtemp(prefix="autosubgen_")

    # Extract audio from the video and transcribe it with Gemini.
    audio_file = extract_audio_from_video(video_file)
    try:
        transcription = transcribe_audio_with_gemini(audio_file)
    finally:
        # The audio is only needed for transcription; remove it even when the
        # API call fails.
        os.remove(audio_file)

    # Generate and save the original subtitles.
    subtitles = generate_subtitles(transcription)
    original_srt_file = os.path.join(work_dir, "original_subtitles.srt")
    with open(original_srt_file, "w", encoding="utf-8") as f:
        f.write(subtitles)

    # Translate subtitles if requested.
    translated_srt_file = None
    final_subtitles = subtitles
    if wants_translation:
        final_subtitles = translate_srt(subtitles, translate_to)
        translated_srt_file = os.path.join(work_dir, "translated_subtitles.srt")
        with open(translated_srt_file, "w", encoding="utf-8") as f:
            f.write(final_subtitles)

    # Generate TTS audio if requested, in the language of the final subtitles.
    tts_audio_file = None
    if add_tts:
        target_lang = translate_to if wants_translation else language
        tts_audio_file = generate_tts_audio(final_subtitles, target_lang)

    # Create the video with subtitles if requested.
    output_video_file = None
    if add_subtitles:
        srt_to_use = translated_srt_file if translated_srt_file else original_srt_file
        output_video_file = os.path.join(work_dir, "output_video.mp4")
        add_subtitles_to_video(video_file, srt_to_use, output_video_file)

    # No language detection is performed here; report the user's choice
    # rather than always claiming "Auto".
    if language == "Auto Detect":
        status_text = "Detected Language: Auto"
    else:
        status_text = f"Source Language: {language}"
    return original_srt_file, translated_srt_file, tts_audio_file, output_video_file, status_text
# Define the Gradio interface: inputs on top, a single "Process Video" button,
# and result widgets below. `demo` stays a module-level name so tooling that
# imports this module can find the app.
with gr.Blocks(title="AutoSubGen Pro - AI Video Subtitle Generator") as demo:
    # Header
    with gr.Column():
        gr.Markdown("# 🎥 AutoSubGen Pro")
        gr.Markdown("### Advanced AI-Powered Video Subtitle Generator")
        gr.Markdown("Generate, translate, and add subtitles with text-to-speech audio to your videos.")
    # Main content
    with gr.Tab("Generate Subtitles"):
        gr.Markdown("### Upload a video file to process")
        with gr.Row():
            video_input = gr.Video(label="Upload Video File", scale=2)
            with gr.Column():
                language_dropdown = gr.Dropdown(
                    choices=SUPPORTED_LANGUAGES,
                    label="Source Language",
                    value="Auto Detect",
                )
                # "Auto Detect" is not a valid translation target, so it is
                # replaced by "None" at the head of the list.
                translate_to_dropdown = gr.Dropdown(
                    choices=["None"] + SUPPORTED_LANGUAGES[1:],
                    label="Translate To",
                    value="None",
                )
                tts_checkbox = gr.Checkbox(label="Generate Text-to-Speech Audio")
                subtitles_checkbox = gr.Checkbox(label="Add Subtitles to Video")
        generate_button = gr.Button("Process Video", variant="primary")
        with gr.Row():
            with gr.Column():
                original_subtitle_output = gr.File(label="Original Subtitles (SRT)")
                translated_subtitle_output = gr.File(label="Translated Subtitles (SRT)")
            with gr.Column():
                # Optional outputs start hidden and are revealed by the
                # checkbox handlers below.
                tts_audio_output = gr.Audio(label="Text-to-Speech Audio", visible=False)
                video_output = gr.Video(label="Video with Subtitles", visible=False)
        detected_language_output = gr.Textbox(label="Detected Language")
        # Show/hide each optional output based on its checkbox
        tts_checkbox.change(
            fn=lambda x: gr.Audio(visible=x),
            inputs=tts_checkbox,
            outputs=tts_audio_output
        )
        subtitles_checkbox.change(
            fn=lambda x: gr.Video(visible=x),
            inputs=subtitles_checkbox,
            outputs=video_output
        )
    # Link button to function; the output order matches the tuple returned by
    # process_video.
    generate_button.click(
        process_video,
        inputs=[video_input, language_dropdown, translate_to_dropdown, tts_checkbox, subtitles_checkbox],
        outputs=[original_subtitle_output, translated_subtitle_output, tts_audio_output, video_output, detected_language_output]
    )

# Launch the interface only when run as a script, so importing this module
# does not start a server.
# NOTE(review): share=True requests a public share link; confirm it is still
# wanted for the hosting environment this app runs in.
if __name__ == "__main__":
    demo.launch(share=True)