File size: 16,553 Bytes
91f8d48
e6d59c3
e2954cc
 
e6d59c3
91f8d48
a8524a9
bd56570
 
1d3d329
bd56570
 
a8524a9
bd56570
91f8d48
1d3d329
 
91f8d48
 
e2954cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd56570
e2954cc
bd56570
 
 
e2954cc
 
1b72949
 
bd56570
 
 
1d3d329
 
 
 
1b72949
 
bd56570
818e336
e4d42f1
 
bd56570
 
 
 
 
e4d42f1
 
1d3d329
91f8d48
bd56570
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91f8d48
 
1d3d329
86417c2
e2954cc
 
 
 
 
 
 
 
 
 
 
 
1d3d329
86417c2
 
1b72949
bd56570
 
e2954cc
 
 
 
 
 
 
 
 
bd56570
1d3d329
bd56570
1d3d329
 
bd56570
 
 
 
 
 
1d3d329
e2954cc
 
 
bd56570
1d3d329
bd56570
e2954cc
bd56570
e2954cc
bd56570
 
1d3d329
bd56570
 
 
 
 
 
 
 
 
1d3d329
 
bd56570
e2954cc
1d3d329
 
bd56570
1d3d329
bd56570
 
 
 
c463c7e
 
e4d42f1
1d3d329
e2954cc
 
 
bd56570
 
 
 
1d3d329
 
 
 
 
bd56570
 
1d3d329
bd56570
1d3d329
bd56570
 
 
 
 
 
1d3d329
bd56570
 
1d3d329
e2954cc
1d3d329
 
 
 
 
 
 
e2954cc
1d3d329
 
bd56570
1d3d329
bd56570
 
 
 
 
1d3d329
 
 
 
 
 
bd56570
1d3d329
 
e2954cc
 
1d3d329
 
 
e2954cc
 
 
1d3d329
bd56570
1d3d329
e2954cc
1d3d329
 
 
e2954cc
1d3d329
 
 
e2954cc
1d3d329
 
e2954cc
1d3d329
 
 
 
 
 
e2954cc
1d3d329
 
e2954cc
1d3d329
e2954cc
1d3d329
 
e2954cc
 
1d3d329
 
bd56570
1d3d329
bd56570
 
 
 
 
 
 
1d3d329
 
e2954cc
 
 
 
 
bd56570
 
 
1d3d329
e2954cc
 
 
 
1d3d329
bd56570
1d3d329
bd56570
 
e2954cc
bd56570
 
 
 
 
e2954cc
 
 
 
 
bd56570
 
 
 
 
 
 
1d3d329
 
bd56570
 
 
 
 
1d3d329
 
 
bd56570
 
 
1d3d329
bd56570
1d3d329
 
e2954cc
bd56570
 
 
 
e2954cc
1d3d329
 
bd56570
 
 
1d3d329
bd56570
 
 
 
 
 
1d3d329
bd56570
1d3d329
e2954cc
bd56570
 
1d3d329
bd56570
1d3d329
 
e2954cc
1d3d329
 
 
 
bd56570
1d3d329
 
 
 
bd56570
1d3d329
 
 
 
 
 
 
bd56570
1d3d329
 
e2954cc
bd56570
 
 
e2954cc
 
 
bd56570
 
 
1d3d329
bd56570
 
 
 
 
 
e2954cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
import os
import google.generativeai as genai
from moviepy.editor import *
import moviepy.config as mp_config
import tempfile
import logging
import srt
import datetime
from gtts import gTTS
from flask import Flask, request, render_template, send_from_directory, url_for, flash, session, redirect
from werkzeug.utils import secure_filename
import uuid

# --- Configuration ---

# Keep this app's INFO messages but silence MoviePy's verbose logger.
logging.basicConfig(level=logging.INFO)
logging.getLogger("moviepy").setLevel(logging.ERROR)

# Module-wide flag: set to True when the probe below reports that ImageMagick
# is unavailable. The rest of the module reads it to skip subtitle burning.
IMAGEMAGICK_NOT_FOUND = False
try:
    # Building a throwaway TextClip raises when ImageMagick cannot be found.
    TextClip("test", font='Arial', fontsize=24, color='white')
except Exception as e:
    if "ImageMagick is not installed" not in str(e) and "magick: not found" not in str(e):
        # Some other startup problem reported by TextClip; the flag stays False.
        logging.error(f"An unexpected error occurred with MoviePy/ImageMagick: {e}")
    else:
        IMAGEMAGICK_NOT_FOUND = True
        logging.warning("ImageMagick is not installed or not found in the system's PATH. Subtitle generation will be disabled.")
        logging.warning("Please install ImageMagick and ensure its 'magick' command is accessible from your shell.")


# Configure Gemini API
# IMPORTANT: Set your GEMINI_API_KEY as an environment variable for this to work.
try:
    genai.configure(api_key=os.environ["GEMINI_API_KEY"])
except KeyError:
    # A missing key is tolerated at import time on purpose: get_gemini_model()
    # returns None and the /process route flashes an error to the user when
    # the variable is not set.
    pass


# --- Flask App Initialization ---
app = Flask(__name__)
# Sessions and flashed messages are signed with SECRET_KEY. A key generated at
# import time changes on every restart and differs between worker processes,
# so a flash set by one worker cannot be read after the redirect lands on
# another. Prefer a stable key supplied through the FLASK_SECRET_KEY
# environment variable; fall back to a random key when it is not set.
app.config['SECRET_KEY'] = os.environ.get('FLASK_SECRET_KEY') or os.urandom(24)
# Create a permanent directory for uploads if it doesn't exist
UPLOAD_FOLDER = os.path.join(os.getcwd(), 'user_uploads')
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER


# --- Model and Language Configuration ---

# Generation settings handed to genai.GenerativeModel by get_gemini_model().
# NOTE(review): max_output_tokens presumably caps how long a returned
# transcript/translation can be — confirm this is enough for long videos.
generation_config = {
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 40,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}

# Language names offered in the UI; passed to index.html as
# `supported_languages`. "Auto Detect" is not a real language: the
# transcription prompt omits the language hint for it, and the TTS step in
# process() replaces it with English.
SUPPORTED_LANGUAGES = [
    "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
    "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan", "Dutch",
    "Arabic", "Swedish", "Italian", "Indonesian", "Hindi", "Finnish", "Vietnamese",
    "Hebrew", "Ukrainian", "Greek", "Malay", "Czech", "Romanian", "Danish",
    "Hungarian", "Tamil", "Norwegian", "Thai", "Urdu", "Croatian", "Bulgarian",
    "Lithuanian", "Latin", "Maori", "Malayalam", "Welsh", "Slovak", "Telugu",
    "Persian", "Latvian", "Bengali", "Serbian", "Azerbaijani", "Slovenian",
    "Kannada", "Estonian", "Macedonian", "Breton", "Basque", "Icelandic",
    "Armenian", "Nepali", "Mongolian", "Bosnian", "Kazakh", "Albanian",
    "Swahili", "Galician", "Marathi", "Punjabi", "Sinhala", "Khmer", "Shona",
    "Yoruba", "Somali", "Afrikaans", "Occitan", "Georgian", "Belarusian",
    "Tajik", "Sindhi", "Gujarati", "Amharic", "Yiddish", "Lao", "Uzbek",
    "Faroese", "Haitian Creole", "Pashto", "Turkmen", "Nynorsk", "Maltese",
    "Sanskrit", "Luxembourgish", "Burmese", "Tibetan", "Tagalog", "Malagasy",
    "Assamese", "Tatar", "Hawaiian", "Lingala", "Hausa", "Bashkir", "Javanese",
    "Sundanese"
]

# Language code mapping for Google Text-to-Speech (gTTS), keyed by the
# language names used in SUPPORTED_LANGUAGES. Only a subset of those names has
# an entry here; generate_tts_audio() falls back to English ("en") for any
# language that is missing.
LANGUAGE_CODES = {
    "English": "en", "Chinese": "zh", "German": "de", "Spanish": "es", "Russian": "ru",
    "Korean": "ko", "French": "fr", "Japanese": "ja", "Portuguese": "pt", "Turkish": "tr",
    "Polish": "pl", "Catalan": "ca", "Dutch": "nl", "Arabic": "ar", "Swedish": "sv",
    "Italian": "it", "Indonesian": "id", "Hindi": "hi", "Finnish": "fi", "Vietnamese": "vi",
    "Hebrew": "iw", "Ukrainian": "uk", "Greek": "el", "Malay": "ms", "Czech": "cs",
    "Romanian": "ro", "Danish": "da", "Hungarian": "hu", "Tamil": "ta", "Norwegian": "no",
    "Thai": "th", "Urdu": "ur", "Croatian": "hr", "Bulgarian": "bg", "Lithuanian": "lt",
    "Latin": "la", "Malayalam": "ml", "Welsh": "cy", "Slovak": "sk", "Telugu": "te",
    "Persian": "fa", "Latvian": "lv", "Bengali": "bn", "Serbian": "sr", "Slovenian": "sl",
    "Kannada": "kn", "Estonian": "et", "Macedonian": "mk", "Armenian": "hy", "Nepali": "ne",
    "Mongolian": "mn", "Bosnian": "bs", "Kazakh": "kk", "Albanian": "sq", "Swahili": "sw",
    "Gujarati": "gu", "Lao": "lo", "Uzbek": "uz", "Maltese": "mt", "Burmese": "my",
    "Tagalog": "tl", "Javanese": "jw", "Sundanese": "su", "Afrikaans": "af"
}


# --- Core Processing Functions ---

def get_gemini_model():
    """Build the Gemini model used for transcription and translation.

    Returns:
        A ``genai.GenerativeModel`` configured with the module-level
        ``generation_config``, or ``None`` when the ``GEMINI_API_KEY``
        environment variable is not set.
    """
    api_key_present = "GEMINI_API_KEY" in os.environ
    if api_key_present:
        return genai.GenerativeModel(
            model_name="gemini-1.5-flash",
            generation_config=generation_config,
        )
    return None

def extract_audio_from_video(video_path, audio_path):
    """Extract the audio track of a video into an audio file sampled at 16 kHz.

    Args:
        video_path: Path of the video file to read.
        audio_path: Destination path for the extracted audio.

    Returns:
        ``audio_path`` on success, or ``None`` when the video cannot be
        opened, has no audio track, or the audio cannot be written.
    """
    try:
        with VideoFileClip(video_path) as video:
            if video.audio is None:
                # Without this guard a video that has no audio track fails
                # with an opaque AttributeError on None.
                logging.error(f"Error extracting audio: no audio track found in {video_path}")
                return None
            video.audio.write_audiofile(audio_path, fps=16000, logger=None)
        return audio_path
    except Exception as e:
        logging.error(f"Error extracting audio: {e}")
        return None

def transcribe_audio_with_gemini(audio_path, source_language):
    """Transcribe an audio file with Gemini and return the result as SRT text.

    The audio is uploaded through the Gemini file API, sent together with a
    prompt that demands SRT output, and the uploaded copy is deleted again
    whether or not the request succeeds.

    Args:
        audio_path: Path of the local audio file to transcribe.
        source_language: Language name to mention in the prompt, or
            ``"Auto Detect"`` to leave the language unspecified.

    Returns:
        The stripped response text, or ``None`` when no model is available
        (missing API key) or any step of the request raises.
    """
    model = get_gemini_model()
    if not model:
        return None

    audio_file = None
    try:
        logging.info(f"Uploading audio file: {audio_path}")
        audio_file = genai.upload_file(path=audio_path)

        language_prompt = f"in {source_language}" if source_language != "Auto Detect" else ""

        prompt = f"""
        You are a professional transcriber. Transcribe this audio accurately and verbatim {language_prompt}.
        Your output MUST be in the SRT (SubRip) format.
        Example:
        1
        00:00:01,234 --> 00:00:05,678
        This is the first sentence.

        2
        00:00:06,123 --> 00:00:09,456
        This is the second sentence.

        Ensure timestamps are precise. Respond ONLY with the transcription in the SRT format.
        Do not add explanations, notes, or any other text outside of the valid SRT content.
        """

        logging.info("Sending transcription request to Gemini...")
        response = model.generate_content([prompt, audio_file], request_options={"timeout": 600})
        logging.info("Transcription received from Gemini.")
        return response.text.strip()
    except Exception as e:
        logging.error(f"Error during Gemini transcription: {e}")
        return None
    finally:
        # Remove the uploaded copy even when the request failed, and never let
        # a cleanup problem discard a transcription that was already received.
        if audio_file is not None:
            try:
                genai.delete_file(audio_file.name)
            except Exception as cleanup_error:
                logging.warning(f"Could not delete uploaded audio file from Gemini: {cleanup_error}")


def translate_srt(srt_text, target_language):
    """Ask Gemini to translate SRT subtitles while keeping the SRT layout.

    Args:
        srt_text: SRT content to translate.
        target_language: Name of the language to translate into.

    Returns:
        The stripped response text, or ``None`` when no model is available
        (missing API key) or the request raises.
    """
    gemini = get_gemini_model()
    if not gemini:
        return None

    translated = None
    try:
        prompt = f"""
        Translate the following SRT subtitles into {target_language}.
        Preserve the SRT format perfectly (index numbers, timestamps, and structure).
        Translate only the subtitle text itself.
        Your output must be only the translated and valid SRT file content.
        Do not add any explanations or extra text.

        SRT Content to Translate:
        {srt_text}
        """
        logging.info(f"Sending translation request to Gemini for {target_language}...")
        reply = gemini.generate_content(prompt)
        logging.info("Translation received.")
        translated = reply.text.strip()
    except Exception as err:
        # Any failure (request or response access) is logged and reported as None.
        logging.error(f"Error during translation: {err}")
    return translated

def generate_tts_audio(srt_text, language, tts_audio_path):
    """Synthesize one speech track from all subtitle text in an SRT document.

    Args:
        srt_text: SRT content whose subtitle text is spoken.
        language: Language name looked up in ``LANGUAGE_CODES``; unknown names
            flash a warning and fall back to English.
        tts_audio_path: Where the generated audio is saved.

    Returns:
        ``tts_audio_path`` on success, or ``None`` when there is no text to
        speak or any step raises.
    """
    try:
        # Flatten each cue to a single line, then join all cues with spaces.
        spoken_lines = []
        for cue in srt.parse(srt_text):
            spoken_lines.append(cue.content.replace('\n', ' '))
        full_text = " ".join(spoken_lines)

        if not full_text:
            return None

        voice_code = LANGUAGE_CODES.get(language)
        if not voice_code:
            flash(f"Language '{language}' not supported for TTS, defaulting to English.", "warning")
            voice_code = "en"

        logging.info(f"Generating TTS audio in '{language}' ({voice_code})...")
        speech = gTTS(text=full_text, lang=voice_code, slow=False)
        speech.save(tts_audio_path)
        logging.info(f"TTS audio saved to {tts_audio_path}")
        return tts_audio_path
    except Exception as err:
        logging.error(f"Error generating TTS audio: {err}")
        return None

def create_final_video(original_video_path, srt_text, new_audio_path, output_path):
    """Render the processed video, optionally dubbed and with burned-in subtitles.

    Args:
        original_video_path: Path of the source video.
        srt_text: SRT content to burn into the picture, or a falsy value to
            skip subtitles. Subtitles are also skipped when ImageMagick was
            not found at startup.
        new_audio_path: Path of an audio file that replaces the original
            audio track, or a falsy value to keep the original audio.
        output_path: Where the rendered video is written.

    Returns:
        ``output_path`` on success, or ``None`` when any step raises.
    """
    # Pre-initialise every handle so the ``finally`` block can release
    # whatever was actually opened, including after a failure halfway through.
    original_clip = None
    tts_audio_clip = None
    final_video = None
    srt_filepath = None
    try:
        logging.info("Creating final video...")
        original_clip = VideoFileClip(original_video_path)
        final_clip = original_clip

        # If new audio is provided, replace the original audio track
        if new_audio_path:
            tts_audio_clip = AudioFileClip(new_audio_path)
            # If the new audio is shorter than the video, loop it.
            if tts_audio_clip.duration < original_clip.duration:
                tts_audio_clip = tts_audio_clip.fx(vfx.loop, duration=original_clip.duration)
            final_clip = original_clip.set_audio(tts_audio_clip)

        # If subtitle text is provided, burn it into the video
        if srt_text and not IMAGEMAGICK_NOT_FOUND:
            with tempfile.NamedTemporaryFile(mode='w', suffix='.srt', delete=False, encoding='utf-8') as temp_srt:
                # Record the name before writing so a failed write is still cleaned up.
                srt_filepath = temp_srt.name
                temp_srt.write(srt_text)

            def make_caption(txt):
                """Build the styled text clip for one subtitle line."""
                return TextClip(
                    txt, font='Arial-Bold', fontsize=24, color='white',
                    stroke_color='black', stroke_width=1, method='caption',
                    size=(final_clip.w * 0.8, None)  # Subtitles take 80% of video width
                )

            subtitles_clip = SubtitlesClip(srt_filepath, make_caption)
            # Composite the video with the subtitles
            final_video = CompositeVideoClip([final_clip, subtitles_clip.set_position(('center', 'bottom'))])
        else:
            final_video = final_clip

        # Write the final video file
        final_video.write_videofile(output_path, codec='libx264', audio_codec='aac', threads=4, logger=None)

        logging.info(f"Final video saved to {output_path}")
        return output_path
    except Exception as e:
        logging.error(f"Error creating final video: {e}")
        return None
    finally:
        # Clean up the temporary subtitle file, if one was created.
        if srt_filepath:
            try:
                os.remove(srt_filepath)
            except OSError:
                pass
        # Close all clips to release memory and file handles. A failing close
        # must not turn a successfully written video into a reported failure.
        for clip in (tts_audio_clip, final_video, original_clip):
            if clip is None:
                continue
            try:
                clip.close()
            except Exception as close_error:
                logging.warning(f"Error while closing a clip: {close_error}")


# --- Flask Routes ---

@app.route('/')
def index():
    """Show the upload form, starting from an empty session."""
    session.clear()
    template_context = {
        'supported_languages': SUPPORTED_LANGUAGES,
        'imagemagick_missing': IMAGEMAGICK_NOT_FOUND,
        # Lets the template tell the user when the Gemini key is missing.
        'api_key_set': "GEMINI_API_KEY" in os.environ,
    }
    return render_template('index.html', **template_context)

@app.route('/process', methods=['POST'])
def process():
    """Handle the video processing request from the form.

    Pipeline: save the upload into a fresh per-request directory, extract its
    audio, transcribe it to SRT with Gemini, optionally translate the SRT,
    optionally generate TTS audio, and optionally render a final video with
    the new audio and/or burned-in subtitles.

    Returns:
        The rendered ``index.html`` with a ``results`` mapping of generated
        file names, or a redirect to the index page (with a flashed error)
        when the request is invalid or a mandatory step fails.
    """
    import shutil  # Local import: only needed to discard a failed session's files.

    if "GEMINI_API_KEY" not in os.environ:
        flash('Your GEMINI_API_KEY environment variable is not set. Please set it to use the application.', 'error')
        return redirect(url_for('index'))

    if 'video' not in request.files or request.files['video'].filename == '':
        flash('No video file selected. Please upload a video.', 'error')
        return redirect(url_for('index'))

    video_file = request.files['video']

    # --- Get form options ---
    source_language = request.form.get('source_language', 'Auto Detect')
    translate_to = request.form.get('translate_to', 'None')
    add_tts = 'add_tts' in request.form
    add_subtitles = 'add_subtitles' in request.form

    if IMAGEMAGICK_NOT_FOUND and add_subtitles:
        flash("Cannot add subtitles because ImageMagick is not installed. The video will be processed without them.", "warning")
        add_subtitles = False

    # --- Setup a unique session directory for this request ---
    session_id = str(uuid.uuid4())
    session['session_id'] = session_id
    session_dir = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
    os.makedirs(session_dir, exist_ok=True)

    def abort_with(message):
        """Flash ``message``, drop this request's files and return to the form."""
        # Nothing in a failed session is offered for download, so keeping the
        # upload around would only leak disk space.
        shutil.rmtree(session_dir, ignore_errors=True)
        flash(message, 'error')
        return redirect(url_for('index'))

    # secure_filename() may return an empty string, which would make save()
    # target the directory itself. An upload named like one of the generated
    # artifacts ("final_video.mp4", "extracted_audio.wav", ...) would also be
    # overwritten while it is still being read. The fixed prefix and the
    # fallback name rule out both.
    safe_name = secure_filename(video_file.filename) or "video"
    original_video_path = os.path.join(session_dir, f"upload_{safe_name}")
    video_file.save(original_video_path)

    results = {}

    # 1. Extract Audio
    audio_path = os.path.join(session_dir, "extracted_audio.wav")
    if not extract_audio_from_video(original_video_path, audio_path):
        return abort_with('Failed to extract audio from the video. The file might be corrupted or in an unsupported format.')

    # 2. Transcribe Audio
    original_srt_text = transcribe_audio_with_gemini(audio_path, source_language)
    os.remove(audio_path) # Clean up extracted audio immediately
    if not original_srt_text:
        return abort_with('Failed to transcribe the audio. The API call might have failed or the audio is silent.')

    original_srt_path = os.path.join(session_dir, "original_subtitles.srt")
    with open(original_srt_path, "w", encoding="utf-8") as f:
        f.write(original_srt_text)
    results['original_srt_file'] = "original_subtitles.srt"

    srt_for_final_video = None
    tts_audio_path = None
    final_srt_text = original_srt_text

    # 3. Translate Subtitles (if requested)
    if translate_to != "None":
        translated_srt_text = translate_srt(original_srt_text, translate_to)
        if translated_srt_text:
            translated_srt_path = os.path.join(session_dir, "translated_subtitles.srt")
            with open(translated_srt_path, "w", encoding="utf-8") as f:
                f.write(translated_srt_text)
            results['translated_srt_file'] = "translated_subtitles.srt"
            final_srt_text = translated_srt_text # Use translated text for subsequent steps
        else:
            flash(f'Failed to translate subtitles to {translate_to}. Using original subtitles.', 'warning')

    # 4. Generate TTS Audio (if requested)
    if add_tts:
        tts_lang = translate_to if translate_to != "None" else source_language
        if tts_lang == 'Auto Detect':
            flash('TTS language cannot be "Auto Detect". Defaulting to English.', 'warning')
            tts_lang = 'English'

        tts_audio_path_out = os.path.join(session_dir, "tts_audio.mp3")
        tts_audio_path = generate_tts_audio(final_srt_text, tts_lang, tts_audio_path_out)
        if tts_audio_path:
            results['tts_audio_file'] = "tts_audio.mp3"
        else:
            flash('Failed to generate Text-to-Speech audio.', 'warning')
            tts_audio_path = None # Ensure it's None if generation failed

    # 5. Determine which subtitles to burn into the video (if requested)
    if add_subtitles:
        srt_for_final_video = final_srt_text

    # 6. Create the final output video if any processing was requested
    if add_subtitles or add_tts:
        output_video_path = os.path.join(session_dir, "final_video.mp4")
        if create_final_video(original_video_path, srt_for_final_video, tts_audio_path, output_video_path):
            results['output_video_file'] = "final_video.mp4"
        else:
            # The subtitle files produced so far are still offered below.
            flash('Failed to create the final processed video.', 'error')

    api_key_status = "GEMINI_API_KEY" in os.environ
    return render_template('index.html',
                           supported_languages=SUPPORTED_LANGUAGES,
                           results=results,
                           session_id=session_id,
                           imagemagick_missing=IMAGEMAGICK_NOT_FOUND,
                           api_key_set=api_key_status)

@app.route('/download/<session_id>/<path:filename>')
def download_file(session_id, filename):
    """Serve a file from a session-specific directory as an attachment.

    ``session_id`` comes straight from the URL, so it must not be joined onto
    the upload folder unchecked: a value such as ``..`` would move the served
    directory outside ``UPLOAD_FOLDER``. Passing the session id as part of the
    requested relative path lets ``send_from_directory`` apply its safe-join
    check to both URL components; anything that would escape the upload
    folder is answered with a 404.

    Args:
        session_id: Name of the per-request directory inside the upload folder.
        filename: Path of the file inside that directory.
    """
    relative_path = f"{session_id}/{filename}"
    return send_from_directory(app.config['UPLOAD_FOLDER'], relative_path, as_attachment=True)


# --- Run the App ---
if __name__ == "__main__":
    # Start Flask's built-in server on all interfaces, port 7860.
    app.run(port=7860, host="0.0.0.0")