Athspi committed on
Commit
bd56570
·
verified ·
1 Parent(s): a7fb2a1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +281 -589
app.py CHANGED
@@ -1,625 +1,317 @@
1
  import os
2
-
3
  import google.generativeai as genai
4
-
5
- from moviepy.video.io.VideoFileClip import VideoFileClip
6
-
7
- from moviepy.audio.io.AudioFileClip import AudioFileClip
8
-
9
- from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip
10
-
11
  from moviepy.video.tools.subtitles import SubtitlesClip
12
-
13
  import tempfile
14
-
15
  import logging
16
-
17
- import gradio as gr
18
-
19
- from gtts import gTTS
20
-
21
  import srt
 
 
 
 
 
22
 
23
-
24
 
25
  # Suppress moviepy logs
26
-
27
  logging.getLogger("moviepy").setLevel(logging.ERROR)
28
 
 
 
 
 
 
 
 
29
 
30
 
31
- # Configure Gemini API
 
 
 
32
 
33
- genai.configure(api_key=os.environ["GEMINI_API_KEY"])
 
34
 
35
 
 
36
 
37
  # Create the Gemini model
38
-
39
  generation_config = {
40
-
41
-     "temperature": 0.7,
42
-
43
-     "top_p": 0.9,
44
-
45
-     "top_k": 40,
46
-
47
-     "max_output_tokens": 8192,
48
-
49
-     "response_mime_type": "text/plain",
50
-
51
  }
52
 
53
-
54
-
55
  model = genai.GenerativeModel(
56
-
57
-     model_name="gemini-2.0-pro-exp-02-05",
58
-
59
-     generation_config=generation_config,
60
-
61
  )
62
 
63
-
64
-
65
  # List of all supported languages
66
-
67
  SUPPORTED_LANGUAGES = [
68
-
69
-     "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
70
-
71
-     "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan", "Dutch",
72
-
73
-     "Arabic", "Swedish", "Italian", "Indonesian", "Hindi", "Finnish", "Vietnamese",
74
-
75
-     "Hebrew", "Ukrainian", "Greek", "Malay", "Czech", "Romanian", "Danish",
76
-
77
-     "Hungarian", "Tamil", "Norwegian", "Thai", "Urdu", "Croatian", "Bulgarian",
78
-
79
-     "Lithuanian", "Latin", "Maori", "Malayalam", "Welsh", "Slovak", "Telugu",
80
-
81
-     "Persian", "Latvian", "Bengali", "Serbian", "Azerbaijani", "Slovenian",
82
-
83
-     "Kannada", "Estonian", "Macedonian", "Breton", "Basque", "Icelandic",
84
-
85
-     "Armenian", "Nepali", "Mongolian", "Bosnian", "Kazakh", "Albanian",
86
-
87
-     "Swahili", "Galician", "Marathi", "Punjabi", "Sinhala", "Khmer", "Shona",
88
-
89
-     "Yoruba", "Somali", "Afrikaans", "Occitan", "Georgian", "Belarusian",
90
-
91
-     "Tajik", "Sindhi", "Gujarati", "Amharic", "Yiddish", "Lao", "Uzbek",
92
-
93
-     "Faroese", "Haitian Creole", "Pashto", "Turkmen", "Nynorsk", "Maltese",
94
-
95
-     "Sanskrit", "Luxembourgish", "Burmese", "Tibetan", "Tagalog", "Malagasy",
96
-
97
-     "Assamese", "Tatar", "Hawaiian", "Lingala", "Hausa", "Bashkir", "Javanese",
98
-
99
-     "Sundanese"
100
-
101
  ]
102
 
103
-
104
-
105
  # Language code mapping for gTTS
106
-
107
  LANGUAGE_CODES = {
108
-
109
-     "English": "en", "Chinese": "zh", "German": "de", "Spanish": "es", 
110
-
111
-     "Russian": "ru", "Korean": "ko", "French": "fr", "Japanese": "ja",
112
-
113
-     "Portuguese": "pt", "Turkish": "tr", "Polish": "pl", "Catalan": "ca",
114
-
115
-     "Dutch": "nl", "Arabic": "ar", "Swedish": "sv", "Italian": "it",
116
-
117
-     "Indonesian": "id", "Hindi": "hi", "Finnish": "fi", "Vietnamese": "vi",
118
-
119
-     "Hebrew": "he", "Ukrainian": "uk", "Greek": "el", "Malay": "ms",
120
-
121
-     "Czech": "cs", "Romanian": "ro", "Danish": "da", "Hungarian": "hu",
122
-
123
-     "Tamil": "ta", "Norwegian": "no", "Thai": "th", "Urdu": "ur",
124
-
125
-     "Croatian": "hr", "Bulgarian": "bg", "Lithuanian": "lt", "Latin": "la",
126
-
127
-     "Maori": "mi", "Malayalam": "ml", "Welsh": "cy", "Slovak": "sk",
128
-
129
-     "Telugu": "te", "Persian": "fa", "Latvian": "lv", "Bengali": "bn",
130
-
131
-     "Serbian": "sr", "Azerbaijani": "az", "Slovenian": "sl", "Kannada": "kn",
132
-
133
-     "Estonian": "et", "Macedonian": "mk", "Breton": "br", "Basque": "eu",
134
-
135
-     "Icelandic": "is", "Armenian": "hy", "Nepali": "ne", "Mongolian": "mn",
136
-
137
-     "Bosnian": "bs", "Kazakh": "kk", "Albanian": "sq", "Swahili": "sw",
138
-
139
-     "Galician": "gl", "Marathi": "mr", "Punjabi": "pa", "Sinhala": "si",
140
-
141
-     "Khmer": "km", "Shona": "sn", "Yoruba": "yo", "Somali": "so",
142
-
143
-     "Afrikaans": "af", "Occitan": "oc", "Georgian": "ka", "Belarusian": "be",
144
-
145
-     "Tajik": "tg", "Sindhi": "sd", "Gujarati": "gu", "Amharic": "am",
146
-
147
-     "Yiddish": "yi", "Lao": "lo", "Uzbek": "uz", "Faroese": "fo",
148
-
149
-     "Haitian Creole": "ht", "Pashto": "ps", "Turkmen": "tk", "Nynorsk": "nn",
150
-
151
-     "Maltese": "mt", "Sanskrit": "sa", "Luxembourgish": "lb", "Burmese": "my",
152
-
153
-     "Tibetan": "bo", "Tagalog": "tl", "Malagasy": "mg", "Assamese": "as",
154
-
155
-     "Tatar": "tt", "Hawaiian": "haw", "Lingala": "ln", "Hausa": "ha",
156
-
157
-     "Bashkir": "ba", "Javanese": "jv", "Sundanese": "su"
158
-
159
  }
160
 
161
 
162
-
163
def extract_audio_from_video(video_file):
    """Extract the audio track of a video into a 16 kHz WAV file.

    Args:
        video_file: Path to the source video.

    Returns:
        Path of the WAV file, created in the system temp directory.

    Raises:
        ValueError: If the video has no audio track.
    """
    # A unique name per call keeps concurrent requests from overwriting
    # each other's extracted audio.
    fd, audio_file = tempfile.mkstemp(prefix="extracted_audio_", suffix=".wav")
    os.close(fd)

    video = VideoFileClip(video_file)
    try:
        if video.audio is None:
            raise ValueError("The video has no audio track to extract.")
        video.audio.write_audiofile(audio_file, fps=16000, logger=None)
    except Exception:
        # Do not leave an empty or partial WAV behind on failure.
        os.remove(audio_file)
        raise
    finally:
        # Release the reader held by the clip.
        video.close()
    return audio_file
174
-
175
-
176
-
177
def transcribe_audio_with_gemini(audio_file):
    """Transcribe a WAV file with Gemini, asking for ``[HH:MM:SS] sentence`` lines.

    Args:
        audio_file: Path to the WAV file to transcribe.

    Returns:
        The model's reply text with surrounding whitespace stripped.
    """
    with open(audio_file, "rb") as f:
        audio_data = f.read()

    audio_blob = {
        'mime_type': 'audio/wav',
        'data': audio_data
    }

    prompt = """
    You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language.
    Include timestamps for each sentence in the following format:
    [HH:MM:SS] Sentence 1
    [HH:MM:SS] Sentence 2
    ...
    Ensure the timestamps are accurate and correspond to the start of each sentence.
    Respond only with the transcription and timestamps. Do not add explanations or extra text.
    """

    # Send the instructions and the audio in ONE request. Sending the prompt
    # as its own chat turn first makes the model answer before it has any
    # audio, which costs an extra round-trip.
    response = model.generate_content([prompt, audio_blob])
    return response.text.strip()
224
-
225
-
226
-
227
def generate_subtitles(transcription):
    """Build SRT text from ``[HH:MM:SS] sentence`` transcription lines.

    Blank lines are skipped. A line without a leading, parseable
    ``[timestamp]`` is kept whole and starts at 00:00:00.

    Args:
        transcription: Multi-line transcription text.

    Returns:
        The composed SRT document as a string.
    """
    import datetime  # not imported at module level in this version of the file

    # First pass: collect (start_seconds, text) pairs so each cue's end can
    # be clamped against the start of the following cue.
    cues = []
    for raw_line in transcription.split("\n"):
        line = raw_line.strip()
        if not line:
            continue

        start_seconds, text = 0, line
        if line.startswith("["):
            # partition() keeps any later "]" characters inside the text.
            stamp, closed, rest = line[1:].partition("]")
            if closed:
                try:
                    start_seconds, text = time_to_seconds(stamp), rest.strip()
                except ValueError:
                    # Bracketed text that is not a timestamp (e.g. "[music]"):
                    # keep the whole line as subtitle content.
                    pass
        cues.append((start_seconds, text))

    srt_subtitles = []
    for i, (start_seconds, text) in enumerate(cues, start=1):
        end_seconds = start_seconds + 5  # Placeholder duration
        # cues[i] is the NEXT cue because enumerate starts at 1; end early
        # rather than overlap it.
        if i < len(cues) and start_seconds < cues[i][0] < end_seconds:
            end_seconds = cues[i][0]

        srt_subtitles.append(
            srt.Subtitle(
                index=i,
                start=datetime.timedelta(seconds=start_seconds),
                end=datetime.timedelta(seconds=end_seconds),
                content=text,
            )
        )

    return srt.compose(srt_subtitles)
284
-
285
-
286
-
287
def time_to_seconds(time_str):
    """Convert a ``HH:MM:SS`` timestamp to a whole number of seconds.

    The shorter ``MM:SS`` and ``SS`` forms are accepted as well.

    Args:
        time_str: Colon-separated timestamp made of integer fields.

    Returns:
        Total seconds as an int.

    Raises:
        ValueError: If there are more than three fields or a field is not
            an integer.
    """
    fields = time_str.strip().split(":")
    if len(fields) > 3:
        raise ValueError(f"Invalid timestamp: {time_str!r}")

    total = 0
    for field in fields:
        # Each step shifts the running total one base-60 place to the left.
        total = total * 60 + int(field)
    return total
294
-
295
-
296
-
297
def seconds_to_time(seconds):
    """Convert a non-negative number of seconds to ``HH:MM:SS``.

    Fractional seconds are truncated so float input still yields
    zero-padded integer fields.

    Args:
        seconds: Duration in seconds (int or float).

    Returns:
        The formatted timestamp; the hours field grows past two digits
        for durations of 100 hours or more.

    Raises:
        ValueError: If ``seconds`` is negative.
    """
    total = int(seconds)
    if total < 0:
        raise ValueError(f"seconds must be non-negative, got {seconds!r}")

    mm, ss = divmod(total, 60)
    hh, mm = divmod(mm, 60)
    return f"{hh:02}:{mm:02}:{ss:02}"
308
-
309
 
310
 
311
def translate_srt(srt_text, target_language):
    """Translate SRT subtitles with Gemini while keeping the SRT structure.

    Args:
        srt_text: The SRT document to translate.
        target_language: Name of the language to translate into.

    Returns:
        The translated SRT text, stripped of surrounding whitespace and of
        a wrapping Markdown code fence if the model added one. Empty input
        is returned as an empty string without calling the model.
    """
    if not srt_text or not srt_text.strip():
        # Nothing to translate; skip the API call.
        return ""

    prompt = f"""
    Translate the following SRT subtitles into {target_language}.
    Preserve the SRT format (timestamps and structure).
    Translate only the text after the timestamp.
    Do not add explanations or extra text.
    Ensure the translation is accurate and culturally appropriate.
    Here is the SRT file:
    {srt_text}
    """

    response = model.generate_content(prompt)
    text = response.text.strip()

    # A wrapping ``` fence would make the result unparseable as SRT, so drop
    # the opening fence line and the closing fence when present.
    if text.startswith("```"):
        text = text.partition("\n")[2].rstrip()
        if text.endswith("```"):
            text = text[:-3]
    return text.strip()
338
-
339
-
340
-
341
def generate_tts_audio(srt_text, language):
    """Synthesize one MP3 that reads all subtitle text aloud.

    Args:
        srt_text: SRT document whose cue contents are spoken.
        language: Language name looked up in ``LANGUAGE_CODES``; names not
            in the mapping fall back to ``"en"``.

    Returns:
        Path of the MP3 file, created in the system temp directory.

    Raises:
        ValueError: If the SRT contains no text to speak.
    """
    # Extract all text from SRT
    all_text = " ".join(sub.content for sub in srt.parse(srt_text)).strip()
    if not all_text:
        # Fail with a clear message instead of an opaque error from gTTS.
        raise ValueError("No subtitle text available for text-to-speech.")

    # Get language code
    lang_code = LANGUAGE_CODES.get(language, "en")

    # Generate TTS
    tts = gTTS(text=all_text, lang=lang_code, slow=False)

    # A unique name per call keeps concurrent requests from overwriting
    # each other's audio.
    fd, audio_file = tempfile.mkstemp(prefix="tts_audio_", suffix=".mp3")
    os.close(fd)
    tts.save(audio_file)
    return audio_file
368
-
369
-
370
-
371
def add_subtitles_to_video(video_file, srt_file, output_file):
    """Burn subtitles from an SRT file into a video.

    Args:
        video_file: Path to the source video.
        srt_file: Path to the SRT file to render.
        output_file: Path where the subtitled video is written.

    Returns:
        ``output_file``.
    """
    # TextClip is used below but never imported at module level in this
    # version of the file.
    from moviepy.video.VideoClip import TextClip

    def generator(txt):
        """Render one subtitle line as a text clip."""
        return TextClip(txt, font='Arial', fontsize=24, color='white')

    # Create subtitle clip
    subtitles = SubtitlesClip(srt_file, generator)

    # Load video
    video = VideoFileClip(video_file)
    try:
        # Composite video with subtitles
        result = CompositeVideoClip([
            video,
            subtitles.set_position(('center', 'bottom'))
        ])

        # Write output
        result.write_videofile(output_file, codec='libx264', audio_codec='aac', threads=4)
    finally:
        # Release the reader held by the source clip even if encoding fails.
        video.close()
    return output_file
406
-
407
-
408
-
409
def process_video(video_file, language="Auto Detect", translate_to=None, add_tts=False, add_subtitles=False):
    """Run the full pipeline: transcribe, then optionally translate, speak and burn in.

    Args:
        video_file: Path to the uploaded video.
        language: Source language name; used as the TTS language when no
            translation is requested.
        translate_to: Target language name, or ``None`` / ``"None"`` to skip
            translation.
        add_tts: Whether to generate text-to-speech audio.
        add_subtitles: Whether to render a video with burned-in subtitles.

    Returns:
        A 5-tuple ``(original_srt_file, translated_srt_file, tts_audio_file,
        output_video_file, status_text)``; entries for steps that were not
        requested are ``None``.
    """
    if not video_file:
        # The button can be clicked before any file has been uploaded.
        raise gr.Error("Please upload a video file first.")

    # Extract audio from the video
    audio_file = extract_audio_from_video(video_file)
    try:
        # Transcribe audio using Gemini
        transcription = transcribe_audio_with_gemini(audio_file)
    finally:
        # The extracted audio is only needed for transcription; remove it
        # even when the API call fails.
        os.remove(audio_file)

    # Generate subtitles
    subtitles = generate_subtitles(transcription)

    # A private directory per call keeps concurrent requests from
    # overwriting each other's output files.
    work_dir = tempfile.mkdtemp(prefix="autosubgen_")

    # Save original subtitles
    original_srt_file = os.path.join(work_dir, "original_subtitles.srt")
    with open(original_srt_file, "w", encoding="utf-8") as f:
        f.write(subtitles)

    wants_translation = bool(translate_to) and translate_to != "None"
    final_subtitles = subtitles  # text used for TTS

    # Translate subtitles if requested
    translated_srt_file = None
    if wants_translation:
        final_subtitles = translate_srt(subtitles, translate_to)
        translated_srt_file = os.path.join(work_dir, "translated_subtitles.srt")
        with open(translated_srt_file, "w", encoding="utf-8") as f:
            f.write(final_subtitles)

    # Generate TTS audio if requested
    tts_audio_file = None
    if add_tts:
        target_lang = translate_to if wants_translation else language
        tts_audio_file = generate_tts_audio(final_subtitles, target_lang)

    # Create video with subtitles if requested
    output_video_file = None
    if add_subtitles:
        srt_to_use = translated_srt_file if translated_srt_file else original_srt_file
        output_video_file = os.path.join(work_dir, "output_video.mp4")
        add_subtitles_to_video(video_file, srt_to_use, output_video_file)

    return original_srt_file, translated_srt_file, tts_audio_file, output_video_file, "Detected Language: Auto"
490
-
491
-
492
-
493
- # Define the Gradio interface
494
-
495
with gr.Blocks(title="AutoSubGen Pro - AI Video Subtitle Generator") as demo:
    # Header
    with gr.Column():
        gr.Markdown("# 🎥 AutoSubGen Pro")
        gr.Markdown("### Advanced AI-Powered Video Subtitle Generator")
        gr.Markdown("Generate, translate, and add subtitles with text-to-speech audio to your videos.")

    # Main content
    with gr.Tab("Generate Subtitles"):
        gr.Markdown("### Upload a video file to process")
        with gr.Row():
            video_input = gr.Video(label="Upload Video File", scale=2)
            with gr.Column():
                language_dropdown = gr.Dropdown(
                    choices=SUPPORTED_LANGUAGES,
                    label="Source Language",
                    value="Auto Detect",
                )
                # "Auto Detect" (first entry) is not a translation target,
                # so it is replaced by "None".
                translate_to_dropdown = gr.Dropdown(
                    choices=["None"] + SUPPORTED_LANGUAGES[1:],
                    label="Translate To",
                    value="None",
                )
                tts_checkbox = gr.Checkbox(label="Generate Text-to-Speech Audio")
                subtitles_checkbox = gr.Checkbox(label="Add Subtitles to Video")

        generate_button = gr.Button("Process Video", variant="primary")

        with gr.Row():
            with gr.Column():
                original_subtitle_output = gr.File(label="Original Subtitles (SRT)")
                translated_subtitle_output = gr.File(label="Translated Subtitles (SRT)")
            with gr.Column():
                # Hidden until the matching checkbox is ticked.
                tts_audio_output = gr.Audio(label="Text-to-Speech Audio", visible=False)
                video_output = gr.Video(label="Video with Subtitles", visible=False)

        detected_language_output = gr.Textbox(label="Detected Language")

        # Show/hide outputs based on checkboxes. (The unused
        # toggle_outputs helper was removed; these handlers do the work.)
        tts_checkbox.change(
            fn=lambda x: gr.Audio(visible=x),
            inputs=tts_checkbox,
            outputs=tts_audio_output
        )

        subtitles_checkbox.change(
            fn=lambda x: gr.Video(visible=x),
            inputs=subtitles_checkbox,
            outputs=video_output
        )

    # Link button to function
    generate_button.click(
        process_video,
        inputs=[video_input, language_dropdown, translate_to_dropdown, tts_checkbox, subtitles_checkbox],
        outputs=[original_subtitle_output, translated_subtitle_output, tts_audio_output, video_output, detected_language_output]
    )

# Launch the interface
demo.launch(share=True)
 
1
  import os
 
2
  import google.generativeai as genai
3
+ from moviepy.editor import VideoFileClip, AudioFileClip, CompositeVideoClip, TextClip
 
 
 
 
 
 
4
  from moviepy.video.tools.subtitles import SubtitlesClip
 
5
  import tempfile
 
6
  import logging
 
 
 
 
 
7
  import srt
8
+ import datetime
9
+ from gtts import gTTS
10
+ from flask import Flask, request, render_template, send_from_directory, url_for, flash, session
11
+ from werkzeug.utils import secure_filename
12
+ import uuid
13
 
14
+ # --- Configuration ---
15
 
16
  # Suppress moviepy logs
 
17
  logging.getLogger("moviepy").setLevel(logging.ERROR)
18
 
19
+ # Configure Gemini API
20
+ # IMPORTANT: Set your GEMINI_API_KEY as an environment variable
21
+ # For example, in your terminal: export GEMINI_API_KEY="YOUR_API_KEY"
22
try:
    genai.configure(api_key=os.environ["GEMINI_API_KEY"])
except KeyError:
    # `from None` hides the uninformative KeyError traceback so only the
    # actionable message is shown at startup.
    raise RuntimeError(
        "GEMINI_API_KEY environment variable not set. Please set it before running the app."
    ) from None
26
 
27
 
28
+ # --- Flask App Initialization ---
29
+ app = Flask(__name__)
30
+ app.config['SECRET_KEY'] = os.urandom(24)
31
+ app.config['UPLOAD_FOLDER'] = os.path.join(os.getcwd(), 'temp_uploads')
32
 
33
+ # Ensure the upload folder exists
34
+ os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
35
 
36
 
37
+ # --- Model and Language Configuration ---
38
 
39
  # Create the Gemini model
 
40
  generation_config = {
41
+ "temperature": 0.7,
42
+ "top_p": 0.9,
43
+ "top_k": 40,
44
+ "max_output_tokens": 8192,
45
+ "response_mime_type": "text/plain",
 
 
 
 
 
 
46
  }
47
 
 
 
48
  model = genai.GenerativeModel(
49
+ model_name="gemini-1.5-pro-latest", # Using a stable and capable model
50
+ generation_config=generation_config,
 
 
 
51
  )
52
 
 
 
53
  # List of all supported languages
 
54
  SUPPORTED_LANGUAGES = [
55
+ "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
56
+ "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan", "Dutch",
57
+ "Arabic", "Swedish", "Italian", "Indonesian", "Hindi", "Finnish", "Vietnamese",
58
+ "Hebrew", "Ukrainian", "Greek", "Malay", "Czech", "Romanian", "Danish",
59
+ "Hungarian", "Tamil", "Norwegian", "Thai", "Urdu", "Croatian", "Bulgarian",
60
+ "Lithuanian", "Latin", "Maori", "Malayalam", "Welsh", "Slovak", "Telugu",
61
+ "Persian", "Latvian", "Bengali", "Serbian", "Azerbaijani", "Slovenian",
62
+ "Kannada", "Estonian", "Macedonian", "Breton", "Basque", "Icelandic",
63
+ "Armenian", "Nepali", "Mongolian", "Bosnian", "Kazakh", "Albanian",
64
+ "Swahili", "Galician", "Marathi", "Punjabi", "Sinhala", "Khmer", "Shona",
65
+ "Yoruba", "Somali", "Afrikaans", "Occitan", "Georgian", "Belarusian",
66
+ "Tajik", "Sindhi", "Gujarati", "Amharic", "Yiddish", "Lao", "Uzbek",
67
+ "Faroese", "Haitian Creole", "Pashto", "Turkmen", "Nynorsk", "Maltese",
68
+ "Sanskrit", "Luxembourgish", "Burmese", "Tibetan", "Tagalog", "Malagasy",
69
+ "Assamese", "Tatar", "Hawaiian", "Lingala", "Hausa", "Bashkir", "Javanese",
70
+ "Sundanese"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  ]
72
 
 
 
73
  # Language code mapping for gTTS
 
74
  LANGUAGE_CODES = {
75
+ "English": "en", "Chinese": "zh", "German": "de", "Spanish": "es",
76
+ "Russian": "ru", "Korean": "ko", "French": "fr", "Japanese": "ja",
77
+ "Portuguese": "pt", "Turkish": "tr", "Polish": "pl", "Catalan": "ca",
78
+ "Dutch": "nl", "Arabic": "ar", "Swedish": "sv", "Italian": "it",
79
+ "Indonesian": "id", "Hindi": "hi", "Finnish": "fi", "Vietnamese": "vi",
80
+ "Hebrew": "he", "Ukrainian": "uk", "Greek": "el", "Malay": "ms",
81
+ "Czech": "cs", "Romanian": "ro", "Danish": "da", "Hungarian": "hu",
82
+ "Tamil": "ta", "Norwegian": "no", "Thai": "th", "Urdu": "ur",
83
+ "Croatian": "hr", "Bulgarian": "bg", "Lithuanian": "lt", "Latin": "la",
84
+ "Maori": "mi", "Malayalam": "ml", "Welsh": "cy", "Slovak": "sk",
85
+ "Telugu": "te", "Persian": "fa", "Latvian": "lv", "Bengali": "bn",
86
+ "Serbian": "sr", "Azerbaijani": "az", "Slovenian": "sl", "Kannada": "kn",
87
+ "Estonian": "et", "Macedonian": "mk", "Breton": "br", "Basque": "eu",
88
+ "Icelandic": "is", "Armenian": "hy", "Nepali": "ne", "Mongolian": "mn",
89
+ "Bosnian": "bs", "Kazakh": "kk", "Albanian": "sq", "Swahili": "sw",
90
+ "Galician": "gl", "Marathi": "mr", "Punjabi": "pa", "Sinhala": "si",
91
+ "Khmer": "km", "Shona": "sn", "Yoruba": "yo", "Somali": "so",
92
+ "Afrikaans": "af", "Occitan": "oc", "Georgian": "ka", "Belarusian": "be",
93
+ "Tajik": "tg", "Sindhi": "sd", "Gujarati": "gu", "Amharic": "am",
94
+ "Yiddish": "yi", "Lao": "lo", "Uzbek": "uz", "Faroese": "fo",
95
+ "Haitian Creole": "ht", "Pashto": "ps", "Turkmen": "tk", "Nynorsk": "nn",
96
+ "Maltese": "mt", "Sanskrit": "sa", "Luxembourgish": "lb", "Burmese": "my",
97
+ "Tibetan": "bo", "Tagalog": "tl", "Malagasy": "mg", "Assamese": "as",
98
+ "Tatar": "tt", "Hawaiian": "haw", "Lingala": "ln", "Hausa": "ha",
99
+ "Bashkir": "ba", "Javanese": "jv", "Sundanese": "su"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  }
101
 
102
 
103
+ # --- Core Processing Functions ---
104
+
105
def time_to_seconds(time_obj):
    """Convert a time-of-day or a duration to seconds.

    Args:
        time_obj: A ``datetime.time`` or a ``datetime.timedelta``.

    Returns:
        Seconds as a float, including the microsecond fraction.
    """
    if isinstance(time_obj, datetime.timedelta):
        # Durations already know their own length.
        return time_obj.total_seconds()
    return (
        time_obj.hour * 3600
        + time_obj.minute * 60
        + time_obj.second
        + time_obj.microsecond / 1e6
    )
108
+
109
def extract_audio_from_video(video_path, audio_path):
    """Extract the audio track of a video into a 16 kHz WAV file.

    Args:
        video_path: Path to the source video.
        audio_path: Path where the WAV file is written.

    Returns:
        ``audio_path`` on success, ``None`` if extraction failed (the error
        is logged).
    """
    try:
        video = VideoFileClip(video_path)
        try:
            if video.audio is None:
                # Report the real cause rather than an AttributeError on None.
                logging.error("Error extracting audio: the video has no audio track")
                return None
            video.audio.write_audiofile(audio_path, fps=16000, logger=None)
        finally:
            # Release the reader held by the clip on every path.
            video.close()
        return audio_path
    except Exception as e:
        logging.error(f"Error extracting audio: {e}")
        return None
118
+
119
def transcribe_audio_with_gemini(audio_path, source_language):
    """Transcribe an audio file with Gemini, asking for SRT-formatted output.

    Args:
        audio_path: Path to the audio file to upload.
        source_language: Language name to mention in the prompt, or
            ``"Auto Detect"`` to leave the language unspecified.

    Returns:
        The model's reply text with surrounding whitespace stripped, or
        ``None`` if the upload or the generation failed (the error is logged).
    """
    try:
        audio_file = genai.upload_file(path=audio_path)
        try:
            # The leading space lives in the fragment so the sentence reads
            # "verbatim." rather than "verbatim ." when no language is given.
            language_prompt = f" in {source_language}" if source_language != "Auto Detect" else ""

            prompt = f"""
            You are a professional transcriber. Transcribe this audio accurately and verbatim{language_prompt}.
            Include timestamps for each sentence in the SRT (SubRip) format.
            Example:
            1
            00:00:01,234 --> 00:00:05,678
            This is the first sentence.

            2
            00:00:06,123 --> 00:00:09,456
            This is the second sentence.

            Ensure the timestamps are precise and correspond to the start and end of each spoken sentence.
            Respond ONLY with the transcription in the SRT format. Do not add explanations or any extra text.
            """

            response = model.generate_content([prompt, audio_file])
        finally:
            # Clean up the uploaded file even when generation fails. A
            # cleanup failure must not discard a good transcript.
            try:
                genai.delete_file(audio_file.name)
            except Exception as cleanup_error:
                logging.warning(f"Could not delete uploaded audio file: {cleanup_error}")
        return response.text.strip()
    except Exception as e:
        logging.error(f"Error during Gemini transcription: {e}")
        return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
 
149
def translate_srt(srt_text, target_language):
    """Ask Gemini to translate SRT subtitles, leaving indices and timestamps intact.

    Args:
        srt_text: The SRT document to translate.
        target_language: Name of the language to translate into.

    Returns:
        The stripped reply text, or ``None`` when the request fails (the
        error is logged).
    """
    try:
        request_text = f"""
        Translate the following SRT subtitles into {target_language}.
        Preserve the SRT format perfectly (index numbers, timestamps, and structure).
        Translate only the subtitle text on the lines after the timestamps.
        Do not add any explanations or extra text. Your output must be a valid SRT file.
        Here is the SRT file content:
        {srt_text}
        """
        reply = model.generate_content(request_text)
        translated = reply.text.strip()
        return translated
    except Exception as e:
        logging.error(f"Error during translation: {e}")
        return None
165
+
166
def generate_tts_audio(srt_text, language, tts_audio_path):
    """Speak every subtitle cue into a single audio file.

    Args:
        srt_text: SRT document whose cue contents are read aloud.
        language: Language name looked up in ``LANGUAGE_CODES``; names not
            in the mapping fall back to ``"en"``.
        tts_audio_path: Path where the audio file is saved.

    Returns:
        ``tts_audio_path`` on success, ``None`` on any failure (the error
        is logged).
    """
    try:
        cues = list(srt.parse(srt_text))
        spoken_text = " ".join(cue.content for cue in cues)
        speech = gTTS(
            text=spoken_text,
            lang=LANGUAGE_CODES.get(language, "en"),
            slow=False,
        )
        speech.save(tts_audio_path)
    except Exception as e:
        logging.error(f"Error generating TTS audio: {e}")
        return None
    return tts_audio_path
180
+
181
def add_subtitles_to_video(video_path, srt_text, output_video_path):
    """Burn SRT subtitles into a video.

    Args:
        video_path: Path to the source video.
        srt_text: SRT document to render.
        output_video_path: Path where the subtitled video is written.

    Returns:
        ``output_video_path`` on success, ``None`` on any failure (the error
        is logged).
    """
    def generator(txt):
        """Render one subtitle line as an outlined text clip."""
        return TextClip(txt, font='Arial-Bold', fontsize=24, color='white',
                        stroke_color='black', stroke_width=1)

    srt_path = None
    video = None
    try:
        # MoviePy's SubtitlesClip requires a file path
        with tempfile.NamedTemporaryFile(mode='w', suffix='.srt', delete=False, encoding='utf-8') as temp_srt:
            srt_path = temp_srt.name
            temp_srt.write(srt_text)

        video = VideoFileClip(video_path)
        subtitles = SubtitlesClip(srt_path, generator)

        result = CompositeVideoClip([video, subtitles.set_position(('center', 'bottom'))])

        # Write output with original audio
        result.write_videofile(output_video_path, codec='libx264', audio_codec='aac', threads=4, logger=None)
        return output_video_path
    except Exception as e:
        logging.error(f"Error adding subtitles to video: {e}")
        return None
    finally:
        # Cleanup runs on success and failure alike, so a failed render
        # leaves neither an open reader nor a stray temp SRT file.
        if video is not None:
            video.close()
        if srt_path is not None and os.path.exists(srt_path):
            os.remove(srt_path)
206
+
207
+
208
+ # --- Flask Routes ---
209
+
210
@app.route('/')
def index():
    """Serve the upload form with an empty session."""
    # Drop whatever an earlier visit stored before showing a fresh form.
    session.clear()
    page = render_template('index.html', supported_languages=SUPPORTED_LANGUAGES)
    return page
215
+
216
@app.route('/process', methods=['POST'])
def process():
    """Handle the video processing request.

    Saves the upload into a fresh per-request directory, transcribes it and
    then, depending on the form options, translates the subtitles, generates
    TTS audio and burns subtitles into the video. Optional steps that fail
    only flash a warning; a failed extraction or transcription aborts.

    Returns:
        The rendered ``index.html`` page, with ``results`` (output file
        names) and ``session_id`` on success.
    """
    def render_form(**context):
        """Render the main page, always passing the language list."""
        return render_template('index.html', supported_languages=SUPPORTED_LANGUAGES, **context)

    video_file = request.files.get('video')
    if video_file is None or video_file.filename == '':
        flash('No video file selected. Please upload a video.', 'error')
        return render_form()

    # --- Get form options ---
    source_language = request.form.get('source_language', 'Auto Detect')
    translate_to = request.form.get('translate_to', 'None')
    add_tts = 'add_tts' in request.form
    add_subtitles = 'add_subtitles' in request.form
    wants_translation = bool(translate_to) and translate_to != "None"

    # --- Setup a unique session directory for this request ---
    session_id = str(uuid.uuid4())
    session['session_id'] = session_id
    session_dir = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
    os.makedirs(session_dir, exist_ok=True)

    # secure_filename() can return "" (e.g. for a name with no ASCII
    # characters), which would make video_path the directory itself. The
    # "input_" prefix also stops an upload named like one of the outputs
    # (e.g. "output_video.mp4") from being overwritten while it is read.
    filename = "input_" + (secure_filename(video_file.filename) or "video")
    video_path = os.path.join(session_dir, filename)
    video_file.save(video_path)

    results = {}

    audio_path = os.path.join(session_dir, "extracted_audio.wav")
    try:
        # 1. Extract Audio
        if not extract_audio_from_video(video_path, audio_path):
            flash('Failed to extract audio from the video.', 'error')
            return render_form()

        # 2. Transcribe Audio
        original_srt_text = transcribe_audio_with_gemini(audio_path, source_language)
    finally:
        # The extracted audio is only needed for transcription; remove it on
        # every path, including the early returns.
        if os.path.exists(audio_path):
            os.remove(audio_path)

    if not original_srt_text:
        flash('Failed to transcribe the audio. The API call might have failed.', 'error')
        return render_form()

    original_srt_path = os.path.join(session_dir, "original_subtitles.srt")
    with open(original_srt_path, "w", encoding="utf-8") as f:
        f.write(original_srt_text)
    results['original_srt_file'] = "original_subtitles.srt"

    # Keep track of the final SRT to use for TTS and video burn-in
    final_srt_text = original_srt_text

    # 3. Translate Subtitles (if requested)
    if wants_translation:
        translated_srt_text = translate_srt(original_srt_text, translate_to)
        if translated_srt_text:
            translated_srt_path = os.path.join(session_dir, "translated_subtitles.srt")
            with open(translated_srt_path, "w", encoding="utf-8") as f:
                f.write(translated_srt_text)
            results['translated_srt_file'] = "translated_subtitles.srt"
            final_srt_text = translated_srt_text  # Use translated text for next steps
        else:
            flash(f'Failed to translate subtitles to {translate_to}.', 'warning')

    # 4. Generate TTS Audio (if requested)
    if add_tts:
        tts_lang = translate_to if wants_translation else source_language
        # If source was auto-detect, we can't reliably guess the TTS language. Default to English.
        if tts_lang == 'Auto Detect':
            flash('TTS language cannot be "Auto Detect". Defaulting to English. For better results, please specify the source language.', 'warning')
            tts_lang = 'English'

        tts_audio_path = os.path.join(session_dir, "tts_audio.mp3")
        if generate_tts_audio(final_srt_text, tts_lang, tts_audio_path):
            results['tts_audio_file'] = "tts_audio.mp3"
        else:
            flash('Failed to generate Text-to-Speech audio.', 'warning')

    # 5. Add Subtitles to Video (if requested)
    if add_subtitles:
        output_video_path = os.path.join(session_dir, "output_video.mp4")
        if add_subtitles_to_video(video_path, final_srt_text, output_video_path):
            results['output_video_file'] = "output_video.mp4"
        else:
            flash('Failed to add subtitles to the video.', 'warning')

    return render_form(results=results, session_id=session_id)
307
+
308
@app.route('/download/<session_id>/<path:filename>')
def download_file(session_id, filename):
    """Serve a generated file from a request's session directory.

    Args:
        session_id: Directory name created by ``process()`` (a UUID string).
        filename: Name of the file inside that directory.

    Returns:
        The file as an attachment. Responds 404 when ``session_id`` is not
        a canonical UUID string.
    """
    from flask import abort  # `abort` is not among the module-level flask imports

    # session_id comes straight from the URL. Without this check a value
    # such as ".." would point `directory` at the parent of the upload
    # folder and expose files there.
    try:
        is_canonical_uuid = str(uuid.UUID(session_id)) == session_id
    except ValueError:
        is_canonical_uuid = False
    if not is_canonical_uuid:
        abort(404)

    directory = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
    return send_from_directory(directory, filename, as_attachment=True)
313
+
314
+
315
+ # --- Run the App ---
316
if __name__ == '__main__':
    # Debug mode enables the interactive Werkzeug debugger, which lets
    # anyone who can reach the server run code on it. It is therefore
    # opt-in via FLASK_DEBUG=1 (or "true") instead of always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true")
    app.run(debug=debug_enabled)