Athspi committed on
Commit
a8524a9
·
verified ·
1 Parent(s): a1976c9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +589 -281
app.py CHANGED
@@ -1,317 +1,625 @@
1
  import os
 
2
  import google.generativeai as genai
3
- from moviepy.editor import VideoFileClip, AudioFileClip, CompositeVideoClip, TextClip
 
 
 
 
 
 
4
  from moviepy.video.tools.subtitles import SubtitlesClip
 
5
  import tempfile
 
6
  import logging
7
- import srt
8
- import datetime
 
9
  from gtts import gTTS
10
- from flask import Flask, request, render_template, send_from_directory, url_for, flash, session
11
- from werkzeug.utils import secure_filename
12
- import uuid
13
 
14
- # --- Configuration ---
 
 
15
 
16
  # Suppress moviepy logs
 
17
  logging.getLogger("moviepy").setLevel(logging.ERROR)
18
 
19
- # Configure Gemini API
20
- # IMPORTANT: Set your GEMINI_API_KEY as an environment variable
21
- # For example, in your terminal: export GEMINI_API_KEY="YOUR_API_KEY"
22
- try:
23
- genai.configure(api_key=os.environ["GEMINI_API_KEY"])
24
- except KeyError:
25
- raise Exception("GEMINI_API_KEY environment variable not set. Please set it before running the app.")
26
 
27
 
28
- # --- Flask App Initialization ---
29
- app = Flask(__name__)
30
- app.config['SECRET_KEY'] = os.urandom(24)
31
- app.config['UPLOAD_FOLDER'] = os.path.join(os.getcwd(), 'temp_uploads')
32
 
33
- # Ensure the upload folder exists
34
- os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
35
 
36
 
37
- # --- Model and Language Configuration ---
38
 
39
  # Create the Gemini model
 
40
  generation_config = {
41
- "temperature": 0.7,
42
- "top_p": 0.9,
43
- "top_k": 40,
44
- "max_output_tokens": 8192,
45
- "response_mime_type": "text/plain",
 
 
 
 
 
 
46
  }
47
 
 
 
48
  model = genai.GenerativeModel(
49
- model_name="gemini-1.5-pro-latest", # Using a stable and capable model
50
- generation_config=generation_config,
 
 
 
51
  )
52
 
 
 
53
  # List of all supported languages
 
54
  SUPPORTED_LANGUAGES = [
55
- "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
56
- "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan", "Dutch",
57
- "Arabic", "Swedish", "Italian", "Indonesian", "Hindi", "Finnish", "Vietnamese",
58
- "Hebrew", "Ukrainian", "Greek", "Malay", "Czech", "Romanian", "Danish",
59
- "Hungarian", "Tamil", "Norwegian", "Thai", "Urdu", "Croatian", "Bulgarian",
60
- "Lithuanian", "Latin", "Maori", "Malayalam", "Welsh", "Slovak", "Telugu",
61
- "Persian", "Latvian", "Bengali", "Serbian", "Azerbaijani", "Slovenian",
62
- "Kannada", "Estonian", "Macedonian", "Breton", "Basque", "Icelandic",
63
- "Armenian", "Nepali", "Mongolian", "Bosnian", "Kazakh", "Albanian",
64
- "Swahili", "Galician", "Marathi", "Punjabi", "Sinhala", "Khmer", "Shona",
65
- "Yoruba", "Somali", "Afrikaans", "Occitan", "Georgian", "Belarusian",
66
- "Tajik", "Sindhi", "Gujarati", "Amharic", "Yiddish", "Lao", "Uzbek",
67
- "Faroese", "Haitian Creole", "Pashto", "Turkmen", "Nynorsk", "Maltese",
68
- "Sanskrit", "Luxembourgish", "Burmese", "Tibetan", "Tagalog", "Malagasy",
69
- "Assamese", "Tatar", "Hawaiian", "Lingala", "Hausa", "Bashkir", "Javanese",
70
- "Sundanese"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  ]
72
 
 
 
73
  # Language code mapping for gTTS
 
74
  LANGUAGE_CODES = {
75
- "English": "en", "Chinese": "zh", "German": "de", "Spanish": "es",
76
- "Russian": "ru", "Korean": "ko", "French": "fr", "Japanese": "ja",
77
- "Portuguese": "pt", "Turkish": "tr", "Polish": "pl", "Catalan": "ca",
78
- "Dutch": "nl", "Arabic": "ar", "Swedish": "sv", "Italian": "it",
79
- "Indonesian": "id", "Hindi": "hi", "Finnish": "fi", "Vietnamese": "vi",
80
- "Hebrew": "he", "Ukrainian": "uk", "Greek": "el", "Malay": "ms",
81
- "Czech": "cs", "Romanian": "ro", "Danish": "da", "Hungarian": "hu",
82
- "Tamil": "ta", "Norwegian": "no", "Thai": "th", "Urdu": "ur",
83
- "Croatian": "hr", "Bulgarian": "bg", "Lithuanian": "lt", "Latin": "la",
84
- "Maori": "mi", "Malayalam": "ml", "Welsh": "cy", "Slovak": "sk",
85
- "Telugu": "te", "Persian": "fa", "Latvian": "lv", "Bengali": "bn",
86
- "Serbian": "sr", "Azerbaijani": "az", "Slovenian": "sl", "Kannada": "kn",
87
- "Estonian": "et", "Macedonian": "mk", "Breton": "br", "Basque": "eu",
88
- "Icelandic": "is", "Armenian": "hy", "Nepali": "ne", "Mongolian": "mn",
89
- "Bosnian": "bs", "Kazakh": "kk", "Albanian": "sq", "Swahili": "sw",
90
- "Galician": "gl", "Marathi": "mr", "Punjabi": "pa", "Sinhala": "si",
91
- "Khmer": "km", "Shona": "sn", "Yoruba": "yo", "Somali": "so",
92
- "Afrikaans": "af", "Occitan": "oc", "Georgian": "ka", "Belarusian": "be",
93
- "Tajik": "tg", "Sindhi": "sd", "Gujarati": "gu", "Amharic": "am",
94
- "Yiddish": "yi", "Lao": "lo", "Uzbek": "uz", "Faroese": "fo",
95
- "Haitian Creole": "ht", "Pashto": "ps", "Turkmen": "tk", "Nynorsk": "nn",
96
- "Maltese": "mt", "Sanskrit": "sa", "Luxembourgish": "lb", "Burmese": "my",
97
- "Tibetan": "bo", "Tagalog": "tl", "Malagasy": "mg", "Assamese": "as",
98
- "Tatar": "tt", "Hawaiian": "haw", "Lingala": "ln", "Hausa": "ha",
99
- "Bashkir": "ba", "Javanese": "jv", "Sundanese": "su"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  }
101
 
102
 
103
- # --- Core Processing Functions ---
104
-
105
- def time_to_seconds(time_obj):
106
- """Convert datetime.time object to seconds."""
107
- return time_obj.hour * 3600 + time_obj.minute * 60 + time_obj.second + time_obj.microsecond / 1e6
108
-
109
- def extract_audio_from_video(video_path, audio_path):
110
- """Extract audio from a video file and save it as a WAV file."""
111
- try:
112
- video = VideoFileClip(video_path)
113
- video.audio.write_audiofile(audio_path, fps=16000, logger=None)
114
- return audio_path
115
- except Exception as e:
116
- logging.error(f"Error extracting audio: {e}")
117
- return None
118
-
119
- def transcribe_audio_with_gemini(audio_path, source_language):
120
- """Transcribe audio using Gemini with a prompt for accurate timestamps."""
121
- try:
122
- audio_file = genai.upload_file(path=audio_path)
123
- language_prompt = f"in {source_language}" if source_language != "Auto Detect" else ""
124
-
125
- prompt = f"""
126
- You are a professional transcriber. Transcribe this audio accurately and verbatim {language_prompt}.
127
- Include timestamps for each sentence in the SRT (SubRip) format.
128
- Example:
129
- 1
130
- 00:00:01,234 --> 00:00:05,678
131
- This is the first sentence.
132
-
133
- 2
134
- 00:00:06,123 --> 00:00:09,456
135
- This is the second sentence.
136
-
137
- Ensure the timestamps are precise and correspond to the start and end of each spoken sentence.
138
- Respond ONLY with the transcription in the SRT format. Do not add explanations or any extra text.
139
- """
140
-
141
- response = model.generate_content([prompt, audio_file])
142
- genai.delete_file(audio_file.name) # Clean up the uploaded file
143
- return response.text.strip()
144
- except Exception as e:
145
- logging.error(f"Error during Gemini transcription: {e}")
146
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
 
149
  def translate_srt(srt_text, target_language):
150
- """Translate an SRT file using Gemini while preserving timestamps."""
151
- try:
152
- prompt = f"""
153
- Translate the following SRT subtitles into {target_language}.
154
- Preserve the SRT format perfectly (index numbers, timestamps, and structure).
155
- Translate only the subtitle text on the lines after the timestamps.
156
- Do not add any explanations or extra text. Your output must be a valid SRT file.
157
- Here is the SRT file content:
158
- {srt_text}
159
- """
160
- response = model.generate_content(prompt)
161
- return response.text.strip()
162
- except Exception as e:
163
- logging.error(f"Error during translation: {e}")
164
- return None
165
-
166
- def generate_tts_audio(srt_text, language, tts_audio_path):
167
- """Generate TTS audio from SRT text."""
168
- try:
169
- subtitles = list(srt.parse(srt_text))
170
- all_text = " ".join([sub.content for sub in subtitles])
171
-
172
- lang_code = LANGUAGE_CODES.get(language, "en")
173
-
174
- tts = gTTS(text=all_text, lang=lang_code, slow=False)
175
- tts.save(tts_audio_path)
176
- return tts_audio_path
177
- except Exception as e:
178
- logging.error(f"Error generating TTS audio: {e}")
179
- return None
180
-
181
- def add_subtitles_to_video(video_path, srt_text, output_video_path):
182
- """Add subtitles to video and return the path to the new video."""
183
- try:
184
- def generator(txt):
185
- return TextClip(txt, font='Arial-Bold', fontsize=24, color='white',
186
- stroke_color='black', stroke_width=1)
187
-
188
- # MoviePy's SubtitlesClip requires a file path
189
- with tempfile.NamedTemporaryFile(mode='w', suffix='.srt', delete=False, encoding='utf-8') as temp_srt:
190
- temp_srt.write(srt_text)
191
- srt_path = temp_srt.name
192
-
193
- video = VideoFileClip(video_path)
194
- subtitles = SubtitlesClip(srt_path, generator)
195
-
196
- result = CompositeVideoClip([video, subtitles.set_position(('center', 'bottom'))])
197
-
198
- # Write output with original audio
199
- result.write_videofile(output_video_path, codec='libx264', audio_codec='aac', threads=4, logger=None)
200
-
201
- os.remove(srt_path) # Clean up temp srt file
202
- return output_video_path
203
- except Exception as e:
204
- logging.error(f"Error adding subtitles to video: {e}")
205
- return None
206
-
207
-
208
- # --- Flask Routes ---
209
-
210
- @app.route('/')
211
- def index():
212
- """Render the main page."""
213
- session.clear() # Clear any old data
214
- return render_template('index.html', supported_languages=SUPPORTED_LANGUAGES)
215
-
216
- @app.route('/process', methods=['POST'])
217
- def process():
218
- """Handle the video processing request."""
219
- if 'video' not in request.files:
220
- flash('No video file selected. Please upload a video.', 'error')
221
- return render_template('index.html', supported_languages=SUPPORTED_LANGUAGES)
222
-
223
- video_file = request.files['video']
224
- if video_file.filename == '':
225
- flash('No video file selected. Please upload a video.', 'error')
226
- return render_template('index.html', supported_languages=SUPPORTED_LANGUAGES)
227
-
228
- # --- Get form options ---
229
- source_language = request.form.get('source_language', 'Auto Detect')
230
- translate_to = request.form.get('translate_to', 'None')
231
- add_tts = 'add_tts' in request.form
232
- add_subtitles = 'add_subtitles' in request.form
233
-
234
- # --- Setup a unique session directory for this request ---
235
- session_id = str(uuid.uuid4())
236
- session['session_id'] = session_id
237
- session_dir = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
238
- os.makedirs(session_dir, exist_ok=True)
239
-
240
- filename = secure_filename(video_file.filename)
241
- video_path = os.path.join(session_dir, filename)
242
- video_file.save(video_path)
243
-
244
- results = {}
245
-
246
- # 1. Extract Audio
247
- audio_path = os.path.join(session_dir, "extracted_audio.wav")
248
- if not extract_audio_from_video(video_path, audio_path):
249
- flash('Failed to extract audio from the video.', 'error')
250
- return render_template('index.html', supported_languages=SUPPORTED_LANGUAGES)
251
-
252
- # 2. Transcribe Audio
253
- original_srt_text = transcribe_audio_with_gemini(audio_path, source_language)
254
- if not original_srt_text:
255
- flash('Failed to transcribe the audio. The API call might have failed.', 'error')
256
- return render_template('index.html', supported_languages=SUPPORTED_LANGUAGES)
257
-
258
- original_srt_path = os.path.join(session_dir, "original_subtitles.srt")
259
- with open(original_srt_path, "w", encoding="utf-8") as f:
260
- f.write(original_srt_text)
261
- results['original_srt_file'] = "original_subtitles.srt"
262
-
263
- # Keep track of the final SRT to use for TTS and video burn-in
264
- final_srt_text = original_srt_text
265
-
266
- # 3. Translate Subtitles (if requested)
267
- if translate_to and translate_to != "None":
268
- translated_srt_text = translate_srt(original_srt_text, translate_to)
269
- if translated_srt_text:
270
- translated_srt_path = os.path.join(session_dir, "translated_subtitles.srt")
271
- with open(translated_srt_path, "w", encoding="utf-8") as f:
272
- f.write(translated_srt_text)
273
- results['translated_srt_file'] = "translated_subtitles.srt"
274
- final_srt_text = translated_srt_text # Use translated text for next steps
275
- else:
276
- flash(f'Failed to translate subtitles to {translate_to}.', 'warning')
277
-
278
- # 4. Generate TTS Audio (if requested)
279
- if add_tts:
280
- tts_lang = translate_to if translate_to and translate_to != "None" else source_language
281
- # If source was auto-detect, we can't reliably guess the TTS language. Default to English.
282
- if tts_lang == 'Auto Detect':
283
- flash('TTS language cannot be "Auto Detect". Defaulting to English. For better results, please specify the source language.', 'warning')
284
- tts_lang = 'English'
285
-
286
- tts_audio_path = os.path.join(session_dir, "tts_audio.mp3")
287
- if generate_tts_audio(final_srt_text, tts_lang, tts_audio_path):
288
- results['tts_audio_file'] = "tts_audio.mp3"
289
- else:
290
- flash('Failed to generate Text-to-Speech audio.', 'warning')
291
-
292
- # 5. Add Subtitles to Video (if requested)
293
- if add_subtitles:
294
- output_video_path = os.path.join(session_dir, "output_video.mp4")
295
- if add_subtitles_to_video(video_path, final_srt_text, output_video_path):
296
- results['output_video_file'] = "output_video.mp4"
297
- else:
298
- flash('Failed to add subtitles to the video.', 'warning')
299
-
300
- # Clean up original extracted audio
301
- os.remove(audio_path)
302
-
303
- return render_template('index.html',
304
- supported_languages=SUPPORTED_LANGUAGES,
305
- results=results,
306
- session_id=session_id)
307
-
308
- @app.route('/download/<session_id>/<path:filename>')
309
- def download_file(session_id, filename):
310
- """Serve files from the session directory for download."""
311
- directory = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
312
- return send_from_directory(directory, filename, as_attachment=True)
313
-
314
-
315
- # --- Run the App ---
316
- if __name__ == '__main__':
317
- app.run(host="0.0.0.0", port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import datetime
import logging
import os
import re
import tempfile

import google.generativeai as genai
import gradio as gr
import srt
from gtts import gTTS
from moviepy.audio.io.AudioFileClip import AudioFileClip
from moviepy.video.VideoClip import TextClip
from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip
from moviepy.video.io.VideoFileClip import VideoFileClip
from moviepy.video.tools.subtitles import SubtitlesClip
22
+
23
+
24
 
25
  # Suppress moviepy logs
26
+
27
  logging.getLogger("moviepy").setLevel(logging.ERROR)
28
 
 
 
 
 
 
 
 
29
 
30
 
31
# Configure Gemini API.
# The key is read from the GEMINI_API_KEY environment variable. Fail fast with
# an actionable message instead of a bare KeyError when it is missing or empty.
_gemini_api_key = os.environ.get("GEMINI_API_KEY")
if not _gemini_api_key:
    raise RuntimeError(
        "GEMINI_API_KEY environment variable not set. "
        "Please set it before running the app."
    )
genai.configure(api_key=_gemini_api_key)
 
34
 
35
 
 
36
 
37
# Create the Gemini model.
# Sampling settings shared by every request made through `model`.
generation_config = {
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 40,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}

# The default model id is an experimental release; allow overriding it through
# the GEMINI_MODEL environment variable so the app can be repointed without a
# code change. The default is unchanged.
model = genai.GenerativeModel(
    model_name=os.environ.get("GEMINI_MODEL", "gemini-2.0-pro-exp-02-05"),
    generation_config=generation_config,
)
62
 
63
+
64
+
65
# Languages offered in the UI dropdowns. The first entry is the
# "Auto Detect" sentinel; the order here is the order shown to the user.
SUPPORTED_LANGUAGES = [
    "Auto Detect", "English", "Chinese", "German", "Spanish",
    "Russian", "Korean", "French", "Japanese", "Portuguese",
    "Turkish", "Polish", "Catalan", "Dutch", "Arabic",
    "Swedish", "Italian", "Indonesian", "Hindi", "Finnish",
    "Vietnamese", "Hebrew", "Ukrainian", "Greek", "Malay",
    "Czech", "Romanian", "Danish", "Hungarian", "Tamil",
    "Norwegian", "Thai", "Urdu", "Croatian", "Bulgarian",
    "Lithuanian", "Latin", "Maori", "Malayalam", "Welsh",
    "Slovak", "Telugu", "Persian", "Latvian", "Bengali",
    "Serbian", "Azerbaijani", "Slovenian", "Kannada", "Estonian",
    "Macedonian", "Breton", "Basque", "Icelandic", "Armenian",
    "Nepali", "Mongolian", "Bosnian", "Kazakh", "Albanian",
    "Swahili", "Galician", "Marathi", "Punjabi", "Sinhala",
    "Khmer", "Shona", "Yoruba", "Somali", "Afrikaans",
    "Occitan", "Georgian", "Belarusian", "Tajik", "Sindhi",
    "Gujarati", "Amharic", "Yiddish", "Lao", "Uzbek",
    "Faroese", "Haitian Creole", "Pashto", "Turkmen", "Nynorsk",
    "Maltese", "Sanskrit", "Luxembourgish", "Burmese", "Tibetan",
    "Tagalog", "Malagasy", "Assamese", "Tatar", "Hawaiian",
    "Lingala", "Hausa", "Bashkir", "Javanese", "Sundanese",
]
102
 
103
+
104
+
105
# Maps each display name from the language list to the short language code
# passed to gTTS. "Auto Detect" deliberately has no entry.
LANGUAGE_CODES = {
    "English": "en", "Chinese": "zh", "German": "de",
    "Spanish": "es", "Russian": "ru", "Korean": "ko",
    "French": "fr", "Japanese": "ja", "Portuguese": "pt",
    "Turkish": "tr", "Polish": "pl", "Catalan": "ca",
    "Dutch": "nl", "Arabic": "ar", "Swedish": "sv",
    "Italian": "it", "Indonesian": "id", "Hindi": "hi",
    "Finnish": "fi", "Vietnamese": "vi", "Hebrew": "he",
    "Ukrainian": "uk", "Greek": "el", "Malay": "ms",
    "Czech": "cs", "Romanian": "ro", "Danish": "da",
    "Hungarian": "hu", "Tamil": "ta", "Norwegian": "no",
    "Thai": "th", "Urdu": "ur", "Croatian": "hr",
    "Bulgarian": "bg", "Lithuanian": "lt", "Latin": "la",
    "Maori": "mi", "Malayalam": "ml", "Welsh": "cy",
    "Slovak": "sk", "Telugu": "te", "Persian": "fa",
    "Latvian": "lv", "Bengali": "bn", "Serbian": "sr",
    "Azerbaijani": "az", "Slovenian": "sl", "Kannada": "kn",
    "Estonian": "et", "Macedonian": "mk", "Breton": "br",
    "Basque": "eu", "Icelandic": "is", "Armenian": "hy",
    "Nepali": "ne", "Mongolian": "mn", "Bosnian": "bs",
    "Kazakh": "kk", "Albanian": "sq", "Swahili": "sw",
    "Galician": "gl", "Marathi": "mr", "Punjabi": "pa",
    "Sinhala": "si", "Khmer": "km", "Shona": "sn",
    "Yoruba": "yo", "Somali": "so", "Afrikaans": "af",
    "Occitan": "oc", "Georgian": "ka", "Belarusian": "be",
    "Tajik": "tg", "Sindhi": "sd", "Gujarati": "gu",
    "Amharic": "am", "Yiddish": "yi", "Lao": "lo",
    "Uzbek": "uz", "Faroese": "fo", "Haitian Creole": "ht",
    "Pashto": "ps", "Turkmen": "tk", "Nynorsk": "nn",
    "Maltese": "mt", "Sanskrit": "sa", "Luxembourgish": "lb",
    "Burmese": "my", "Tibetan": "bo", "Tagalog": "tl",
    "Malagasy": "mg", "Assamese": "as", "Tatar": "tt",
    "Hawaiian": "haw", "Lingala": "ln", "Hausa": "ha",
    "Bashkir": "ba", "Javanese": "jv", "Sundanese": "su",
}
160
 
161
 
162
+
163
def extract_audio_from_video(video_file):
    """Extract the audio track of a video into a temporary 16 kHz WAV file.

    Args:
        video_file: Path to the source video.

    Returns:
        Path to the newly written WAV file. The caller is responsible for
        deleting it.

    Raises:
        ValueError: If the video has no audio track.
    """
    video = VideoFileClip(video_file)
    try:
        if video.audio is None:
            raise ValueError("The video has no audio track to transcribe.")

        # Use a unique file per call so concurrent requests cannot overwrite
        # (or delete) each other's audio.
        fd, audio_file = tempfile.mkstemp(prefix="extracted_audio_", suffix=".wav")
        os.close(fd)
        try:
            video.audio.write_audiofile(audio_file, fps=16000, logger=None)
        except Exception:
            # Do not leave a partial file behind when the export fails.
            if os.path.exists(audio_file):
                os.remove(audio_file)
            raise
    finally:
        # Release the reader opened by VideoFileClip.
        video.close()
    return audio_file
174
+
175
+
176
+
177
def transcribe_audio_with_gemini(audio_file):
    """Transcribe a WAV file with Gemini, asking for per-sentence timestamps.

    Args:
        audio_file: Path to a WAV file.

    Returns:
        The model's reply with surrounding whitespace removed. The prompt asks
        for one ``[HH:MM:SS] sentence`` line per sentence, but the reply is
        returned as-is and is not validated here.
    """
    with open(audio_file, "rb") as f:
        audio_blob = {
            'mime_type': 'audio/wav',
            'data': f.read(),
        }

    prompt = """

    You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language.

    Include timestamps for each sentence in the following format:

    [HH:MM:SS] Sentence 1

    [HH:MM:SS] Sentence 2

    ...

    Ensure the timestamps are accurate and correspond to the start of each sentence.

    Respond only with the transcription and timestamps. Do not add explanations or extra text.

    """

    # Send the instructions and the audio in ONE request. Sending the prompt
    # alone first makes the model answer a turn that has no audio yet and
    # costs an extra round trip.
    response = model.generate_content([prompt, audio_blob])
    return response.text.strip()
224
+
225
+
226
+
227
def generate_subtitles(transcription):
    """Convert a timestamped transcription into SRT-formatted text.

    Each non-blank line is expected to look like ``[HH:MM:SS] sentence``.
    Lines without a valid leading timestamp are treated as a continuation of
    the previous cue (or as a cue starting at 0 seconds when there is no
    previous cue), so stray text such as ``[Music]`` no longer raises.

    Only start times are available, so each cue lasts 5 seconds, shortened to
    end at the next cue's start when that comes sooner.

    Args:
        transcription: Multi-line transcription text.

    Returns:
        The subtitles composed into a single SRT string.
    """
    timestamp_line = re.compile(r"^\[(\d{1,2}:\d{1,2}:\d{1,2})\]\s*(.*)$")
    default_duration = 5  # Placeholder duration in seconds.

    # First pass: collect [start_seconds, text] pairs.
    cues = []
    for raw_line in transcription.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        match = timestamp_line.match(line)
        if match:
            # group(2) is everything after the first "]", so text that itself
            # contains "]" is kept intact.
            cues.append([time_to_seconds(match.group(1)), match.group(2)])
        elif cues:
            cues[-1][1] = f"{cues[-1][1]} {line}".strip()
        else:
            cues.append([0, line])

    # Second pass: build subtitles, clamping each end to the next start so
    # consecutive cues do not overlap on screen.
    srt_subtitles = []
    for position, (start_seconds, text) in enumerate(cues):
        end_seconds = start_seconds + default_duration
        if position + 1 < len(cues):
            next_start = cues[position + 1][0]
            if start_seconds < next_start < end_seconds:
                end_seconds = next_start

        srt_subtitles.append(
            srt.Subtitle(
                index=position + 1,
                start=datetime.timedelta(seconds=start_seconds),
                end=datetime.timedelta(seconds=end_seconds),
                content=text,
            )
        )

    return srt.compose(srt_subtitles)
284
+
285
+
286
+
287
def time_to_seconds(time_str):
    """Convert a clock string to a whole number of seconds.

    Accepts ``HH:MM:SS`` as before, and additionally ``MM:SS`` and ``SS``.
    Surrounding whitespace is ignored.

    Args:
        time_str: Colon-separated integer fields, most significant first.

    Returns:
        Total seconds as an int.

    Raises:
        ValueError: If there are more than three fields or a field is not an
            integer.
    """
    fields = time_str.strip().split(":")
    if len(fields) > 3:
        raise ValueError(f"Invalid timestamp: {time_str!r}")

    # Fold left to right: each further field is 60x finer than the last.
    total = 0
    for field in fields:
        total = total * 60 + int(field)
    return total
294
+
295
+
296
+
297
def seconds_to_time(seconds):
    """Convert a number of seconds to an ``HH:MM:SS`` string.

    Fractional seconds are truncated; previously a float input produced
    fields such as ``1.0`` instead of ``01``.

    Args:
        seconds: Non-negative number of seconds (int or float).

    Returns:
        The zero-padded ``HH:MM:SS`` string. Hours grow beyond two digits
        when needed.

    Raises:
        ValueError: If ``seconds`` is negative.
    """
    whole_seconds = int(seconds)
    if whole_seconds < 0:
        raise ValueError(f"seconds must be non-negative, got {seconds!r}")

    minutes, ss = divmod(whole_seconds, 60)
    hh, mm = divmod(minutes, 60)
    return f"{hh:02}:{mm:02}:{ss:02}"
308
+
309
 
310
 
311
def translate_srt(srt_text, target_language):
    """Translate SRT subtitles with Gemini while keeping timestamps intact.

    Args:
        srt_text: Full SRT document to translate.
        target_language: Name of the language to translate into.

    Returns:
        The model's reply with surrounding whitespace and any enclosing
        Markdown code fence removed. The reply is not otherwise validated as
        SRT.
    """
    prompt = f"""

    Translate the following SRT subtitles into {target_language}.

    Preserve the SRT format (timestamps and structure).

    Translate only the text after the timestamp.

    Do not add explanations or extra text.

    Ensure the translation is accurate and culturally appropriate.

    Here is the SRT file:

    {srt_text}

    """

    response = model.generate_content(prompt)
    translated = response.text.strip()

    # Replies to "return a file" prompts are often wrapped in a ``` fence,
    # which would make the saved .srt unparseable downstream. Drop it.
    if translated.startswith("```"):
        body_lines = translated.splitlines()[1:]
        if body_lines and body_lines[-1].strip().startswith("```"):
            body_lines = body_lines[:-1]
        translated = "\n".join(body_lines).strip()

    return translated
338
+
339
+
340
+
341
def generate_tts_audio(srt_text, language):
    """Synthesize one MP3 that reads out all subtitle text in an SRT document.

    Args:
        srt_text: SRT document whose cue contents are spoken in order.
        language: Display name of the language; looked up in LANGUAGE_CODES,
            falling back to English ("en") when there is no entry.

    Returns:
        Path to the newly written MP3 file.

    Raises:
        ValueError: If the SRT document contains no text to speak.
    """
    # Extract all text from SRT.
    spoken_text = " ".join(cue.content for cue in srt.parse(srt_text)).strip()
    if not spoken_text:
        # Fail with a clear message rather than deep inside the TTS library.
        raise ValueError("No subtitle text available for text-to-speech.")

    # Get language code.
    lang_code = LANGUAGE_CODES.get(language, "en")

    # Generate TTS into a unique file so concurrent requests do not
    # overwrite each other's audio.
    tts = gTTS(text=spoken_text, lang=lang_code, slow=False)
    fd, audio_file = tempfile.mkstemp(prefix="tts_audio_", suffix=".mp3")
    os.close(fd)
    tts.save(audio_file)
    return audio_file
368
+
369
+
370
+
371
def add_subtitles_to_video(video_file, srt_file, output_file):
    """Burn subtitles from an SRT file into a video.

    Args:
        video_file: Path to the source video.
        srt_file: Path to the SRT file to render.
        output_file: Path the subtitled video is written to.

    Returns:
        ``output_file``.
    """
    def make_text_clip(txt):
        """Render one subtitle line as a white Arial text clip."""
        return TextClip(txt, font='Arial', fontsize=24, color='white')

    video = VideoFileClip(video_file)
    try:
        subtitles = SubtitlesClip(srt_file, make_text_clip)

        # Composite video with subtitles anchored bottom-centre.
        result = CompositeVideoClip([
            video,
            subtitles.set_position(('center', 'bottom')),
        ])
        # Cue end times are estimates and may run past the end of the video;
        # pin the output length to the source so no empty tail is rendered.
        result = result.set_duration(video.duration)

        result.write_videofile(output_file, codec='libx264', audio_codec='aac', threads=4)
    finally:
        # Release the reader opened by VideoFileClip.
        video.close()
    return output_file
406
+
407
+
408
+
409
def process_video(video_file, language="Auto Detect", translate_to=None, add_tts=False, add_subtitles=False):
    """Run the full subtitle pipeline for one uploaded video.

    Steps: extract audio, transcribe it, build SRT subtitles, then optionally
    translate them, synthesize speech, and burn the subtitles into the video.

    Args:
        video_file: Path to the uploaded video.
        language: Source language name; only used as the TTS language when no
            translation is requested.
        translate_to: Target language name, or ``None``/``"None"`` to skip
            translation.
        add_tts: Whether to generate text-to-speech audio.
        add_subtitles: Whether to render a subtitled copy of the video.

    Returns:
        A 5-tuple ``(original_srt_file, translated_srt_file, tts_audio_file,
        output_video_file, status_text)``; entries for skipped steps are
        ``None``.

    Raises:
        gr.Error: If no video was provided.
    """
    if not video_file:
        raise gr.Error("Please upload a video file before processing.")

    wants_translation = bool(translate_to) and translate_to != "None"

    # Extract and transcribe; the intermediate WAV is removed even when
    # transcription fails.
    audio_file = extract_audio_from_video(video_file)
    try:
        transcription = transcribe_audio_with_gemini(audio_file)
    finally:
        if os.path.exists(audio_file):
            os.remove(audio_file)

    # Generate subtitles.
    subtitles = generate_subtitles(transcription)

    # Each request gets its own directory so concurrent users never read or
    # overwrite one another's output files.
    work_dir = tempfile.mkdtemp(prefix="autosubgen_")

    # Save original subtitles.
    original_srt_file = os.path.join(work_dir, "original_subtitles.srt")
    with open(original_srt_file, "w", encoding="utf-8") as f:
        f.write(subtitles)

    # Translate subtitles if requested.
    translated_subtitles = None
    translated_srt_file = None
    if wants_translation:
        translated_subtitles = translate_srt(subtitles, translate_to)
        translated_srt_file = os.path.join(work_dir, "translated_subtitles.srt")
        with open(translated_srt_file, "w", encoding="utf-8") as f:
            f.write(translated_subtitles)

    # Generate TTS audio if requested, speaking the translation when present.
    tts_audio_file = None
    if add_tts:
        target_lang = translate_to if wants_translation else language
        tts_source = translated_subtitles if wants_translation else subtitles
        tts_audio_file = generate_tts_audio(tts_source, target_lang)

    # Create video with subtitles if requested.
    output_video_file = None
    if add_subtitles:
        srt_to_use = translated_srt_file if translated_srt_file else original_srt_file
        output_video_file = os.path.join(work_dir, "output_video.mp4")
        add_subtitles_to_video(video_file, srt_to_use, output_video_file)

    return original_srt_file, translated_srt_file, tts_audio_file, output_video_file, "Detected Language: Auto"
490
+
491
+
492
+
493
# Define the Gradio interface
def _audio_visibility(show):
    """Return an Audio update that shows or hides the TTS player."""
    return gr.Audio(visible=show)


def _video_visibility(show):
    """Return a Video update that shows or hides the subtitled-video player."""
    return gr.Video(visible=show)


with gr.Blocks(title="AutoSubGen Pro - AI Video Subtitle Generator") as demo:
    # Header
    with gr.Column():
        gr.Markdown("# 🎥 AutoSubGen Pro")
        gr.Markdown("### Advanced AI-Powered Video Subtitle Generator")
        gr.Markdown("Generate, translate, and add subtitles with text-to-speech audio to your videos.")

    # Main content
    with gr.Tab("Generate Subtitles"):
        gr.Markdown("### Upload a video file to process")
        with gr.Row():
            video_input = gr.Video(label="Upload Video File", scale=2)
            with gr.Column():
                language_dropdown = gr.Dropdown(
                    choices=SUPPORTED_LANGUAGES,
                    label="Source Language",
                    value="Auto Detect",
                )
                # "Auto Detect" is not a valid translation target, so it is
                # replaced by "None" at the head of the list.
                translate_to_dropdown = gr.Dropdown(
                    choices=["None"] + SUPPORTED_LANGUAGES[1:],
                    label="Translate To",
                    value="None",
                )
                tts_checkbox = gr.Checkbox(label="Generate Text-to-Speech Audio")
                subtitles_checkbox = gr.Checkbox(label="Add Subtitles to Video")

        generate_button = gr.Button("Process Video", variant="primary")

        with gr.Row():
            with gr.Column():
                original_subtitle_output = gr.File(label="Original Subtitles (SRT)")
                translated_subtitle_output = gr.File(label="Translated Subtitles (SRT)")
            with gr.Column():
                # Hidden until the matching checkbox is ticked.
                tts_audio_output = gr.Audio(label="Text-to-Speech Audio", visible=False)
                video_output = gr.Video(label="Video with Subtitles", visible=False)

        detected_language_output = gr.Textbox(label="Detected Language")

        # Show/hide outputs based on checkboxes
        tts_checkbox.change(
            fn=_audio_visibility,
            inputs=tts_checkbox,
            outputs=tts_audio_output,
        )
        subtitles_checkbox.change(
            fn=_video_visibility,
            inputs=subtitles_checkbox,
            outputs=video_output,
        )

    # Link button to function
    generate_button.click(
        process_video,
        inputs=[video_input, language_dropdown, translate_to_dropdown, tts_checkbox, subtitles_checkbox],
        outputs=[original_subtitle_output, translated_subtitle_output, tts_audio_output, video_output, detected_language_output],
    )
620
+
621
+
622
+
623
# Launch the interface only when run as a script, so importing this module
# (for example from a test) does not start a web server.
if __name__ == "__main__":
    demo.launch(share=True)