Athspi committed on
Commit
e2954cc
·
verified ·
1 Parent(s): c9abf90

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -48
app.py CHANGED
@@ -1,9 +1,7 @@
1
  import os
2
  import google.generativeai as genai
3
- from moviepy.video.io.VideoFileClip import VideoFileClip
4
- from moviepy.audio.io.AudioFileClip import AudioFileClip
5
- from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip
6
- from moviepy.video.tools.subtitles import SubtitlesClip
7
  import tempfile
8
  import logging
9
  import srt
@@ -19,12 +17,28 @@ import uuid
19
  logging.basicConfig(level=logging.INFO)
20
  logging.getLogger("moviepy").setLevel(logging.ERROR)
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  # Configure Gemini API
23
- # IMPORTANT: Set your GEMINI_API_KEY as an environment variable
24
  try:
25
  genai.configure(api_key=os.environ["GEMINI_API_KEY"])
26
  except KeyError:
27
- raise Exception("GEMINI_API_KEY environment variable not set. Please set it before running the app.")
 
28
 
29
 
30
  # --- Flask App Initialization ---
@@ -47,11 +61,6 @@ generation_config = {
47
  "response_mime_type": "text/plain",
48
  }
49
 
50
- model = genai.GenerativeModel(
51
- model_name="gemini-2.0-flash",
52
- generation_config=generation_config,
53
- )
54
-
55
  # List of all supported languages for the UI
56
  SUPPORTED_LANGUAGES = [
57
  "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
@@ -74,24 +83,33 @@ SUPPORTED_LANGUAGES = [
74
 
75
  # Language code mapping for Google Text-to-Speech (gTTS)
76
  LANGUAGE_CODES = {
77
- "English": "en", "Chinese": "zh", "German": "de", "Spanish": "es", "Russian": "ru",
78
- "Korean": "ko", "French": "fr", "Japanese": "ja", "Portuguese": "pt", "Turkish": "tr",
79
- "Polish": "pl", "Catalan": "ca", "Dutch": "nl", "Arabic": "ar", "Swedish": "sv",
80
- "Italian": "it", "Indonesian": "id", "Hindi": "hi", "Finnish": "fi", "Vietnamese": "vi",
81
- "Hebrew": "iw", "Ukrainian": "uk", "Greek": "el", "Malay": "ms", "Czech": "cs",
82
- "Romanian": "ro", "Danish": "da", "Hungarian": "hu", "Tamil": "ta", "Norwegian": "no",
83
- "Thai": "th", "Urdu": "ur", "Croatian": "hr", "Bulgarian": "bg", "Lithuanian": "lt",
84
- "Latin": "la", "Malayalam": "ml", "Welsh": "cy", "Slovak": "sk", "Telugu": "te",
85
- "Persian": "fa", "Latvian": "lv", "Bengali": "bn", "Serbian": "sr", "Slovenian": "sl",
86
- "Kannada": "kn", "Estonian": "et", "Macedonian": "mk", "Armenian": "hy", "Nepali": "ne",
87
- "Mongolian": "mn", "Bosnian": "bs", "Kazakh": "kk", "Albanian": "sq", "Swahili": "sw",
88
- "Gujarati": "gu", "Lao": "lo", "Uzbek": "uz", "Maltese": "mt", "Burmese": "my",
89
  "Tagalog": "tl", "Javanese": "jw", "Sundanese": "su", "Afrikaans": "af"
90
  }
91
 
92
 
93
  # --- Core Processing Functions ---
94
 
 
 
 
 
 
 
 
 
 
95
  def extract_audio_from_video(video_path, audio_path):
96
  """Extract audio from a video file into a WAV file."""
97
  try:
@@ -104,12 +122,15 @@ def extract_audio_from_video(video_path, audio_path):
104
 
105
  def transcribe_audio_with_gemini(audio_path, source_language):
106
  """Transcribe audio using Gemini, requesting output in SRT format."""
 
 
 
107
  try:
108
  logging.info(f"Uploading audio file: {audio_path}")
109
  audio_file = genai.upload_file(path=audio_path)
110
-
111
  language_prompt = f"in {source_language}" if source_language != "Auto Detect" else ""
112
-
113
  prompt = f"""
114
  You are a professional transcriber. Transcribe this audio accurately and verbatim {language_prompt}.
115
  Your output MUST be in the SRT (SubRip) format.
@@ -125,7 +146,7 @@ def transcribe_audio_with_gemini(audio_path, source_language):
125
  Ensure timestamps are precise. Respond ONLY with the transcription in the SRT format.
126
  Do not add explanations, notes, or any other text outside of the valid SRT content.
127
  """
128
-
129
  logging.info("Sending transcription request to Gemini...")
130
  response = model.generate_content([prompt, audio_file], request_options={"timeout": 600})
131
  genai.delete_file(audio_file.name) # Clean up the uploaded file
@@ -138,6 +159,9 @@ def transcribe_audio_with_gemini(audio_path, source_language):
138
 
139
  def translate_srt(srt_text, target_language):
140
  """Translate an SRT file using Gemini while preserving its structure."""
 
 
 
141
  try:
142
  prompt = f"""
143
  Translate the following SRT subtitles into {target_language}.
@@ -162,7 +186,7 @@ def generate_tts_audio(srt_text, language, tts_audio_path):
162
  try:
163
  subtitles = list(srt.parse(srt_text))
164
  full_text = " ".join([sub.content.replace('\n', ' ') for sub in subtitles])
165
-
166
  if not full_text:
167
  return None
168
 
@@ -170,7 +194,7 @@ def generate_tts_audio(srt_text, language, tts_audio_path):
170
  if not lang_code:
171
  flash(f"Language '{language}' not supported for TTS, defaulting to English.", "warning")
172
  lang_code = "en"
173
-
174
  logging.info(f"Generating TTS audio in '{language}' ({lang_code})...")
175
  tts = gTTS(text=full_text, lang=lang_code, slow=False)
176
  tts.save(tts_audio_path)
@@ -189,42 +213,45 @@ def create_final_video(original_video_path, srt_text, new_audio_path, output_pat
189
  try:
190
  logging.info("Creating final video...")
191
  original_clip = VideoFileClip(original_video_path)
192
-
 
193
  # If new audio is provided, replace the original audio track
194
  if new_audio_path:
195
  tts_audio_clip = AudioFileClip(new_audio_path)
 
 
 
196
  final_clip = original_clip.set_audio(tts_audio_clip)
197
- else:
198
- final_clip = original_clip
199
 
200
  # If subtitle text is provided, burn it into the video
201
- if srt_text:
202
  with tempfile.NamedTemporaryFile(mode='w', suffix='.srt', delete=False, encoding='utf-8') as temp_srt:
203
  temp_srt.write(srt_text)
204
  srt_filepath = temp_srt.name
205
-
206
  # Subtitle styling
207
  generator = lambda txt: TextClip(
208
  txt, font='Arial-Bold', fontsize=24, color='white',
209
- stroke_color='black', stroke_width=1.5, method='caption',
210
  size=(final_clip.w * 0.8, None) # Subtitles take 80% of video width
211
  )
212
-
213
  subtitles_clip = SubtitlesClip(srt_filepath, generator)
214
  # Composite the video with the subtitles
215
  final_video = CompositeVideoClip([final_clip, subtitles_clip.set_position(('center', 'bottom'))])
216
  os.remove(srt_filepath) # Clean up temp srt file
217
  else:
218
  final_video = final_clip
219
-
220
  # Write the final video file
221
  final_video.write_videofile(output_path, codec='libx264', audio_codec='aac', threads=4, logger=None)
222
-
223
  # Close all clips to release memory
224
- if 'tts_audio_clip' in locals():
225
  tts_audio_clip.close()
226
  final_video.close()
227
-
 
228
  logging.info(f"Final video saved to {output_path}")
229
  return output_path
230
  except Exception as e:
@@ -238,23 +265,35 @@ def create_final_video(original_video_path, srt_text, new_audio_path, output_pat
238
  def index():
239
  """Render the main page. Clear session for a fresh start."""
240
  session.clear()
241
- return render_template('index.html', supported_languages=SUPPORTED_LANGUAGES)
 
 
 
 
242
 
243
  @app.route('/process', methods=['POST'])
244
  def process():
245
  """Handle the video processing request from the form."""
 
 
 
 
246
  if 'video' not in request.files or request.files['video'].filename == '':
247
  flash('No video file selected. Please upload a video.', 'error')
248
  return redirect(url_for('index'))
249
 
250
  video_file = request.files['video']
251
-
252
  # --- Get form options ---
253
  source_language = request.form.get('source_language', 'Auto Detect')
254
  translate_to = request.form.get('translate_to', 'None')
255
  add_tts = 'add_tts' in request.form
256
  add_subtitles = 'add_subtitles' in request.form
257
-
 
 
 
 
258
  # --- Setup a unique session directory for this request ---
259
  session_id = str(uuid.uuid4())
260
  session['session_id'] = session_id
@@ -279,12 +318,12 @@ def process():
279
  if not original_srt_text:
280
  flash('Failed to transcribe the audio. The API call might have failed or the audio is silent.', 'error')
281
  return redirect(url_for('index'))
282
-
283
  original_srt_path = os.path.join(session_dir, "original_subtitles.srt")
284
  with open(original_srt_path, "w", encoding="utf-8") as f:
285
  f.write(original_srt_text)
286
  results['original_srt_file'] = "original_subtitles.srt"
287
-
288
  srt_for_final_video = None
289
  tts_audio_path = None
290
  final_srt_text = original_srt_text
@@ -300,14 +339,14 @@ def process():
300
  final_srt_text = translated_srt_text # Use translated text for subsequent steps
301
  else:
302
  flash(f'Failed to translate subtitles to {translate_to}. Using original subtitles.', 'warning')
303
-
304
  # 4. Generate TTS Audio (if requested)
305
  if add_tts:
306
  tts_lang = translate_to if translate_to != "None" else source_language
307
  if tts_lang == 'Auto Detect':
308
  flash('TTS language cannot be "Auto Detect". Defaulting to English.', 'warning')
309
  tts_lang = 'English'
310
-
311
  tts_audio_path_out = os.path.join(session_dir, "tts_audio.mp3")
312
  tts_audio_path = generate_tts_audio(final_srt_text, tts_lang, tts_audio_path_out)
313
  if tts_audio_path:
@@ -328,10 +367,13 @@ def process():
328
  else:
329
  flash('Failed to create the final processed video.', 'error')
330
 
 
331
  return render_template('index.html',
332
  supported_languages=SUPPORTED_LANGUAGES,
333
  results=results,
334
- session_id=session_id)
 
 
335
 
336
  @app.route('/download/<session_id>/<path:filename>')
337
  def download_file(session_id, filename):
@@ -342,4 +384,4 @@ def download_file(session_id, filename):
342
 
343
  # --- Run the App ---
344
  if __name__ == '__main__':
345
- app.run(host="0.0.0.0", port=7860)
 
1
  import os
2
  import google.generativeai as genai
3
+ from moviepy.editor import *
4
+ import moviepy.config as mp_config
 
 
5
  import tempfile
6
  import logging
7
  import srt
 
17
  logging.basicConfig(level=logging.INFO)
18
  logging.getLogger("moviepy").setLevel(logging.ERROR)
19
 
20
+ # Set a marker to check if ImageMagick is not found
21
+ IMAGEMAGICK_NOT_FOUND = False
22
+ try:
23
+ # This will raise an exception if ImageMagick is not found
24
+ TextClip("test", font='Arial', fontsize=24, color='white')
25
+ except Exception as e:
26
+ if "ImageMagick is not installed" in str(e) or "magick: not found" in str(e):
27
+ IMAGEMAGICK_NOT_FOUND = True
28
+ logging.warning("ImageMagick is not installed or not found in the system's PATH. Subtitle generation will be disabled.")
29
+ logging.warning("Please install ImageMagick and ensure its 'magick' command is accessible from your shell.")
30
+ else:
31
+ # Handle other potential startup errors from TextClip
32
+ logging.error(f"An unexpected error occurred with MoviePy/ImageMagick: {e}")
33
+
34
+
35
  # Configure Gemini API
36
+ # IMPORTANT: Set your GEMINI_API_KEY as an environment variable for this to work.
37
  try:
38
  genai.configure(api_key=os.environ["GEMINI_API_KEY"])
39
  except KeyError:
40
+ # This will be handled in the Flask app to show a message to the user
41
+ pass
42
 
43
 
44
  # --- Flask App Initialization ---
 
61
  "response_mime_type": "text/plain",
62
  }
63
 
 
 
 
 
 
64
  # List of all supported languages for the UI
65
  SUPPORTED_LANGUAGES = [
66
  "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
 
83
 
84
  # Language code mapping for Google Text-to-Speech (gTTS)
85
  LANGUAGE_CODES = {
86
+ "English": "en", "Chinese": "zh", "German": "de", "Spanish": "es", "Russian": "ru",
87
+ "Korean": "ko", "French": "fr", "Japanese": "ja", "Portuguese": "pt", "Turkish": "tr",
88
+ "Polish": "pl", "Catalan": "ca", "Dutch": "nl", "Arabic": "ar", "Swedish": "sv",
89
+ "Italian": "it", "Indonesian": "id", "Hindi": "hi", "Finnish": "fi", "Vietnamese": "vi",
90
+ "Hebrew": "iw", "Ukrainian": "uk", "Greek": "el", "Malay": "ms", "Czech": "cs",
91
+ "Romanian": "ro", "Danish": "da", "Hungarian": "hu", "Tamil": "ta", "Norwegian": "no",
92
+ "Thai": "th", "Urdu": "ur", "Croatian": "hr", "Bulgarian": "bg", "Lithuanian": "lt",
93
+ "Latin": "la", "Malayalam": "ml", "Welsh": "cy", "Slovak": "sk", "Telugu": "te",
94
+ "Persian": "fa", "Latvian": "lv", "Bengali": "bn", "Serbian": "sr", "Slovenian": "sl",
95
+ "Kannada": "kn", "Estonian": "et", "Macedonian": "mk", "Armenian": "hy", "Nepali": "ne",
96
+ "Mongolian": "mn", "Bosnian": "bs", "Kazakh": "kk", "Albanian": "sq", "Swahili": "sw",
97
+ "Gujarati": "gu", "Lao": "lo", "Uzbek": "uz", "Maltese": "mt", "Burmese": "my",
98
  "Tagalog": "tl", "Javanese": "jw", "Sundanese": "su", "Afrikaans": "af"
99
  }
100
 
101
 
102
  # --- Core Processing Functions ---
103
 
104
+ def get_gemini_model():
105
+ """Initializes and returns the Gemini model, checking for API key."""
106
+ if "GEMINI_API_KEY" not in os.environ:
107
+ return None
108
+ return genai.GenerativeModel(
109
+ model_name="gemini-1.5-flash",
110
+ generation_config=generation_config,
111
+ )
112
+
113
  def extract_audio_from_video(video_path, audio_path):
114
  """Extract audio from a video file into a WAV file."""
115
  try:
 
122
 
123
  def transcribe_audio_with_gemini(audio_path, source_language):
124
  """Transcribe audio using Gemini, requesting output in SRT format."""
125
+ model = get_gemini_model()
126
+ if not model:
127
+ return None
128
  try:
129
  logging.info(f"Uploading audio file: {audio_path}")
130
  audio_file = genai.upload_file(path=audio_path)
131
+
132
  language_prompt = f"in {source_language}" if source_language != "Auto Detect" else ""
133
+
134
  prompt = f"""
135
  You are a professional transcriber. Transcribe this audio accurately and verbatim {language_prompt}.
136
  Your output MUST be in the SRT (SubRip) format.
 
146
  Ensure timestamps are precise. Respond ONLY with the transcription in the SRT format.
147
  Do not add explanations, notes, or any other text outside of the valid SRT content.
148
  """
149
+
150
  logging.info("Sending transcription request to Gemini...")
151
  response = model.generate_content([prompt, audio_file], request_options={"timeout": 600})
152
  genai.delete_file(audio_file.name) # Clean up the uploaded file
 
159
 
160
  def translate_srt(srt_text, target_language):
161
  """Translate an SRT file using Gemini while preserving its structure."""
162
+ model = get_gemini_model()
163
+ if not model:
164
+ return None
165
  try:
166
  prompt = f"""
167
  Translate the following SRT subtitles into {target_language}.
 
186
  try:
187
  subtitles = list(srt.parse(srt_text))
188
  full_text = " ".join([sub.content.replace('\n', ' ') for sub in subtitles])
189
+
190
  if not full_text:
191
  return None
192
 
 
194
  if not lang_code:
195
  flash(f"Language '{language}' not supported for TTS, defaulting to English.", "warning")
196
  lang_code = "en"
197
+
198
  logging.info(f"Generating TTS audio in '{language}' ({lang_code})...")
199
  tts = gTTS(text=full_text, lang=lang_code, slow=False)
200
  tts.save(tts_audio_path)
 
213
  try:
214
  logging.info("Creating final video...")
215
  original_clip = VideoFileClip(original_video_path)
216
+ final_clip = original_clip
217
+
218
  # If new audio is provided, replace the original audio track
219
  if new_audio_path:
220
  tts_audio_clip = AudioFileClip(new_audio_path)
221
+ # If the new audio is shorter than the video, loop it.
222
+ if tts_audio_clip.duration < original_clip.duration:
223
+ tts_audio_clip = tts_audio_clip.fx(vfx.loop, duration=original_clip.duration)
224
  final_clip = original_clip.set_audio(tts_audio_clip)
 
 
225
 
226
  # If subtitle text is provided, burn it into the video
227
+ if srt_text and not IMAGEMAGICK_NOT_FOUND:
228
  with tempfile.NamedTemporaryFile(mode='w', suffix='.srt', delete=False, encoding='utf-8') as temp_srt:
229
  temp_srt.write(srt_text)
230
  srt_filepath = temp_srt.name
231
+
232
  # Subtitle styling
233
  generator = lambda txt: TextClip(
234
  txt, font='Arial-Bold', fontsize=24, color='white',
235
+ stroke_color='black', stroke_width=1, method='caption',
236
  size=(final_clip.w * 0.8, None) # Subtitles take 80% of video width
237
  )
238
+
239
  subtitles_clip = SubtitlesClip(srt_filepath, generator)
240
  # Composite the video with the subtitles
241
  final_video = CompositeVideoClip([final_clip, subtitles_clip.set_position(('center', 'bottom'))])
242
  os.remove(srt_filepath) # Clean up temp srt file
243
  else:
244
  final_video = final_clip
245
+
246
  # Write the final video file
247
  final_video.write_videofile(output_path, codec='libx264', audio_codec='aac', threads=4, logger=None)
248
+
249
  # Close all clips to release memory
250
+ if 'tts_audio_clip' in locals() and tts_audio_clip:
251
  tts_audio_clip.close()
252
  final_video.close()
253
+ original_clip.close()
254
+
255
  logging.info(f"Final video saved to {output_path}")
256
  return output_path
257
  except Exception as e:
 
265
  def index():
266
  """Render the main page. Clear session for a fresh start."""
267
  session.clear()
268
+ api_key_status = "GEMINI_API_KEY" in os.environ
269
+ return render_template('index.html',
270
+ supported_languages=SUPPORTED_LANGUAGES,
271
+ imagemagick_missing=IMAGEMAGICK_NOT_FOUND,
272
+ api_key_set=api_key_status)
273
 
274
  @app.route('/process', methods=['POST'])
275
  def process():
276
  """Handle the video processing request from the form."""
277
+ if "GEMINI_API_KEY" not in os.environ:
278
+ flash('Your GEMINI_API_KEY environment variable is not set. Please set it to use the application.', 'error')
279
+ return redirect(url_for('index'))
280
+
281
  if 'video' not in request.files or request.files['video'].filename == '':
282
  flash('No video file selected. Please upload a video.', 'error')
283
  return redirect(url_for('index'))
284
 
285
  video_file = request.files['video']
286
+
287
  # --- Get form options ---
288
  source_language = request.form.get('source_language', 'Auto Detect')
289
  translate_to = request.form.get('translate_to', 'None')
290
  add_tts = 'add_tts' in request.form
291
  add_subtitles = 'add_subtitles' in request.form
292
+
293
+ if IMAGEMAGICK_NOT_FOUND and add_subtitles:
294
+ flash("Cannot add subtitles because ImageMagick is not installed. The video will be processed without them.", "warning")
295
+ add_subtitles = False
296
+
297
  # --- Setup a unique session directory for this request ---
298
  session_id = str(uuid.uuid4())
299
  session['session_id'] = session_id
 
318
  if not original_srt_text:
319
  flash('Failed to transcribe the audio. The API call might have failed or the audio is silent.', 'error')
320
  return redirect(url_for('index'))
321
+
322
  original_srt_path = os.path.join(session_dir, "original_subtitles.srt")
323
  with open(original_srt_path, "w", encoding="utf-8") as f:
324
  f.write(original_srt_text)
325
  results['original_srt_file'] = "original_subtitles.srt"
326
+
327
  srt_for_final_video = None
328
  tts_audio_path = None
329
  final_srt_text = original_srt_text
 
339
  final_srt_text = translated_srt_text # Use translated text for subsequent steps
340
  else:
341
  flash(f'Failed to translate subtitles to {translate_to}. Using original subtitles.', 'warning')
342
+
343
  # 4. Generate TTS Audio (if requested)
344
  if add_tts:
345
  tts_lang = translate_to if translate_to != "None" else source_language
346
  if tts_lang == 'Auto Detect':
347
  flash('TTS language cannot be "Auto Detect". Defaulting to English.', 'warning')
348
  tts_lang = 'English'
349
+
350
  tts_audio_path_out = os.path.join(session_dir, "tts_audio.mp3")
351
  tts_audio_path = generate_tts_audio(final_srt_text, tts_lang, tts_audio_path_out)
352
  if tts_audio_path:
 
367
  else:
368
  flash('Failed to create the final processed video.', 'error')
369
 
370
+ api_key_status = "GEMINI_API_KEY" in os.environ
371
  return render_template('index.html',
372
  supported_languages=SUPPORTED_LANGUAGES,
373
  results=results,
374
+ session_id=session_id,
375
+ imagemagick_missing=IMAGEMAGICK_NOT_FOUND,
376
+ api_key_set=api_key_status)
377
 
378
  @app.route('/download/<session_id>/<path:filename>')
379
  def download_file(session_id, filename):
 
384
 
385
  # --- Run the App ---
386
  if __name__ == '__main__':
387
+ app.run(host="0.0.0.0", port=7860)