Athspi committed on
Commit
1b72949
·
verified ·
1 Parent(s): 12ef355

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +227 -223
app.py CHANGED
@@ -1,20 +1,40 @@
1
  import os
2
  import google.generativeai as genai
3
- from moviepy.video.io.VideoFileClip import VideoFileClip
4
- from moviepy.audio.io.AudioFileClip import AudioFileClip
5
- from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip
6
  from moviepy.video.tools.subtitles import SubtitlesClip
7
  import tempfile
8
  import logging
9
- import gradio as gr
10
- from gtts import gTTS
11
  import srt
 
 
 
 
 
 
 
12
 
13
  # Suppress moviepy logs
14
  logging.getLogger("moviepy").setLevel(logging.ERROR)
15
 
16
  # Configure Gemini API
17
# Fail fast with an actionable message when the API key is missing, instead
# of surfacing a bare KeyError from the environment lookup.
_gemini_api_key = os.environ.get("GEMINI_API_KEY")
if _gemini_api_key is None:
    # KeyError is kept so the exception type matches the previous behavior.
    raise KeyError("GEMINI_API_KEY environment variable not set. Please set it before running the app.")
genai.configure(api_key=_gemini_api_key)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  # Create the Gemini model
20
  generation_config = {
@@ -26,7 +46,7 @@ generation_config = {
26
  }
27
 
28
  model = genai.GenerativeModel(
29
- model_name="gemini-2.0-pro-exp-02-05",
30
  generation_config=generation_config,
31
  )
32
 
@@ -52,7 +72,7 @@ SUPPORTED_LANGUAGES = [
52
 
53
  # Language code mapping for gTTS
54
  LANGUAGE_CODES = {
55
- "English": "en", "Chinese": "zh", "German": "de", "Spanish": "es",
56
  "Russian": "ru", "Korean": "ko", "French": "fr", "Japanese": "ja",
57
  "Portuguese": "pt", "Turkish": "tr", "Polish": "pl", "Catalan": "ca",
58
  "Dutch": "nl", "Arabic": "ar", "Swedish": "sv", "Italian": "it",
@@ -79,235 +99,219 @@ LANGUAGE_CODES = {
79
  "Bashkir": "ba", "Javanese": "jv", "Sundanese": "su"
80
  }
81
 
82
def extract_audio_from_video(video_file):
    """Extract audio from a video file and save it as a WAV file.

    Args:
        video_file: Path to the source video.

    Returns:
        Path of the WAV file (written at fps=16000) inside the system temp
        directory. The caller is responsible for deleting it.

    Raises:
        ValueError: If the video has no audio track.
    """
    # A unique name per call keeps concurrent requests from overwriting each
    # other's audio; a single fixed file name would be shared by every caller.
    fd, audio_file = tempfile.mkstemp(prefix="extracted_audio_", suffix=".wav")
    os.close(fd)

    video = VideoFileClip(video_file)
    try:
        if video.audio is None:
            raise ValueError("The video has no audio track to extract.")
        video.audio.write_audiofile(audio_file, fps=16000, logger=None)
    except Exception:
        # Do not leave the placeholder file behind when extraction fails.
        if os.path.exists(audio_file):
            os.remove(audio_file)
        raise
    finally:
        # Release the reader resources held by the clip.
        video.close()
    return audio_file
88
-
89
def transcribe_audio_with_gemini(audio_file):
    """Transcribe audio using Gemini with a prompt asking for per-sentence timestamps.

    Args:
        audio_file: Path to a WAV file; its bytes are sent inline to the model.

    Returns:
        The model's text response with surrounding whitespace stripped. The
        prompt asks for lines of the form ``[HH:MM:SS] Sentence``.
    """
    with open(audio_file, "rb") as f:
        audio_data = f.read()

    audio_blob = {
        'mime_type': 'audio/wav',
        'data': audio_data
    }

    prompt = """
    You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language.
    Include timestamps for each sentence in the following format:
    [HH:MM:SS] Sentence 1
    [HH:MM:SS] Sentence 2
    ...
    Ensure the timestamps are accurate and correspond to the start of each sentence.
    Respond only with the transcription and timestamps. Do not add explanations or extra text.
    """

    # Send the instructions and the audio in ONE request. Sending the prompt
    # as its own chat turn costs an extra API round-trip and lets the model
    # answer the instructions before it has seen any audio.
    response = model.generate_content([prompt, audio_blob])
    return response.text.strip()
113
-
114
def generate_subtitles(transcription):
    """Generate SRT subtitles from a transcription with ``[HH:MM:SS]`` timestamps.

    Each non-blank line becomes one subtitle. A line starting with
    ``[timestamp]`` starts at that time; any other line starts at 0 seconds.

    Args:
        transcription: Text with one sentence per line.

    Returns:
        The subtitles composed into a single SRT-formatted string.
    """
    # Local import: datetime is not among the file-level imports visible for
    # this version of the module.
    import datetime

    default_duration = 5  # Placeholder duration when no better end time is known

    # First pass: collect (start_seconds, text) for every non-blank line.
    entries = []
    for raw_line in transcription.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith("[") and "]" in line:
            # partition keeps any later "]" characters as part of the text.
            stamp, _, text = line[1:].partition("]")
            entries.append((time_to_seconds(stamp), text.strip()))
        else:
            entries.append((0, line))

    # Second pass: build subtitles, ending each one no later than the start
    # of the next so consecutive subtitles do not overlap on screen.
    srt_subtitles = []
    for position, (start_seconds, text) in enumerate(entries, start=1):
        end_seconds = start_seconds + default_duration
        if position < len(entries):
            next_start = entries[position][0]
            if start_seconds < next_start < end_seconds:
                end_seconds = next_start
        srt_subtitles.append(
            srt.Subtitle(
                index=position,
                start=datetime.timedelta(seconds=start_seconds),
                end=datetime.timedelta(seconds=end_seconds),
                content=text,
            )
        )

    return srt.compose(srt_subtitles)
 
 
143
 
144
def time_to_seconds(time_str):
    """Convert a ``HH:MM:SS`` (or ``MM:SS`` / ``SS``) string to whole seconds.

    Args:
        time_str: Colon-separated integer fields, most significant first.

    Returns:
        The total number of seconds as an int.

    Raises:
        ValueError: If a field is not an integer or there are more than
            three fields.
    """
    fields = [int(part) for part in time_str.strip().split(":")]
    if len(fields) > 3:
        raise ValueError(f"Expected at most HH:MM:SS, got {time_str!r}")

    # Fold left to right so shorter stamps are read as MM:SS or SS.
    total = 0
    for value in fields:
        total = total * 60 + value
    return total
 
 
 
 
 
 
148
 
149
def seconds_to_time(seconds):
    """Convert a number of seconds to an ``HH:MM:SS`` string.

    Args:
        seconds: Duration in seconds. Fractional values are truncated to
            whole seconds so each field formats as a zero-padded integer.

    Returns:
        The duration formatted as ``HH:MM:SS``.
    """
    # Without truncation a float input would format as e.g. "0.0:0.0:59.9".
    whole_seconds = int(seconds)
    hh, remainder = divmod(whole_seconds, 3600)
    mm, ss = divmod(remainder, 60)
    return f"{hh:02}:{mm:02}:{ss:02}"
155
 
156
def translate_srt(srt_text, target_language):
    """Ask the Gemini model to translate SRT subtitles, keeping the timestamps.

    Args:
        srt_text: The SRT content to translate.
        target_language: Name of the language to translate into.

    Returns:
        The model's response text, unmodified.
    """
    translation_request = f"""
    Translate the following SRT subtitles into {target_language}.
    Preserve the SRT format (timestamps and structure).
    Translate only the text after the timestamp.
    Do not add explanations or extra text.
    Ensure the translation is accurate and culturally appropriate.
    Here is the SRT file:
    {srt_text}
    """

    return model.generate_content(translation_request).text
170
-
171
def generate_tts_audio(srt_text, language):
    """Generate TTS audio from SRT text.

    Args:
        srt_text: SRT content; the text of every subtitle is joined with
            spaces and spoken as one passage.
        language: Language name looked up in LANGUAGE_CODES; unknown names
            fall back to "en".

    Returns:
        Path of the MP3 file written inside the system temp directory.
    """
    # Extract all text from SRT
    all_text = " ".join(sub.content for sub in srt.parse(srt_text))

    # Get language code
    lang_code = LANGUAGE_CODES.get(language, "en")

    # A unique name per call keeps concurrent requests from overwriting each
    # other's audio; a single fixed file name would be shared by every caller.
    fd, audio_file = tempfile.mkstemp(prefix="tts_audio_", suffix=".mp3")
    os.close(fd)

    # Generate TTS
    try:
        gTTS(text=all_text, lang=lang_code, slow=False).save(audio_file)
    except Exception:
        # Do not leave the placeholder file behind when synthesis fails.
        if os.path.exists(audio_file):
            os.remove(audio_file)
        raise
    return audio_file
185
 
186
def add_subtitles_to_video(video_file, srt_file, output_file):
    """Add subtitles to video and return the path to the new video.

    Args:
        video_file: Path to the source video.
        srt_file: Path to the SRT file to overlay.
        output_file: Path the subtitled video is written to.

    Returns:
        ``output_file``.
    """
    # Local import: TextClip is used below but is not among the file-level
    # imports visible for this version of the module.
    from moviepy.video.VideoClip import TextClip

    def make_text_clip(txt):
        return TextClip(txt, font='Arial', fontsize=24, color='white')

    # Create subtitle clip
    subtitles = SubtitlesClip(srt_file, make_text_clip)

    # Load video
    video = VideoFileClip(video_file)
    try:
        # Composite video with subtitles
        result = CompositeVideoClip([
            video,
            subtitles.set_position(('center', 'bottom'))
        ])

        # Write output
        result.write_videofile(output_file, codec='libx264', audio_codec='aac', threads=4)
    finally:
        # Release the reader resources held by the source clip.
        video.close()
    return output_file
204
-
205
def process_video(video_file, language="Auto Detect", translate_to=None, add_tts=False, add_subtitles=False):
    """Process a video file with full options.

    Extracts the audio, transcribes it, writes the subtitles as SRT, and
    optionally translates them, synthesizes speech and burns the subtitles
    into a copy of the video.

    Args:
        video_file: Path to the uploaded video.
        language: Source language name; used as the TTS language when no
            translation is requested.
        translate_to: Target language name, or None / "None" to skip translation.
        add_tts: Whether to generate text-to-speech audio.
        add_subtitles: Whether to render a video with subtitles.

    Returns:
        A tuple ``(original_srt_file, translated_srt_file, tts_audio_file,
        output_video_file, status_message)``; entries for steps that were not
        requested are None.
    """
    wants_translation = bool(translate_to) and translate_to != "None"

    # Each call writes into its own directory so simultaneous requests do
    # not overwrite each other's subtitle and video files.
    work_dir = tempfile.mkdtemp(prefix="autosubgen_")

    # Extract audio from the video
    audio_file = extract_audio_from_video(video_file)
    try:
        # Transcribe audio using Gemini
        transcription = transcribe_audio_with_gemini(audio_file)

        # Generate subtitles
        subtitles = generate_subtitles(transcription)

        # Save original subtitles
        original_srt_file = os.path.join(work_dir, "original_subtitles.srt")
        with open(original_srt_file, "w", encoding="utf-8") as f:
            f.write(subtitles)

        # Translate subtitles if requested
        translated_srt_file = None
        final_subtitles = subtitles
        if wants_translation:
            final_subtitles = translate_srt(subtitles, translate_to)
            translated_srt_file = os.path.join(work_dir, "translated_subtitles.srt")
            with open(translated_srt_file, "w", encoding="utf-8") as f:
                f.write(final_subtitles)

        # Generate TTS audio if requested
        tts_audio_file = None
        if add_tts:
            target_lang = translate_to if wants_translation else language
            tts_audio_file = generate_tts_audio(final_subtitles, target_lang)

        # Create video with subtitles if requested
        output_video_file = None
        if add_subtitles:
            srt_to_use = translated_srt_file if translated_srt_file else original_srt_file
            output_video_file = os.path.join(work_dir, "output_video.mp4")
            add_subtitles_to_video(video_file, srt_to_use, output_video_file)
    finally:
        # Clean up the extracted audio even when a later step fails.
        if os.path.exists(audio_file):
            os.remove(audio_file)

    return original_srt_file, translated_srt_file, tts_audio_file, output_video_file, "Detected Language: Auto"
246
-
247
# Build the Gradio interface
with gr.Blocks(title="AutoSubGen Pro - AI Video Subtitle Generator") as demo:
    # Page header
    with gr.Column():
        gr.Markdown("# 🎥 AutoSubGen Pro")
        gr.Markdown("### Advanced AI-Powered Video Subtitle Generator")
        gr.Markdown("Generate, translate, and add subtitles with text-to-speech audio to your videos.")

    # Main tab: upload, options, results
    with gr.Tab("Generate Subtitles"):
        gr.Markdown("### Upload a video file to process")
        with gr.Row():
            video_input = gr.Video(label="Upload Video File", scale=2)
            with gr.Column():
                language_dropdown = gr.Dropdown(
                    choices=SUPPORTED_LANGUAGES,
                    label="Source Language",
                    value="Auto Detect",
                )
                # The first entry of SUPPORTED_LANGUAGES is skipped for targets.
                translate_to_dropdown = gr.Dropdown(
                    choices=["None"] + SUPPORTED_LANGUAGES[1:],
                    label="Translate To",
                    value="None",
                )
                tts_checkbox = gr.Checkbox(label="Generate Text-to-Speech Audio")
                subtitles_checkbox = gr.Checkbox(label="Add Subtitles to Video")

        generate_button = gr.Button("Process Video", variant="primary")

        with gr.Row():
            with gr.Column():
                original_subtitle_output = gr.File(label="Original Subtitles (SRT)")
                translated_subtitle_output = gr.File(label="Translated Subtitles (SRT)")
            with gr.Column():
                # Hidden until the matching checkbox is ticked.
                tts_audio_output = gr.Audio(label="Text-to-Speech Audio", visible=False)
                video_output = gr.Video(label="Video with Subtitles", visible=False)

        detected_language_output = gr.Textbox(label="Detected Language")

    # Helper returning both visibility updates at once (not wired to any event here)
    def toggle_outputs(tts, subs):
        audio_update = gr.Audio(visible=tts)
        video_update = gr.Video(visible=subs)
        return [audio_update, video_update]

    def _show_tts_audio(checked):
        """Show the TTS audio player only while its checkbox is ticked."""
        return gr.Audio(visible=checked)

    def _show_subtitled_video(checked):
        """Show the subtitled video player only while its checkbox is ticked."""
        return gr.Video(visible=checked)

    tts_checkbox.change(
        fn=_show_tts_audio,
        inputs=tts_checkbox,
        outputs=tts_audio_output
    )

    subtitles_checkbox.change(
        fn=_show_subtitled_video,
        inputs=subtitles_checkbox,
        outputs=video_output
    )

    # Run the processing pipeline when the button is pressed
    generate_button.click(
        process_video,
        inputs=[video_input, language_dropdown, translate_to_dropdown, tts_checkbox, subtitles_checkbox],
        outputs=[original_subtitle_output, translated_subtitle_output, tts_audio_output, video_output, detected_language_output]
    )

# Start the web server
demo.launch(share=True)
 
 
 
 
 
 
 
1
  import os
2
  import google.generativeai as genai
3
+ from moviepy.editor import VideoFileClip, AudioFileClip, CompositeVideoClip, TextClip
 
 
4
  from moviepy.video.tools.subtitles import SubtitlesClip
5
  import tempfile
6
  import logging
 
 
7
  import srt
8
+ import datetime
9
+ from gtts import gTTS
10
+ from flask import Flask, request, render_template, send_from_directory, url_for, flash, session
11
+ from werkzeug.utils import secure_filename
12
+ import uuid
13
+
14
+ # --- Configuration ---
15
 
16
  # Suppress moviepy logs
17
  logging.getLogger("moviepy").setLevel(logging.ERROR)
18
 
19
  # Configure Gemini API
20
# IMPORTANT: Set your GEMINI_API_KEY as an environment variable
# For example, in your terminal: export GEMINI_API_KEY="YOUR_API_KEY"
# Only the environment lookup is checked here: wrapping genai.configure() in
# the same try block would misreport any KeyError raised inside the library
# as a missing environment variable.
_gemini_api_key = os.environ.get("GEMINI_API_KEY")
if _gemini_api_key is None:
    raise RuntimeError("GEMINI_API_KEY environment variable not set. Please set it before running the app.")
genai.configure(api_key=_gemini_api_key)
26
+
27
+
28
# --- Flask App Initialization ---
app = Flask(__name__)
# SECRET_KEY signs the session cookie (which also carries flash messages).
# A key generated with os.urandom() differs on every start and in every
# worker process, which invalidates existing sessions; prefer a stable key
# from the FLASK_SECRET_KEY environment variable and only fall back to a
# random one (fine for single-process local runs) when it is not set.
app.config['SECRET_KEY'] = os.environ.get('FLASK_SECRET_KEY') or os.urandom(24)
app.config['UPLOAD_FOLDER'] = os.path.join(os.getcwd(), 'temp_uploads')

# Ensure the upload folder exists
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
35
+
36
+
37
+ # --- Model and Language Configuration ---
38
 
39
  # Create the Gemini model
40
  generation_config = {
 
46
  }
47
 
48
  model = genai.GenerativeModel(
49
+ model_name="gemini-1.5-pro-latest", # Using a stable and capable model
50
  generation_config=generation_config,
51
  )
52
 
 
72
 
73
  # Language code mapping for gTTS
74
  LANGUAGE_CODES = {
75
+ "English": "en", "Chinese": "zh", "German": "de", "Spanish": "es",
76
  "Russian": "ru", "Korean": "ko", "French": "fr", "Japanese": "ja",
77
  "Portuguese": "pt", "Turkish": "tr", "Polish": "pl", "Catalan": "ca",
78
  "Dutch": "nl", "Arabic": "ar", "Swedish": "sv", "Italian": "it",
 
99
  "Bashkir": "ba", "Javanese": "jv", "Sundanese": "su"
100
  }
101
 
102
+
103
+ # --- Core Processing Functions ---
104
+
105
def time_to_seconds(time_obj):
    """Convert a datetime.time or datetime.timedelta to seconds.

    Args:
        time_obj: A ``datetime.time`` (read as an offset from 00:00:00) or a
            ``datetime.timedelta`` (the type the ``srt`` library uses for
            subtitle start/end times).

    Returns:
        The number of seconds as a float, including the sub-second part.
    """
    if isinstance(time_obj, datetime.timedelta):
        return time_obj.total_seconds()
    return time_obj.hour * 3600 + time_obj.minute * 60 + time_obj.second + time_obj.microsecond / 1e6
108
+
109
def extract_audio_from_video(video_path, audio_path):
    """Extract audio from a video file and save it as a WAV file.

    Args:
        video_path: Path to the source video.
        audio_path: Path the audio is written to (at fps=16000).

    Returns:
        ``audio_path`` on success, or None if extraction failed; the error
        is logged.
    """
    video = None
    try:
        video = VideoFileClip(video_path)
        if video.audio is None:
            # Report the real cause instead of an AttributeError on None.
            logging.error("Error extracting audio: the video has no audio track")
            return None
        video.audio.write_audiofile(audio_path, fps=16000, logger=None)
        return audio_path
    except Exception as e:
        logging.error(f"Error extracting audio: {e}")
        return None
    finally:
        # Release the reader resources held by the clip on every path.
        if video is not None:
            video.close()
118
+
119
def transcribe_audio_with_gemini(audio_path, source_language):
    """Transcribe audio using Gemini with a prompt for accurate timestamps.

    Args:
        audio_path: Path to the audio file; it is uploaded to the Gemini
            file store for the duration of the request.
        source_language: Language name to transcribe in, or "Auto Detect"
            to leave the language unspecified in the prompt.

    Returns:
        The model's response text (requested in SRT format) with surrounding
        whitespace stripped, or None if the upload or generation failed; the
        error is logged.
    """
    uploaded_audio = None
    try:
        uploaded_audio = genai.upload_file(path=audio_path)
        language_prompt = f"in {source_language}" if source_language != "Auto Detect" else ""

        prompt = f"""
        You are a professional transcriber. Transcribe this audio accurately and verbatim {language_prompt}.
        Include timestamps for each sentence in the SRT (SubRip) format.
        Example:
        1
        00:00:01,234 --> 00:00:05,678
        This is the first sentence.

        2
        00:00:06,123 --> 00:00:09,456
        This is the second sentence.

        Ensure the timestamps are precise and correspond to the start and end of each spoken sentence.
        Respond ONLY with the transcription in the SRT format. Do not add explanations or any extra text.
        """

        response = model.generate_content([prompt, uploaded_audio])
        return response.text.strip()
    except Exception as e:
        logging.error(f"Error during Gemini transcription: {e}")
        return None
    finally:
        # Always delete the uploaded file, including when generation fails.
        # A cleanup failure is logged but must not discard a good transcription.
        if uploaded_audio is not None:
            try:
                genai.delete_file(uploaded_audio.name)
            except Exception as cleanup_error:
                logging.warning(f"Could not delete uploaded audio file: {cleanup_error}")
147
 
 
 
 
 
 
 
148
 
149
def translate_srt(srt_text, target_language):
    """Have Gemini translate SRT subtitles while keeping the SRT structure.

    Args:
        srt_text: The SRT content to translate.
        target_language: Name of the language to translate into.

    Returns:
        The model's response text with surrounding whitespace stripped, or
        None if the request failed; the error is logged.
    """
    try:
        translation_request = f"""
        Translate the following SRT subtitles into {target_language}.
        Preserve the SRT format perfectly (index numbers, timestamps, and structure).
        Translate only the subtitle text on the lines after the timestamps.
        Do not add any explanations or extra text. Your output must be a valid SRT file.
        Here is the SRT file content:
        {srt_text}
        """
        translated = model.generate_content(translation_request).text.strip()
    except Exception as e:
        logging.error(f"Error during translation: {e}")
        return None
    else:
        return translated
165
+
166
def generate_tts_audio(srt_text, language, tts_audio_path):
    """Synthesize speech for the text of an SRT document.

    Args:
        srt_text: SRT content; every subtitle's text is joined with spaces.
        language: Language name looked up in LANGUAGE_CODES ("en" if unknown).
        tts_audio_path: Path the generated audio is saved to.

    Returns:
        ``tts_audio_path`` on success, or None on failure; the error is logged.
    """
    try:
        spoken_parts = []
        for entry in srt.parse(srt_text):
            spoken_parts.append(entry.content)
        narration = " ".join(spoken_parts)

        gtts_code = LANGUAGE_CODES.get(language, "en")

        speech = gTTS(text=narration, lang=gtts_code, slow=False)
        speech.save(tts_audio_path)
    except Exception as e:
        logging.error(f"Error generating TTS audio: {e}")
        return None
    else:
        return tts_audio_path
180
 
181
def add_subtitles_to_video(video_path, srt_text, output_video_path):
    """Add subtitles to video and return the path to the new video.

    Args:
        video_path: Path to the source video.
        srt_text: SRT content to overlay at the bottom centre of the frame.
        output_video_path: Path the subtitled video is written to.

    Returns:
        ``output_video_path`` on success, or None on failure; the error is
        logged.
    """
    def generator(txt):
        return TextClip(txt, font='Arial-Bold', fontsize=24, color='white',
                        stroke_color='black', stroke_width=1)

    srt_path = None
    video = None
    try:
        # MoviePy's SubtitlesClip requires a file path
        with tempfile.NamedTemporaryFile(mode='w', suffix='.srt', delete=False, encoding='utf-8') as temp_srt:
            # Record the path first so cleanup still runs if the write fails.
            srt_path = temp_srt.name
            temp_srt.write(srt_text)

        video = VideoFileClip(video_path)
        subtitles = SubtitlesClip(srt_path, generator)

        result = CompositeVideoClip([video, subtitles.set_position(('center', 'bottom'))])

        # Write output with original audio
        result.write_videofile(output_video_path, codec='libx264', audio_codec='aac', threads=4, logger=None)
        return output_video_path
    except Exception as e:
        logging.error(f"Error adding subtitles to video: {e}")
        return None
    finally:
        # Clean up on every path: close the source clip and remove the
        # temporary SRT file even when rendering failed.
        if video is not None:
            video.close()
        if srt_path is not None and os.path.exists(srt_path):
            os.remove(srt_path)
206
+
207
+
208
+ # --- Flask Routes ---
209
+
210
@app.route('/')
def index():
    """Serve the upload form after resetting the visitor's session."""
    # Drop whatever a previous request stored in the session.
    session.clear()
    page = render_template('index.html', supported_languages=SUPPORTED_LANGUAGES)
    return page
215
+
216
def _render_index(**context):
    """Render the main page with the language list plus any extra template context."""
    return render_template('index.html', supported_languages=SUPPORTED_LANGUAGES, **context)


@app.route('/process', methods=['POST'])
def process():
    """Handle the video processing request.

    Reads the uploaded ``video`` file and the form options
    (``source_language``, ``translate_to``, ``add_tts``, ``add_subtitles``),
    runs the pipeline inside a per-request directory under UPLOAD_FOLDER and
    renders ``index.html`` with the names of the generated files.

    Returns:
        The rendered page. Fatal failures (no upload, audio extraction or
        transcription failed) are reported with an 'error' flash message;
        failures of optional steps with a 'warning' flash message.
    """
    video_file = request.files.get('video')
    if video_file is None or video_file.filename == '':
        flash('No video file selected. Please upload a video.', 'error')
        return _render_index()

    # --- Get form options ---
    source_language = request.form.get('source_language', 'Auto Detect')
    translate_to = request.form.get('translate_to', 'None')
    add_tts = 'add_tts' in request.form
    add_subtitles = 'add_subtitles' in request.form

    # Both language fields are interpolated into model prompts, so only
    # values from the known language list are accepted from the client.
    if source_language not in SUPPORTED_LANGUAGES:
        source_language = 'Auto Detect'
    if translate_to and translate_to != 'None' and (
            translate_to == 'Auto Detect' or translate_to not in SUPPORTED_LANGUAGES):
        flash('Unsupported target language. Translation was skipped.', 'warning')
        translate_to = 'None'

    # --- Setup a unique session directory for this request ---
    session_id = str(uuid.uuid4())
    session['session_id'] = session_id
    session_dir = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
    os.makedirs(session_dir, exist_ok=True)

    # secure_filename() can return an empty string (e.g. a name made only of
    # characters it strips), so fall back to a fixed name. The "input_" prefix
    # keeps the upload from colliding with generated files such as
    # "output_video.mp4" in the same directory.
    filename = secure_filename(video_file.filename) or 'upload'
    video_path = os.path.join(session_dir, f"input_{filename}")
    video_file.save(video_path)

    results = {}
    audio_path = os.path.join(session_dir, "extracted_audio.wav")

    try:
        # 1. Extract Audio
        if not extract_audio_from_video(video_path, audio_path):
            flash('Failed to extract audio from the video.', 'error')
            return _render_index()

        # 2. Transcribe Audio
        original_srt_text = transcribe_audio_with_gemini(audio_path, source_language)
        if not original_srt_text:
            flash('Failed to transcribe the audio. The API call might have failed.', 'error')
            return _render_index()

        original_srt_path = os.path.join(session_dir, "original_subtitles.srt")
        with open(original_srt_path, "w", encoding="utf-8") as f:
            f.write(original_srt_text)
        results['original_srt_file'] = "original_subtitles.srt"

        # Keep track of the final SRT to use for TTS and video burn-in
        final_srt_text = original_srt_text
        wants_translation = bool(translate_to) and translate_to != "None"

        # 3. Translate Subtitles (if requested)
        if wants_translation:
            translated_srt_text = translate_srt(original_srt_text, translate_to)
            if translated_srt_text:
                translated_srt_path = os.path.join(session_dir, "translated_subtitles.srt")
                with open(translated_srt_path, "w", encoding="utf-8") as f:
                    f.write(translated_srt_text)
                results['translated_srt_file'] = "translated_subtitles.srt"
                final_srt_text = translated_srt_text  # Use translated text for next steps
            else:
                flash(f'Failed to translate subtitles to {translate_to}.', 'warning')

        # 4. Generate TTS Audio (if requested)
        if add_tts:
            tts_lang = translate_to if wants_translation else source_language
            # If source was auto-detect, we can't reliably guess the TTS language. Default to English.
            if tts_lang == 'Auto Detect':
                flash('TTS language cannot be "Auto Detect". Defaulting to English. For better results, please specify the source language.', 'warning')
                tts_lang = 'English'

            tts_audio_path = os.path.join(session_dir, "tts_audio.mp3")
            if generate_tts_audio(final_srt_text, tts_lang, tts_audio_path):
                results['tts_audio_file'] = "tts_audio.mp3"
            else:
                flash('Failed to generate Text-to-Speech audio.', 'warning')

        # 5. Add Subtitles to Video (if requested)
        if add_subtitles:
            output_video_path = os.path.join(session_dir, "output_video.mp4")
            if add_subtitles_to_video(video_path, final_srt_text, output_video_path):
                results['output_video_file'] = "output_video.mp4"
            else:
                flash('Failed to add subtitles to the video.', 'warning')

        return _render_index(results=results, session_id=session_id)
    finally:
        # Clean up the extracted audio on every exit path, including the
        # early error returns above; it may not exist if extraction failed.
        if os.path.exists(audio_path):
            os.remove(audio_path)
307
+
308
@app.route('/download/<session_id>/<path:filename>')
def download_file(session_id, filename):
    """Serve files from the session directory for download.

    Args:
        session_id: Identifier of the per-request directory; must be a UUID
            in canonical string form, as produced by ``str(uuid.uuid4())``.
        filename: Path of the file relative to that directory.

    Returns:
        The file as an attachment. Responds with 404 when ``session_id`` is
        not a canonical UUID.
    """
    from flask import abort  # local import: abort is not in the file-level flask import

    # session_id is joined into a filesystem path, so a value such as ".."
    # would make the parent of the upload folder the download root. Only
    # accept the exact canonical UUID text.
    try:
        canonical_id = str(uuid.UUID(session_id))
    except ValueError:
        abort(404)
    if canonical_id != session_id:
        abort(404)

    directory = os.path.join(app.config['UPLOAD_FOLDER'], canonical_id)
    return send_from_directory(directory, filename, as_attachment=True)
313
+
314
+
315
# --- Run the App ---
if __name__ == '__main__':
    # Debug mode enables the interactive Werkzeug debugger, which allows
    # arbitrary code execution from the browser. Keep it off unless it is
    # explicitly requested through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled)