Spaces:
Runtime error
Runtime error
File size: 13,670 Bytes
91f8d48 e6d59c3 1b72949 86417c2 e6d59c3 91f8d48 86417c2 1b72949 91f8d48 1b72949 818e336 e4d42f1 86417c2 e4d42f1 1b72949 e4d42f1 91f8d48 86417c2 e4d42f1 91f8d48 86417c2 1b72949 86417c2 1b72949 e4d42f1 1b72949 224f399 1b72949 e4d42f1 1b72949 c463c7e e4d42f1 1b72949 86417c2 1b72949 86417c2 1b72949 86417c2 1b72949 43fec16 1b72949 43fec16 1b72949 e4d42f1 1b72949 91f8d48 1b72949 86417c2 1b72949 86417c2 1b72949 86417c2 1b72949 91f8d48 1b72949 7ad479d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 |
import os
import google.generativeai as genai
from moviepy.editor import VideoFileClip, AudioFileClip, CompositeVideoClip, TextClip
from moviepy.video.tools.subtitles import SubtitlesClip
import tempfile
import logging
import srt
import datetime
from gtts import gTTS
from flask import Flask, request, render_template, send_from_directory, url_for, flash, session
from werkzeug.utils import secure_filename
import uuid
# --- Configuration ---
# Raise the "moviepy" logger threshold to ERROR so its lower-severity output is suppressed.
logging.getLogger("moviepy").setLevel(logging.ERROR)

# Configure the Gemini API client from the environment.
# IMPORTANT: Set your GEMINI_API_KEY as an environment variable
# For example, in your terminal: export GEMINI_API_KEY="YOUR_API_KEY"
try:
    # Only the environment lookup lives in the try body, so a KeyError raised
    # from inside genai.configure() is never mistaken for a missing variable.
    _gemini_api_key = os.environ["GEMINI_API_KEY"]
except KeyError:
    # Fail at import time with an actionable message; `from None` drops the
    # uninformative KeyError from the traceback.
    raise RuntimeError(
        "GEMINI_API_KEY environment variable not set. Please set it before running the app."
    ) from None
genai.configure(api_key=_gemini_api_key)
# --- Flask App Initialization ---
app = Flask(__name__)
app.config.update(
    # A fresh random key is generated every time the module is loaded.
    SECRET_KEY=os.urandom(24),
    # Per-request working directories are created under <cwd>/temp_uploads.
    UPLOAD_FOLDER=os.path.join(os.getcwd(), 'temp_uploads'),
)
# Create the upload root up front; an already-existing directory is fine.
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
# --- Model and Language Configuration ---
# Generation settings handed to the Gemini model below; responses are requested as plain text.
generation_config = dict(
    temperature=0.7,
    top_p=0.9,
    top_k=40,
    max_output_tokens=8192,
    response_mime_type="text/plain",
)
# Single module-level model handle used by the transcription and translation helpers.
model = genai.GenerativeModel(
    generation_config=generation_config,
    model_name="gemini-1.5-pro-latest",
)
# Display names of every language the app offers. The list is passed to the
# index.html template as `supported_languages`. "Auto Detect" is not a real
# language: transcribe_audio_with_gemini() omits the language hint for it and
# process() replaces it with English before text-to-speech.
SUPPORTED_LANGUAGES = [
    "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
    "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan", "Dutch",
    "Arabic", "Swedish", "Italian", "Indonesian", "Hindi", "Finnish", "Vietnamese",
    "Hebrew", "Ukrainian", "Greek", "Malay", "Czech", "Romanian", "Danish",
    "Hungarian", "Tamil", "Norwegian", "Thai", "Urdu", "Croatian", "Bulgarian",
    "Lithuanian", "Latin", "Maori", "Malayalam", "Welsh", "Slovak", "Telugu",
    "Persian", "Latvian", "Bengali", "Serbian", "Azerbaijani", "Slovenian",
    "Kannada", "Estonian", "Macedonian", "Breton", "Basque", "Icelandic",
    "Armenian", "Nepali", "Mongolian", "Bosnian", "Kazakh", "Albanian",
    "Swahili", "Galician", "Marathi", "Punjabi", "Sinhala", "Khmer", "Shona",
    "Yoruba", "Somali", "Afrikaans", "Occitan", "Georgian", "Belarusian",
    "Tajik", "Sindhi", "Gujarati", "Amharic", "Yiddish", "Lao", "Uzbek",
    "Faroese", "Haitian Creole", "Pashto", "Turkmen", "Nynorsk", "Maltese",
    "Sanskrit", "Luxembourgish", "Burmese", "Tibetan", "Tagalog", "Malagasy",
    "Assamese", "Tatar", "Hawaiian", "Lingala", "Hausa", "Bashkir", "Javanese",
    "Sundanese"
]
# Maps a language display name to the code passed to gTTS as `lang`.
# generate_tts_audio() looks names up here and falls back to "en" for any
# name that is missing; "Auto Detect" deliberately has no entry.
# NOTE(review): not every code below is necessarily accepted by gTTS —
# verify against the language list of the installed gTTS version.
LANGUAGE_CODES = {
    "English": "en", "Chinese": "zh", "German": "de", "Spanish": "es",
    "Russian": "ru", "Korean": "ko", "French": "fr", "Japanese": "ja",
    "Portuguese": "pt", "Turkish": "tr", "Polish": "pl", "Catalan": "ca",
    "Dutch": "nl", "Arabic": "ar", "Swedish": "sv", "Italian": "it",
    "Indonesian": "id", "Hindi": "hi", "Finnish": "fi", "Vietnamese": "vi",
    "Hebrew": "he", "Ukrainian": "uk", "Greek": "el", "Malay": "ms",
    "Czech": "cs", "Romanian": "ro", "Danish": "da", "Hungarian": "hu",
    "Tamil": "ta", "Norwegian": "no", "Thai": "th", "Urdu": "ur",
    "Croatian": "hr", "Bulgarian": "bg", "Lithuanian": "lt", "Latin": "la",
    "Maori": "mi", "Malayalam": "ml", "Welsh": "cy", "Slovak": "sk",
    "Telugu": "te", "Persian": "fa", "Latvian": "lv", "Bengali": "bn",
    "Serbian": "sr", "Azerbaijani": "az", "Slovenian": "sl", "Kannada": "kn",
    "Estonian": "et", "Macedonian": "mk", "Breton": "br", "Basque": "eu",
    "Icelandic": "is", "Armenian": "hy", "Nepali": "ne", "Mongolian": "mn",
    "Bosnian": "bs", "Kazakh": "kk", "Albanian": "sq", "Swahili": "sw",
    "Galician": "gl", "Marathi": "mr", "Punjabi": "pa", "Sinhala": "si",
    "Khmer": "km", "Shona": "sn", "Yoruba": "yo", "Somali": "so",
    "Afrikaans": "af", "Occitan": "oc", "Georgian": "ka", "Belarusian": "be",
    "Tajik": "tg", "Sindhi": "sd", "Gujarati": "gu", "Amharic": "am",
    "Yiddish": "yi", "Lao": "lo", "Uzbek": "uz", "Faroese": "fo",
    "Haitian Creole": "ht", "Pashto": "ps", "Turkmen": "tk", "Nynorsk": "nn",
    "Maltese": "mt", "Sanskrit": "sa", "Luxembourgish": "lb", "Burmese": "my",
    "Tibetan": "bo", "Tagalog": "tl", "Malagasy": "mg", "Assamese": "as",
    "Tatar": "tt", "Hawaiian": "haw", "Lingala": "ln", "Hausa": "ha",
    "Bashkir": "ba", "Javanese": "jv", "Sundanese": "su"
}
# --- Core Processing Functions ---
def time_to_seconds(time_obj):
    """Convert a time-of-day or a duration to a number of seconds.

    Args:
        time_obj: Either a ``datetime.time`` (read as an offset from
            00:00:00) or a ``datetime.timedelta``, the type the ``srt``
            library uses for subtitle start/end timestamps.

    Returns:
        float: The value in seconds, including the fractional part.
    """
    # Durations already know their own length; this is what lets parsed SRT
    # timestamps be passed in directly.
    if isinstance(time_obj, datetime.timedelta):
        return time_obj.total_seconds()
    return (
        time_obj.hour * 3600
        + time_obj.minute * 60
        + time_obj.second
        + time_obj.microsecond / 1e6
    )
def extract_audio_from_video(video_path, audio_path):
    """Write the audio track of a video to ``audio_path`` at 16 kHz.

    Args:
        video_path: Path of the video file to read.
        audio_path: Destination path for the extracted audio (the caller in
            this file passes a ``.wav`` path).

    Returns:
        ``audio_path`` on success, or ``None`` if the video could not be
        opened, has no audio track, or the audio could not be written. The
        failure is logged; no exception is propagated.
    """
    video = None
    try:
        video = VideoFileClip(video_path)
        # A clip without an audio track exposes `audio` as None; report that
        # explicitly rather than through an opaque AttributeError message.
        if video.audio is None:
            logging.error("Error extracting audio: the video has no audio track")
            return None
        video.audio.write_audiofile(audio_path, fps=16000, logger=None)
        return audio_path
    except Exception as e:
        logging.error(f"Error extracting audio: {e}")
        return None
    finally:
        # Always release the clip's readers, on success and on failure alike.
        if video is not None:
            try:
                video.close()
            except Exception as close_error:
                logging.warning(f"Could not close video clip: {close_error}")
def transcribe_audio_with_gemini(audio_path, source_language):
    """Transcribe an audio file to SRT-formatted text using Gemini.

    Args:
        audio_path: Path of the audio file to upload and transcribe.
        source_language: Language name to mention in the prompt, or
            "Auto Detect" to leave the language unspecified.

    Returns:
        The model's reply with surrounding whitespace stripped (expected to
        be SRT text), or ``None`` if the upload or the request failed. The
        failure is logged; no exception is propagated.
    """
    language_prompt = f"in {source_language}" if source_language != "Auto Detect" else ""
    # The prompt lines are flush-left on purpose: everything between the
    # triple quotes is sent to the model verbatim.
    prompt = f"""
You are a professional transcriber. Transcribe this audio accurately and verbatim {language_prompt}.
Include timestamps for each sentence in the SRT (SubRip) format.
Example:
1
00:00:01,234 --> 00:00:05,678
This is the first sentence.
2
00:00:06,123 --> 00:00:09,456
This is the second sentence.
Ensure the timestamps are precise and correspond to the start and end of each spoken sentence.
Respond ONLY with the transcription in the SRT format. Do not add explanations or any extra text.
"""
    try:
        audio_file = genai.upload_file(path=audio_path)
        try:
            response = model.generate_content([prompt, audio_file])
            return response.text.strip()
        finally:
            # Remove the uploaded file whether or not generation succeeded.
            # Cleanup is best-effort: a failed delete must not discard a
            # transcription that was already produced.
            try:
                genai.delete_file(audio_file.name)
            except Exception as cleanup_error:
                logging.warning(f"Could not delete uploaded audio file: {cleanup_error}")
    except Exception as e:
        logging.error(f"Error during Gemini transcription: {e}")
        return None
def translate_srt(srt_text, target_language):
    """Ask Gemini to translate SRT subtitles, keeping indices and timestamps.

    Args:
        srt_text: The SRT document to translate.
        target_language: Name of the language to translate into.

    Returns:
        The model's reply with surrounding whitespace stripped, or ``None``
        if the request failed (the error is logged).
    """
    try:
        # Flush-left lines are part of the prompt text sent to the model.
        instructions = f"""
Translate the following SRT subtitles into {target_language}.
Preserve the SRT format perfectly (index numbers, timestamps, and structure).
Translate only the subtitle text on the lines after the timestamps.
Do not add any explanations or extra text. Your output must be a valid SRT file.
Here is the SRT file content:
{srt_text}
"""
        reply = model.generate_content(instructions)
        translated = reply.text
        return translated.strip()
    except Exception as err:
        logging.error(f"Error during translation: {err}")
        return None
def generate_tts_audio(srt_text, language, tts_audio_path):
    """Synthesize speech for all subtitle text in an SRT document.

    Args:
        srt_text: SRT content whose subtitle texts are spoken, joined by spaces.
        language: Language display name; looked up in ``LANGUAGE_CODES`` and
            treated as English ("en") when it has no entry.
        tts_audio_path: Where the generated audio file is saved.

    Returns:
        ``tts_audio_path`` on success, or ``None`` if parsing or synthesis
        failed (the error is logged).
    """
    try:
        spoken_text = " ".join(entry.content for entry in srt.parse(srt_text))
        speech = gTTS(text=spoken_text, lang=LANGUAGE_CODES.get(language, "en"), slow=False)
        speech.save(tts_audio_path)
    except Exception as err:
        logging.error(f"Error generating TTS audio: {err}")
        return None
    return tts_audio_path
def add_subtitles_to_video(video_path, srt_text, output_video_path):
    """Render SRT subtitles onto a video and write the result to a new file.

    Args:
        video_path: Path of the source video.
        srt_text: Subtitle content in SRT format.
        output_video_path: Where the subtitled video is written.

    Returns:
        ``output_video_path`` on success, or ``None`` on any failure (the
        error is logged). The temporary subtitle file and any opened clips
        are released in both cases.
    """
    def generator(txt):
        # Builds the on-screen text clip for one subtitle entry.
        return TextClip(txt, font='Arial-Bold', fontsize=24, color='white',
                        stroke_color='black', stroke_width=1)

    srt_path = None
    video = None
    result = None
    try:
        # MoviePy's SubtitlesClip requires a file path, so spill the text to
        # a temporary file. The name is recorded before writing so that the
        # cleanup below still finds the file if the write itself fails.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.srt', delete=False, encoding='utf-8') as temp_srt:
            srt_path = temp_srt.name
            temp_srt.write(srt_text)

        video = VideoFileClip(video_path)
        subtitles = SubtitlesClip(srt_path, generator)
        result = CompositeVideoClip([video, subtitles.set_position(('center', 'bottom'))])
        # Write output with original audio
        result.write_videofile(output_video_path, codec='libx264', audio_codec='aac', threads=4, logger=None)
        return output_video_path
    except Exception as e:
        logging.error(f"Error adding subtitles to video: {e}")
        return None
    finally:
        # Release clips first, then delete the temporary subtitle file; both
        # steps are best-effort so cleanup never masks the real outcome.
        for clip in (result, video):
            if clip is not None:
                try:
                    clip.close()
                except Exception as close_error:
                    logging.warning(f"Could not close clip: {close_error}")
        if srt_path is not None:
            try:
                os.remove(srt_path)
            except OSError as remove_error:
                logging.warning(f"Could not remove temporary subtitle file: {remove_error}")
# --- Flask Routes ---
@app.route('/')
def index():
    """Serve the upload form, starting from an empty session."""
    # Forget whatever an earlier request stored in the session.
    session.clear()
    page = render_template('index.html', supported_languages=SUPPORTED_LANGUAGES)
    return page
@app.route('/process', methods=['POST'])
def process():
    """Handle the video processing request.

    Form inputs:
        video: The uploaded video file (required).
        source_language: Spoken language; defaults to "Auto Detect".
        translate_to: Target language, or "None" to skip translation.
        add_tts: Present when text-to-speech audio should be generated.
        add_subtitles: Present when subtitles should be rendered onto the video.

    Pipeline: save the upload into a fresh per-request directory, extract
    audio, transcribe it, then optionally translate, synthesize speech and
    render subtitles. Audio extraction and transcription are required;
    failures in the optional steps only produce a warning.

    Returns:
        The rendered ``index.html``. On success it receives ``results``
        (output label -> file name inside the request directory) and
        ``session_id``; on a required-step failure it receives neither and an
        error is flashed.
    """
    # Function-scope import: only this handler needs it, for failure cleanup.
    import shutil

    def render_index(**context):
        return render_template('index.html', supported_languages=SUPPORTED_LANGUAGES, **context)

    video_file = request.files.get('video')
    if video_file is None or video_file.filename == '':
        flash('No video file selected. Please upload a video.', 'error')
        return render_index()

    # --- Get form options ---
    source_language = request.form.get('source_language', 'Auto Detect')
    translate_to = request.form.get('translate_to', 'None')
    add_tts = 'add_tts' in request.form
    add_subtitles = 'add_subtitles' in request.form

    # --- Setup a unique session directory for this request ---
    session_id = str(uuid.uuid4())
    session['session_id'] = session_id
    session_dir = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
    os.makedirs(session_dir, exist_ok=True)

    # secure_filename() can return an empty string, so fall back to a fixed
    # name. The "input_" prefix keeps the upload from colliding with the
    # generated files (e.g. an upload that is itself named "output_video.mp4").
    safe_name = secure_filename(video_file.filename) or "video"
    video_path = os.path.join(session_dir, f"input_{safe_name}")
    audio_path = os.path.join(session_dir, "extracted_audio.wav")

    results = {}
    succeeded = False
    try:
        video_file.save(video_path)

        # 1. Extract Audio
        if not extract_audio_from_video(video_path, audio_path):
            flash('Failed to extract audio from the video.', 'error')
            return render_index()

        # 2. Transcribe Audio
        original_srt_text = transcribe_audio_with_gemini(audio_path, source_language)
        if not original_srt_text:
            flash('Failed to transcribe the audio. The API call might have failed.', 'error')
            return render_index()

        original_srt_path = os.path.join(session_dir, "original_subtitles.srt")
        with open(original_srt_path, "w", encoding="utf-8") as f:
            f.write(original_srt_text)
        results['original_srt_file'] = "original_subtitles.srt"

        # Track the subtitle text used for TTS and burn-in together with the
        # language that text is actually written in.
        final_srt_text = original_srt_text
        final_language = source_language

        # 3. Translate Subtitles (if requested)
        if translate_to and translate_to != "None":
            translated_srt_text = translate_srt(original_srt_text, translate_to)
            if translated_srt_text:
                translated_srt_path = os.path.join(session_dir, "translated_subtitles.srt")
                with open(translated_srt_path, "w", encoding="utf-8") as f:
                    f.write(translated_srt_text)
                results['translated_srt_file'] = "translated_subtitles.srt"
                final_srt_text = translated_srt_text
                final_language = translate_to
            else:
                flash(f'Failed to translate subtitles to {translate_to}.', 'warning')

        # 4. Generate TTS Audio (if requested)
        if add_tts:
            # Use the language of the text being spoken: after a failed
            # translation that is still the source language, not the target.
            tts_lang = final_language
            # If source was auto-detect, we can't reliably guess the TTS language. Default to English.
            if tts_lang == 'Auto Detect':
                flash('TTS language cannot be "Auto Detect". Defaulting to English. For better results, please specify the source language.', 'warning')
                tts_lang = 'English'
            tts_audio_path = os.path.join(session_dir, "tts_audio.mp3")
            if generate_tts_audio(final_srt_text, tts_lang, tts_audio_path):
                results['tts_audio_file'] = "tts_audio.mp3"
            else:
                flash('Failed to generate Text-to-Speech audio.', 'warning')

        # 5. Add Subtitles to Video (if requested)
        if add_subtitles:
            output_video_path = os.path.join(session_dir, "output_video.mp4")
            if add_subtitles_to_video(video_path, final_srt_text, output_video_path):
                results['output_video_file'] = "output_video.mp4"
            else:
                flash('Failed to add subtitles to the video.', 'warning')

        page = render_index(results=results, session_id=session_id)
        succeeded = True
        return page
    finally:
        if succeeded:
            # Only the intermediate audio is discarded; the outputs stay
            # available for download.
            try:
                os.remove(audio_path)
            except OSError as remove_error:
                logging.warning(f"Could not remove extracted audio: {remove_error}")
        else:
            # Nothing downloadable was produced, so drop the whole request
            # directory instead of leaving the upload behind.
            shutil.rmtree(session_dir, ignore_errors=True)
@app.route('/download/<session_id>/<path:filename>')
def download_file(session_id, filename):
    """Serve a file from a request's session directory as a download.

    Args:
        session_id: Name of the per-request directory; must be a UUID in the
            canonical form produced by ``str(uuid.uuid4())``.
        filename: Path of the file relative to that directory.

    Returns:
        The file as an attachment. Responds with 404 when ``session_id`` is
        not a canonical UUID.
    """
    # Function-scope import: `abort` is not among the module-level flask imports.
    from flask import abort

    # `session_id` comes straight from the URL and is joined into the base
    # directory, so a value such as ".." would move that directory outside
    # the upload folder. Accept only the exact format this app generates.
    try:
        is_canonical_uuid = str(uuid.UUID(session_id)) == session_id
    except ValueError:
        is_canonical_uuid = False
    if not is_canonical_uuid:
        abort(404)

    directory = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
    return send_from_directory(directory, filename, as_attachment=True)
# --- Run the App ---
if __name__ == '__main__':
    # When run as a script, listen on all interfaces on port 7860.
    app.run(host="0.0.0.0", port=7860)