# Hosting page banner captured with this file: Spaces status "Runtime error".
import os | |
import google.generativeai as genai | |
from moviepy.editor import VideoFileClip, AudioFileClip, CompositeVideoClip, TextClip | |
from moviepy.video.tools.subtitles import SubtitlesClip | |
import tempfile | |
import logging | |
import srt | |
import datetime | |
from gtts import gTTS | |
from flask import Flask, request, render_template, send_from_directory, url_for, flash, session | |
from werkzeug.utils import secure_filename | |
import uuid | |
# --- Configuration ---
# Only let MoviePy's logger emit errors; its progress output is not wanted.
logging.getLogger("moviepy").setLevel(logging.ERROR)

# Configure the Gemini API client.
# IMPORTANT: set GEMINI_API_KEY as an environment variable before starting,
# for example in a terminal: export GEMINI_API_KEY="YOUR_API_KEY"
try:
    genai.configure(api_key=os.environ["GEMINI_API_KEY"])
except KeyError:
    # `from None` drops the uninformative KeyError from the traceback so the
    # actionable message below is what the operator sees.
    raise RuntimeError(
        "GEMINI_API_KEY environment variable not set. Please set it before running the app."
    ) from None

# --- Flask App Initialization ---
app = Flask(__name__)
# Prefer a stable secret from the environment so signed session cookies stay
# valid across restarts and across worker processes. Without one, fall back
# to a random per-process key (sessions are then invalidated on restart).
app.config['SECRET_KEY'] = os.environ.get("SECRET_KEY") or os.urandom(24)
# Uploads and generated artefacts live under ./temp_uploads relative to the
# directory the process was started from.
app.config['UPLOAD_FOLDER'] = os.path.join(os.getcwd(), 'temp_uploads')
# Make sure the upload folder exists before any request is handled.
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
# --- Model and Language Configuration ---
# Sampling and output settings handed to the Gemini model created below.
generation_config = dict(
    temperature=0.7,
    top_p=0.9,
    top_k=40,
    max_output_tokens=8192,
    response_mime_type="text/plain",
)

# Module-wide Gemini model; the transcription and translation helpers in this
# file both call `model.generate_content`.
# NOTE(review): the "-latest" suffix presumably tracks a moving alias rather
# than a pinned release -- confirm this is the intended model.
model = genai.GenerativeModel(
    generation_config=generation_config,
    model_name="gemini-1.5-pro-latest",
)
# Language names handed to the index.html template as `supported_languages`.
# The first entry, "Auto Detect", is a sentinel rather than a real language:
# the transcription prompt omits the language hint when it is selected.
SUPPORTED_LANGUAGES = [
    "Auto Detect",
    "English", "Chinese", "German", "Spanish", "Russian", "Korean",
    "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan",
    "Dutch", "Arabic", "Swedish", "Italian", "Indonesian", "Hindi",
    "Finnish", "Vietnamese", "Hebrew", "Ukrainian", "Greek", "Malay",
    "Czech", "Romanian", "Danish", "Hungarian", "Tamil", "Norwegian",
    "Thai", "Urdu", "Croatian", "Bulgarian", "Lithuanian", "Latin",
    "Maori", "Malayalam", "Welsh", "Slovak", "Telugu", "Persian",
    "Latvian", "Bengali", "Serbian", "Azerbaijani", "Slovenian", "Kannada",
    "Estonian", "Macedonian", "Breton", "Basque", "Icelandic", "Armenian",
    "Nepali", "Mongolian", "Bosnian", "Kazakh", "Albanian", "Swahili",
    "Galician", "Marathi", "Punjabi", "Sinhala", "Khmer", "Shona",
    "Yoruba", "Somali", "Afrikaans", "Occitan", "Georgian", "Belarusian",
    "Tajik", "Sindhi", "Gujarati", "Amharic", "Yiddish", "Lao",
    "Uzbek", "Faroese", "Haitian Creole", "Pashto", "Turkmen", "Nynorsk",
    "Maltese", "Sanskrit", "Luxembourgish", "Burmese", "Tibetan", "Tagalog",
    "Malagasy", "Assamese", "Tatar", "Hawaiian", "Lingala", "Hausa",
    "Bashkir", "Javanese", "Sundanese",
]
# Maps a display language name to the code passed to gTTS as `lang`.
# Names missing from this table make generate_tts_audio fall back to "en".
# NOTE(review): gTTS may not accept every code listed here -- verify the
# values against the language list of the installed gTTS version.
LANGUAGE_CODES = {
    "English": "en", "Chinese": "zh", "German": "de",
    "Spanish": "es", "Russian": "ru", "Korean": "ko",
    "French": "fr", "Japanese": "ja", "Portuguese": "pt",
    "Turkish": "tr", "Polish": "pl", "Catalan": "ca",
    "Dutch": "nl", "Arabic": "ar", "Swedish": "sv",
    "Italian": "it", "Indonesian": "id", "Hindi": "hi",
    "Finnish": "fi", "Vietnamese": "vi", "Hebrew": "he",
    "Ukrainian": "uk", "Greek": "el", "Malay": "ms",
    "Czech": "cs", "Romanian": "ro", "Danish": "da",
    "Hungarian": "hu", "Tamil": "ta", "Norwegian": "no",
    "Thai": "th", "Urdu": "ur", "Croatian": "hr",
    "Bulgarian": "bg", "Lithuanian": "lt", "Latin": "la",
    "Maori": "mi", "Malayalam": "ml", "Welsh": "cy",
    "Slovak": "sk", "Telugu": "te", "Persian": "fa",
    "Latvian": "lv", "Bengali": "bn", "Serbian": "sr",
    "Azerbaijani": "az", "Slovenian": "sl", "Kannada": "kn",
    "Estonian": "et", "Macedonian": "mk", "Breton": "br",
    "Basque": "eu", "Icelandic": "is", "Armenian": "hy",
    "Nepali": "ne", "Mongolian": "mn", "Bosnian": "bs",
    "Kazakh": "kk", "Albanian": "sq", "Swahili": "sw",
    "Galician": "gl", "Marathi": "mr", "Punjabi": "pa",
    "Sinhala": "si", "Khmer": "km", "Shona": "sn",
    "Yoruba": "yo", "Somali": "so", "Afrikaans": "af",
    "Occitan": "oc", "Georgian": "ka", "Belarusian": "be",
    "Tajik": "tg", "Sindhi": "sd", "Gujarati": "gu",
    "Amharic": "am", "Yiddish": "yi", "Lao": "lo",
    "Uzbek": "uz", "Faroese": "fo", "Haitian Creole": "ht",
    "Pashto": "ps", "Turkmen": "tk", "Nynorsk": "nn",
    "Maltese": "mt", "Sanskrit": "sa", "Luxembourgish": "lb",
    "Burmese": "my", "Tibetan": "bo", "Tagalog": "tl",
    "Malagasy": "mg", "Assamese": "as", "Tatar": "tt",
    "Hawaiian": "haw", "Lingala": "ln", "Hausa": "ha",
    "Bashkir": "ba", "Javanese": "jv", "Sundanese": "su",
}
# --- Core Processing Functions --- | |
def time_to_seconds(time_obj):
    """Convert a time-of-day or a duration to seconds.

    Args:
        time_obj: Either a ``datetime.timedelta`` or an object exposing
            ``hour``, ``minute``, ``second`` and ``microsecond`` attributes
            (such as ``datetime.time``).

    Returns:
        The number of seconds as a float. For time-of-day objects this is
        the offset from midnight.
    """
    # Durations are accepted as well so subtitle timestamps expressed as
    # timedelta values can be converted with the same helper.
    # NOTE(review): the `srt` package is understood to use timedelta for cue
    # start/end times -- confirm against its documentation.
    if isinstance(time_obj, datetime.timedelta):
        return time_obj.total_seconds()
    return (
        time_obj.hour * 3600
        + time_obj.minute * 60
        + time_obj.second
        + time_obj.microsecond / 1e6
    )
def extract_audio_from_video(video_path, audio_path):
    """Extract the audio track of a video and write it to ``audio_path``.

    The audio is written with ``fps=16000``; the output container is chosen
    by MoviePy from the extension of ``audio_path`` (the caller in this file
    passes a ``.wav`` path).

    Args:
        video_path: Path of the source video file.
        audio_path: Destination path for the extracted audio.

    Returns:
        ``audio_path`` on success, or ``None`` if the video could not be
        opened, has no audio track, or the audio could not be written. The
        failure reason is logged.
    """
    video = None
    try:
        video = VideoFileClip(video_path)
        if video.audio is None:
            # Give a clear reason instead of an AttributeError on None.
            logging.error("Error extracting audio: the video has no audio track")
            return None
        video.audio.write_audiofile(audio_path, fps=16000, logger=None)
        return audio_path
    except Exception as e:
        logging.error(f"Error extracting audio: {e}")
        return None
    finally:
        # Always release the clip's underlying reader, even on failure.
        if video is not None:
            try:
                video.close()
            except Exception as close_error:
                logging.warning(f"Could not close video clip: {close_error}")
def transcribe_audio_with_gemini(audio_path, source_language):
    """Transcribe an audio file with Gemini, asking for SRT-formatted output.

    The audio is uploaded through the Gemini file API, sent to the model with
    a prompt requesting a verbatim SRT transcription, and the uploaded file is
    deleted again afterwards regardless of whether generation succeeded.

    Args:
        audio_path: Path of the audio file to transcribe.
        source_language: Language name to mention in the prompt, or
            ``"Auto Detect"`` to leave the language unspecified.

    Returns:
        The model's response text with surrounding whitespace stripped, or
        ``None`` if uploading or generation failed (the error is logged).
    """
    try:
        audio_file = genai.upload_file(path=audio_path)
        try:
            language_prompt = f"in {source_language}" if source_language != "Auto Detect" else ""
            prompt = f"""
You are a professional transcriber. Transcribe this audio accurately and verbatim {language_prompt}.
Include timestamps for each sentence in the SRT (SubRip) format.
Example:
1
00:00:01,234 --> 00:00:05,678
This is the first sentence.
2
00:00:06,123 --> 00:00:09,456
This is the second sentence.
Ensure the timestamps are precise and correspond to the start and end of each spoken sentence.
Respond ONLY with the transcription in the SRT format. Do not add explanations or any extra text.
"""
            response = model.generate_content([prompt, audio_file])
        finally:
            # Remove the uploaded file even when generation raised, so failed
            # requests do not leave files behind on the remote side. A cleanup
            # failure is only logged: it must not discard a good transcript.
            try:
                genai.delete_file(audio_file.name)
            except Exception as cleanup_error:
                logging.warning(f"Could not delete uploaded audio file: {cleanup_error}")
        return response.text.strip()
    except Exception as e:
        logging.error(f"Error during Gemini transcription: {e}")
        return None
def translate_srt(srt_text, target_language):
    """Ask Gemini to translate SRT subtitles into ``target_language``.

    The prompt instructs the model to keep index numbers, timestamps and
    structure untouched and to translate only the subtitle text.

    Args:
        srt_text: The SRT content to translate.
        target_language: Name of the language to translate into.

    Returns:
        The stripped response text, or ``None`` if the request failed (the
        error is logged).
    """
    try:
        # Leading and trailing empty entries reproduce the newline that
        # opens and closes the prompt.
        prompt_lines = [
            "",
            f"Translate the following SRT subtitles into {target_language}.",
            "Preserve the SRT format perfectly (index numbers, timestamps, and structure).",
            "Translate only the subtitle text on the lines after the timestamps.",
            "Do not add any explanations or extra text. Your output must be a valid SRT file.",
            "Here is the SRT file content:",
            f"{srt_text}",
            "",
        ]
        reply = model.generate_content("\n".join(prompt_lines))
        translated = reply.text.strip()
    except Exception as e:
        logging.error(f"Error during translation: {e}")
        return None
    return translated
def generate_tts_audio(srt_text, language, tts_audio_path):
    """Synthesize speech for all subtitle text and save it to a file.

    The cue contents parsed from ``srt_text`` are joined with single spaces
    into one string, which is spoken as a whole; cue timings are not used.

    Args:
        srt_text: Subtitles in SRT format.
        language: Language name looked up in ``LANGUAGE_CODES``; names not in
            the table fall back to ``"en"``.
        tts_audio_path: Destination path for the generated audio.

    Returns:
        ``tts_audio_path`` on success, or ``None`` on any failure (the error
        is logged).
    """
    try:
        spoken_text = " ".join(cue.content for cue in srt.parse(srt_text))
        if language in LANGUAGE_CODES:
            code = LANGUAGE_CODES[language]
        else:
            code = "en"
        speech = gTTS(text=spoken_text, lang=code, slow=False)
        speech.save(tts_audio_path)
    except Exception as e:
        logging.error(f"Error generating TTS audio: {e}")
        return None
    return tts_audio_path
def add_subtitles_to_video(video_path, srt_text, output_video_path):
    """Burn subtitles into a video and write the result to a new file.

    The SRT text is written to a temporary ``.srt`` file because
    ``SubtitlesClip`` is given a file path here. The temporary file is removed
    and the source clip is closed whether or not rendering succeeds.

    Args:
        video_path: Path of the source video.
        srt_text: Subtitles in SRT format.
        output_video_path: Destination path of the rendered video.

    Returns:
        ``output_video_path`` on success, or ``None`` on failure (the error
        is logged).
    """
    def make_text_clip(txt):
        # White bold text with a thin black outline for each cue.
        return TextClip(txt, font='Arial-Bold', fontsize=24, color='white',
                        stroke_color='black', stroke_width=1)

    srt_path = None
    video = None
    try:
        with tempfile.NamedTemporaryFile(mode='w', suffix='.srt', delete=False,
                                         encoding='utf-8') as temp_srt:
            # Record the path before writing so the file is cleaned up even
            # if the write itself fails.
            srt_path = temp_srt.name
            temp_srt.write(srt_text)

        video = VideoFileClip(video_path)
        subtitles = SubtitlesClip(srt_path, make_text_clip)
        result = CompositeVideoClip([video, subtitles.set_position(('center', 'bottom'))])
        # The composite keeps the source clip's audio; encode as H.264 + AAC.
        result.write_videofile(output_video_path, codec='libx264', audio_codec='aac',
                               threads=4, logger=None)
        return output_video_path
    except Exception as e:
        logging.error(f"Error adding subtitles to video: {e}")
        return None
    finally:
        # Cleanup must never raise: that would replace the return value above
        # with an exception.
        if video is not None:
            try:
                video.close()
            except Exception as close_error:
                logging.warning(f"Could not close video clip: {close_error}")
        if srt_path is not None:
            try:
                os.remove(srt_path)
            except OSError as remove_error:
                logging.warning(f"Could not remove temporary subtitle file: {remove_error}")
# --- Flask Routes --- | |
# Register the view with the app; without a route the page is never served.
@app.route('/')
def index():
    """Render the main page with the list of selectable languages.

    Any data left in the Flask session by a previous request is cleared
    first.
    """
    session.clear()
    return render_template('index.html', supported_languages=SUPPORTED_LANGUAGES)
def _render_index(**context):
    """Render index.html with the language list plus any extra template context."""
    return render_template('index.html', supported_languages=SUPPORTED_LANGUAGES, **context)


# NOTE(review): the URL rule is chosen here because none was registered;
# confirm it matches the form action used by index.html.
@app.route('/process', methods=['POST'])
def process():
    """Handle an uploaded video: transcribe, then optionally translate, speak and subtitle.

    Form inputs read from the request:
        video: The uploaded video file (required).
        source_language: Spoken language, defaults to ``'Auto Detect'``.
        translate_to: Target language, or ``'None'`` (default) to skip translation.
        add_tts: If present, generate a text-to-speech audio file.
        add_subtitles: If present, render a copy of the video with subtitles.

    All files of one request are stored in a directory named after a fresh
    UUID under ``UPLOAD_FOLDER``; that UUID is stored in the session and
    passed to the template as ``session_id`` together with ``results``, a
    dict mapping result keys to file names inside that directory.

    Audio extraction and transcription failures abort the request with an
    error message. Translation, TTS and subtitle failures only add a warning.
    """
    video_file = request.files.get('video')
    if video_file is None or video_file.filename == '':
        flash('No video file selected. Please upload a video.', 'error')
        return _render_index()

    # --- Get form options ---
    source_language = request.form.get('source_language', 'Auto Detect')
    translate_to = request.form.get('translate_to', 'None')
    add_tts = 'add_tts' in request.form
    add_subtitles = 'add_subtitles' in request.form
    wants_translation = bool(translate_to) and translate_to != "None"

    # --- Set up a unique directory for this request ---
    session_id = str(uuid.uuid4())
    session['session_id'] = session_id
    session_dir = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
    os.makedirs(session_dir, exist_ok=True)

    # secure_filename() can return an empty string (e.g. a name made only of
    # characters it strips); the save path would then be the directory itself.
    filename = secure_filename(video_file.filename) or "uploaded_video"
    video_path = os.path.join(session_dir, filename)
    video_file.save(video_path)

    results = {}

    # 1. Extract audio, 2. transcribe it. The extracted audio is only needed
    # for these two steps, so it is removed right after, on failure paths too.
    audio_path = os.path.join(session_dir, "extracted_audio.wav")
    try:
        if not extract_audio_from_video(video_path, audio_path):
            flash('Failed to extract audio from the video.', 'error')
            return _render_index()
        original_srt_text = transcribe_audio_with_gemini(audio_path, source_language)
    finally:
        try:
            if os.path.exists(audio_path):
                os.remove(audio_path)
        except OSError as remove_error:
            logging.warning(f"Could not remove extracted audio: {remove_error}")

    if not original_srt_text:
        flash('Failed to transcribe the audio. The API call might have failed.', 'error')
        return _render_index()

    original_srt_path = os.path.join(session_dir, "original_subtitles.srt")
    with open(original_srt_path, "w", encoding="utf-8") as f:
        f.write(original_srt_text)
    results['original_srt_file'] = "original_subtitles.srt"

    # The SRT used for TTS and burn-in: the translation when available,
    # otherwise the original transcription.
    final_srt_text = original_srt_text

    # 3. Translate subtitles (if requested)
    if wants_translation:
        translated_srt_text = translate_srt(original_srt_text, translate_to)
        if translated_srt_text:
            translated_srt_path = os.path.join(session_dir, "translated_subtitles.srt")
            with open(translated_srt_path, "w", encoding="utf-8") as f:
                f.write(translated_srt_text)
            results['translated_srt_file'] = "translated_subtitles.srt"
            final_srt_text = translated_srt_text
        else:
            flash(f'Failed to translate subtitles to {translate_to}.', 'warning')

    # 4. Generate TTS audio (if requested)
    if add_tts:
        tts_lang = translate_to if wants_translation else source_language
        # With auto-detection the spoken language is unknown here, so fall
        # back to English and tell the user.
        if tts_lang == 'Auto Detect':
            flash('TTS language cannot be "Auto Detect". Defaulting to English. For better results, please specify the source language.', 'warning')
            tts_lang = 'English'
        tts_audio_path = os.path.join(session_dir, "tts_audio.mp3")
        if generate_tts_audio(final_srt_text, tts_lang, tts_audio_path):
            results['tts_audio_file'] = "tts_audio.mp3"
        else:
            flash('Failed to generate Text-to-Speech audio.', 'warning')

    # 5. Add subtitles to the video (if requested)
    if add_subtitles:
        output_video_path = os.path.join(session_dir, "output_video.mp4")
        if add_subtitles_to_video(video_path, final_srt_text, output_video_path):
            results['output_video_file'] = "output_video.mp4"
        else:
            flash('Failed to add subtitles to the video.', 'warning')

    return _render_index(results=results, session_id=session_id)
# NOTE(review): the URL rule is chosen here because none was registered;
# confirm it matches the download links generated by index.html.
@app.route('/download/<session_id>/<filename>')
def download_file(session_id, filename):
    """Serve a file from a request's directory as a download.

    Args:
        session_id: Name of the per-request directory under ``UPLOAD_FOLDER``.
            Must be a UUID in canonical string form, which is how ``process``
            names these directories.
        filename: Name of the file inside that directory.

    Returns:
        The file as an attachment, or a 404 response when ``session_id`` is
        not a canonical UUID string.
    """
    # session_id comes from the URL and is joined into a filesystem path.
    # Rejecting anything but a canonical UUID stops values such as ".." from
    # pointing the served directory outside the upload folder.
    try:
        is_canonical_uuid = str(uuid.UUID(session_id)) == session_id
    except (ValueError, AttributeError, TypeError):
        is_canonical_uuid = False
    if not is_canonical_uuid:
        return "File not found.", 404

    directory = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
    return send_from_directory(directory, filename, as_attachment=True)
# --- Run the App ---
if __name__ == '__main__':
    # Listen on all network interfaces.
    # NOTE(review): port 7860 is presumably what the hosting platform
    # expects -- confirm before changing it.
    listen_options = {"host": "0.0.0.0", "port": 7860}
    app.run(**listen_options)