Spaces:

Athspi-ai
/

AutoSubGen

Runtime error

App Files Files Community

AutoSubGen / app.py

Athspi

Update app.py

7ad479d verified 15 days ago

raw

history blame

13.7 kB

	import os
	import google.generativeai as genai
	from moviepy.editor import VideoFileClip, AudioFileClip, CompositeVideoClip, TextClip
	from moviepy.video.tools.subtitles import SubtitlesClip
	import tempfile
	import logging
	import srt
	import datetime
	from gtts import gTTS
	from flask import Flask, request, render_template, send_from_directory, url_for, flash, session
	from werkzeug.utils import secure_filename
	import uuid

	# --- Configuration ---

	# Suppress moviepy logs
	logging.getLogger("moviepy").setLevel(logging.ERROR)

	# Configure Gemini API
	# IMPORTANT: Set your GEMINI_API_KEY as an environment variable
	# For example, in your terminal: export GEMINI_API_KEY="YOUR_API_KEY"
	try:
	    genai.configure(api_key=os.environ["GEMINI_API_KEY"])
	except KeyError:
	    # Fail fast at import time with an actionable message. `from None` hides the
	    # bare KeyError traceback, which adds nothing for the person running the app.
	    raise RuntimeError(
	        "GEMINI_API_KEY environment variable not set. Please set it before running the app."
	    ) from None


	# --- Flask App Initialization ---
	app = Flask(__name__)
	# Use a stable secret from the environment when one is provided so that signed
	# session cookies survive restarts and are valid across worker processes.
	# Without it, fall back to a fresh random key per process (the previous behaviour).
	app.config['SECRET_KEY'] = os.environ.get('FLASK_SECRET_KEY') or os.urandom(24)
	# Uploads and generated artefacts live under ./temp_uploads relative to the
	# directory the process was started from.
	app.config['UPLOAD_FOLDER'] = os.path.join(os.getcwd(), 'temp_uploads')

	# Ensure the upload folder exists
	os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)


	# --- Model and Language Configuration ---

	# Decoding settings handed to the Gemini model below
	generation_config = dict(
	    temperature=0.7,
	    top_p=0.9,
	    top_k=40,
	    max_output_tokens=8192,
	    response_mime_type="text/plain",
	)

	# Shared model handle used by the transcription and translation helpers
	model = genai.GenerativeModel(
	    generation_config=generation_config,
	    model_name="gemini-1.5-pro-latest",
	)

	# Language names passed to the index template as `supported_languages`.
	# The first entry, "Auto Detect", is a sentinel rather than a real language:
	# the transcription prompt omits the language hint when it is selected.
	SUPPORTED_LANGUAGES = [
	    "Auto Detect", "English", "Chinese", "German", "Spanish",
	    "Russian", "Korean", "French", "Japanese", "Portuguese",
	    "Turkish", "Polish", "Catalan", "Dutch", "Arabic",
	    "Swedish", "Italian", "Indonesian", "Hindi", "Finnish",
	    "Vietnamese", "Hebrew", "Ukrainian", "Greek", "Malay",
	    "Czech", "Romanian", "Danish", "Hungarian", "Tamil",
	    "Norwegian", "Thai", "Urdu", "Croatian", "Bulgarian",
	    "Lithuanian", "Latin", "Maori", "Malayalam", "Welsh",
	    "Slovak", "Telugu", "Persian", "Latvian", "Bengali",
	    "Serbian", "Azerbaijani", "Slovenian", "Kannada", "Estonian",
	    "Macedonian", "Breton", "Basque", "Icelandic", "Armenian",
	    "Nepali", "Mongolian", "Bosnian", "Kazakh", "Albanian",
	    "Swahili", "Galician", "Marathi", "Punjabi", "Sinhala",
	    "Khmer", "Shona", "Yoruba", "Somali", "Afrikaans",
	    "Occitan", "Georgian", "Belarusian", "Tajik", "Sindhi",
	    "Gujarati", "Amharic", "Yiddish", "Lao", "Uzbek",
	    "Faroese", "Haitian Creole", "Pashto", "Turkmen", "Nynorsk",
	    "Maltese", "Sanskrit", "Luxembourgish", "Burmese", "Tibetan",
	    "Tagalog", "Malagasy", "Assamese", "Tatar", "Hawaiian",
	    "Lingala", "Hausa", "Bashkir", "Javanese", "Sundanese",
	]

	# Maps a display language name to the short code handed to gTTS.
	# Names missing from this table fall back to "en" at the lookup site.
	# NOTE(review): it is not verified here that gTTS accepts every code below —
	# confirm against the language list of the installed gTTS version.
	LANGUAGE_CODES = {
	    "English": "en", "Chinese": "zh", "German": "de",
	    "Spanish": "es", "Russian": "ru", "Korean": "ko",
	    "French": "fr", "Japanese": "ja", "Portuguese": "pt",
	    "Turkish": "tr", "Polish": "pl", "Catalan": "ca",
	    "Dutch": "nl", "Arabic": "ar", "Swedish": "sv",
	    "Italian": "it", "Indonesian": "id", "Hindi": "hi",
	    "Finnish": "fi", "Vietnamese": "vi", "Hebrew": "he",
	    "Ukrainian": "uk", "Greek": "el", "Malay": "ms",
	    "Czech": "cs", "Romanian": "ro", "Danish": "da",
	    "Hungarian": "hu", "Tamil": "ta", "Norwegian": "no",
	    "Thai": "th", "Urdu": "ur", "Croatian": "hr",
	    "Bulgarian": "bg", "Lithuanian": "lt", "Latin": "la",
	    "Maori": "mi", "Malayalam": "ml", "Welsh": "cy",
	    "Slovak": "sk", "Telugu": "te", "Persian": "fa",
	    "Latvian": "lv", "Bengali": "bn", "Serbian": "sr",
	    "Azerbaijani": "az", "Slovenian": "sl", "Kannada": "kn",
	    "Estonian": "et", "Macedonian": "mk", "Breton": "br",
	    "Basque": "eu", "Icelandic": "is", "Armenian": "hy",
	    "Nepali": "ne", "Mongolian": "mn", "Bosnian": "bs",
	    "Kazakh": "kk", "Albanian": "sq", "Swahili": "sw",
	    "Galician": "gl", "Marathi": "mr", "Punjabi": "pa",
	    "Sinhala": "si", "Khmer": "km", "Shona": "sn",
	    "Yoruba": "yo", "Somali": "so", "Afrikaans": "af",
	    "Occitan": "oc", "Georgian": "ka", "Belarusian": "be",
	    "Tajik": "tg", "Sindhi": "sd", "Gujarati": "gu",
	    "Amharic": "am", "Yiddish": "yi", "Lao": "lo",
	    "Uzbek": "uz", "Faroese": "fo", "Haitian Creole": "ht",
	    "Pashto": "ps", "Turkmen": "tk", "Nynorsk": "nn",
	    "Maltese": "mt", "Sanskrit": "sa", "Luxembourgish": "lb",
	    "Burmese": "my", "Tibetan": "bo", "Tagalog": "tl",
	    "Malagasy": "mg", "Assamese": "as", "Tatar": "tt",
	    "Hawaiian": "haw", "Lingala": "ln", "Hausa": "ha",
	    "Bashkir": "ba", "Javanese": "jv", "Sundanese": "su",
	}


	# --- Core Processing Functions ---

	def time_to_seconds(time_obj):
	    """Convert a time-of-day or a duration to a number of seconds.

	    Args:
	        time_obj: A ``datetime.time`` (interpreted as an offset from 00:00:00)
	            or a ``datetime.timedelta``.

	    Returns:
	        The value in seconds as a number, including the fractional
	        (microsecond) part.
	    """
	    # Durations already know their total length; this also handles values
	    # longer than 24 hours, which datetime.time cannot represent.
	    if isinstance(time_obj, datetime.timedelta):
	        return time_obj.total_seconds()
	    return (
	        time_obj.hour * 3600
	        + time_obj.minute * 60
	        + time_obj.second
	        + time_obj.microsecond / 1e6
	    )

	def extract_audio_from_video(video_path, audio_path):
	    """Extract the audio track of a video and write it to ``audio_path``.

	    The audio is written at a 16 kHz sample rate.

	    Args:
	        video_path: Path of the video file to read.
	        audio_path: Destination path for the extracted audio file.

	    Returns:
	        ``audio_path`` on success, or ``None`` if the video could not be
	        read, has no audio track, or the audio could not be written.
	    """
	    video = None
	    try:
	        video = VideoFileClip(video_path)
	        # A clip without an audio track exposes `audio` as None; report that
	        # explicitly instead of failing with an opaque AttributeError.
	        if video.audio is None:
	            logging.error("Error extracting audio: the video has no audio track")
	            return None
	        video.audio.write_audiofile(audio_path, fps=16000, logger=None)
	        return audio_path
	    except Exception as e:
	        logging.error(f"Error extracting audio: {e}")
	        return None
	    finally:
	        # Always release the reader held by the clip, on success and on failure.
	        if video is not None:
	            try:
	                video.close()
	            except Exception as close_error:
	                logging.warning(f"Could not close video clip: {close_error}")

	def transcribe_audio_with_gemini(audio_path, source_language):
	    """Transcribe an audio file with Gemini, asking for SRT-formatted output.

	    The audio is uploaded, sent to the model together with a prompt that
	    requests a verbatim transcription with SRT timestamps, and the uploaded
	    file is deleted again afterwards.

	    Args:
	        audio_path: Path of the audio file to upload.
	        source_language: Language name to mention in the prompt, or
	            ``"Auto Detect"`` to leave the language unspecified.

	    Returns:
	        The model's response text with surrounding whitespace stripped, or
	        ``None`` if the upload or the model call failed.
	    """
	    audio_file = None
	    try:
	        audio_file = genai.upload_file(path=audio_path)
	        language_prompt = f"in {source_language}" if source_language != "Auto Detect" else ""

	        prompt = f"""
	You are a professional transcriber. Transcribe this audio accurately and verbatim {language_prompt}.
	Include timestamps for each sentence in the SRT (SubRip) format.
	Example:
	1
	00:00:01,234 --> 00:00:05,678
	This is the first sentence.

	2
	00:00:06,123 --> 00:00:09,456
	This is the second sentence.

	Ensure the timestamps are precise and correspond to the start and end of each spoken sentence.
	Respond ONLY with the transcription in the SRT format. Do not add explanations or any extra text.
	"""

	        response = model.generate_content([prompt, audio_file])
	        return response.text.strip()
	    except Exception as e:
	        logging.error(f"Error during Gemini transcription: {e}")
	        return None
	    finally:
	        # Delete the uploaded file even when the model call failed, and never
	        # let a cleanup problem discard a transcription that succeeded.
	        if audio_file is not None:
	            try:
	                genai.delete_file(audio_file.name)
	            except Exception as cleanup_error:
	                logging.warning(f"Could not delete uploaded audio file: {cleanup_error}")


	def translate_srt(srt_text, target_language):
	    """Ask Gemini to translate SRT subtitles, keeping numbering and timestamps.

	    Args:
	        srt_text: The SRT content to translate.
	        target_language: Name of the language to translate into.

	    Returns:
	        The stripped response text from the model, or ``None`` when the
	        request raised an exception (the error is logged).
	    """
	    translated = None
	    try:
	        request_text = f"""
	Translate the following SRT subtitles into {target_language}.
	Preserve the SRT format perfectly (index numbers, timestamps, and structure).
	Translate only the subtitle text on the lines after the timestamps.
	Do not add any explanations or extra text. Your output must be a valid SRT file.
	Here is the SRT file content:
	{srt_text}
	"""
	        reply = model.generate_content(request_text)
	        translated = reply.text.strip()
	    except Exception as err:
	        logging.error(f"Error during translation: {err}")
	    return translated

	def generate_tts_audio(srt_text, language, tts_audio_path):
	    """Synthesize one speech file from the text of all subtitles.

	    The subtitle contents are joined with single spaces and spoken as one
	    continuous passage; subtitle timings are not used.

	    Args:
	        srt_text: SRT content whose subtitle text should be spoken.
	        language: Language name looked up in ``LANGUAGE_CODES``; unknown
	            names fall back to ``"en"``.
	        tts_audio_path: Destination path for the generated audio.

	    Returns:
	        ``tts_audio_path`` on success, ``None`` if anything raised.
	    """
	    try:
	        spoken_text = " ".join(entry.content for entry in srt.parse(srt_text))
	        speech = gTTS(
	            text=spoken_text,
	            lang=LANGUAGE_CODES.get(language, "en"),
	            slow=False,
	        )
	        speech.save(tts_audio_path)
	    except Exception as err:
	        logging.error(f"Error generating TTS audio: {err}")
	        return None
	    return tts_audio_path

	def add_subtitles_to_video(video_path, srt_text, output_video_path):
	    """Burn subtitles into a video and write the result to a new file.

	    Args:
	        video_path: Path of the source video.
	        srt_text: Subtitles in SRT format.
	        output_video_path: Where the subtitled video is written
	            (H.264 video, AAC audio).

	    Returns:
	        ``output_video_path`` on success, or ``None`` if any step failed.
	    """
	    def generator(txt):
	        # Rendering style for each subtitle line: white text, black outline.
	        return TextClip(txt, font='Arial-Bold', fontsize=24, color='white',
	                        stroke_color='black', stroke_width=1)

	    srt_path = None
	    video = None
	    result = None
	    try:
	        # SubtitlesClip is given a file path, so the SRT text is written to a
	        # temporary file first. delete=False keeps it on disk after the `with`.
	        with tempfile.NamedTemporaryFile(mode='w', suffix='.srt', delete=False, encoding='utf-8') as temp_srt:
	            # Record the name before writing so a failed write is still cleaned up.
	            srt_path = temp_srt.name
	            temp_srt.write(srt_text)

	        video = VideoFileClip(video_path)
	        subtitles = SubtitlesClip(srt_path, generator)

	        result = CompositeVideoClip([video, subtitles.set_position(('center', 'bottom'))])

	        # Write output with original audio
	        result.write_videofile(output_video_path, codec='libx264', audio_codec='aac', threads=4, logger=None)
	        return output_video_path
	    except Exception as e:
	        logging.error(f"Error adding subtitles to video: {e}")
	        return None
	    finally:
	        # Release clip resources and remove the temporary SRT file on every
	        # path, not only after a successful render.
	        for clip in (result, video):
	            if clip is not None:
	                try:
	                    clip.close()
	                except Exception as close_error:
	                    logging.warning(f"Could not close clip: {close_error}")
	        if srt_path is not None:
	            try:
	                os.remove(srt_path)
	            except OSError as remove_error:
	                logging.warning(f"Could not remove temporary subtitle file: {remove_error}")


	# --- Flask Routes ---

	@app.route('/')
	def index():
	    """Serve the landing page with the language choices.

	    Any data left in the session from an earlier visit is discarded first.
	    """
	    session.clear()
	    page = render_template(
	        'index.html',
	        supported_languages=SUPPORTED_LANGUAGES,
	    )
	    return page

	@app.route('/process', methods=['POST'])
	def process():
	    """Handle the video processing request.

	    Saves the uploaded video into a per-request directory, extracts and
	    transcribes its audio, and then optionally translates the subtitles,
	    generates text-to-speech audio and burns the subtitles into the video,
	    depending on the submitted form options.

	    Returns:
	        The rendered ``index.html`` page. On success it receives ``results``
	        (a mapping of result keys to file names inside the session directory)
	        and ``session_id``. If a required step fails, an error is flashed and
	        the page is rendered without results. Failures of optional steps only
	        flash a warning.
	    """
	    video_file = request.files.get('video')
	    if video_file is None or not video_file.filename:
	        flash('No video file selected. Please upload a video.', 'error')
	        return render_template('index.html', supported_languages=SUPPORTED_LANGUAGES)

	    # --- Get form options ---
	    source_language = request.form.get('source_language', 'Auto Detect')
	    translate_to = request.form.get('translate_to', 'None')
	    add_tts = 'add_tts' in request.form
	    add_subtitles = 'add_subtitles' in request.form

	    # --- Setup a unique session directory for this request ---
	    session_id = str(uuid.uuid4())
	    session['session_id'] = session_id
	    session_dir = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
	    os.makedirs(session_dir, exist_ok=True)

	    # secure_filename() can return an empty string (e.g. for names made only of
	    # characters it strips), which would make the save target the directory
	    # itself. The "input_" prefix also guarantees the upload can never collide
	    # with the generated files written into the same directory, such as
	    # "output_video.mp4" or "extracted_audio.wav".
	    safe_name = secure_filename(video_file.filename) or "video"
	    video_path = os.path.join(session_dir, f"input_{safe_name}")
	    video_file.save(video_path)

	    results = {}
	    audio_path = os.path.join(session_dir, "extracted_audio.wav")

	    try:
	        # 1. Extract Audio
	        if not extract_audio_from_video(video_path, audio_path):
	            flash('Failed to extract audio from the video.', 'error')
	            return render_template('index.html', supported_languages=SUPPORTED_LANGUAGES)

	        # 2. Transcribe Audio
	        original_srt_text = transcribe_audio_with_gemini(audio_path, source_language)
	        if not original_srt_text:
	            flash('Failed to transcribe the audio. The API call might have failed.', 'error')
	            return render_template('index.html', supported_languages=SUPPORTED_LANGUAGES)

	        original_srt_path = os.path.join(session_dir, "original_subtitles.srt")
	        with open(original_srt_path, "w", encoding="utf-8") as f:
	            f.write(original_srt_text)
	        results['original_srt_file'] = "original_subtitles.srt"

	        # Keep track of the final SRT to use for TTS and video burn-in
	        final_srt_text = original_srt_text

	        # 3. Translate Subtitles (if requested)
	        if translate_to and translate_to != "None":
	            translated_srt_text = translate_srt(original_srt_text, translate_to)
	            if translated_srt_text:
	                translated_srt_path = os.path.join(session_dir, "translated_subtitles.srt")
	                with open(translated_srt_path, "w", encoding="utf-8") as f:
	                    f.write(translated_srt_text)
	                results['translated_srt_file'] = "translated_subtitles.srt"
	                final_srt_text = translated_srt_text  # Use translated text for next steps
	            else:
	                flash(f'Failed to translate subtitles to {translate_to}.', 'warning')

	        # 4. Generate TTS Audio (if requested)
	        if add_tts:
	            tts_lang = translate_to if translate_to and translate_to != "None" else source_language
	            # If source was auto-detect, we can't reliably guess the TTS language. Default to English.
	            if tts_lang == 'Auto Detect':
	                flash('TTS language cannot be "Auto Detect". Defaulting to English. For better results, please specify the source language.', 'warning')
	                tts_lang = 'English'

	            tts_audio_path = os.path.join(session_dir, "tts_audio.mp3")
	            if generate_tts_audio(final_srt_text, tts_lang, tts_audio_path):
	                results['tts_audio_file'] = "tts_audio.mp3"
	            else:
	                flash('Failed to generate Text-to-Speech audio.', 'warning')

	        # 5. Add Subtitles to Video (if requested)
	        if add_subtitles:
	            output_video_path = os.path.join(session_dir, "output_video.mp4")
	            if add_subtitles_to_video(video_path, final_srt_text, output_video_path):
	                results['output_video_file'] = "output_video.mp4"
	            else:
	                flash('Failed to add subtitles to the video.', 'warning')

	        return render_template('index.html',
	                               supported_languages=SUPPORTED_LANGUAGES,
	                               results=results,
	                               session_id=session_id)
	    finally:
	        # Clean up the extracted audio on every exit path (including the early
	        # error returns), and tolerate it being absent or undeletable.
	        if os.path.exists(audio_path):
	            try:
	                os.remove(audio_path)
	            except OSError as remove_error:
	                logging.warning(f"Could not remove extracted audio: {remove_error}")

	@app.route('/download/<session_id>/<path:filename>')
	def download_file(session_id, filename):
	    """Serve a file from a session directory as a download.

	    Args:
	        session_id: Name of the per-request directory under the upload
	            folder. Must be a canonical UUID string, which is the form in
	            which session directories are created.
	        filename: Path of the file inside that directory.

	    Returns:
	        The file as an attachment response. Responds with 404 when
	        ``session_id`` is not a canonical UUID.
	    """
	    from flask import abort

	    # session_id comes straight from the URL. Without validation a value such
	    # as ".." would point the directory outside the upload folder, and
	    # send_from_directory only confines `filename` to whatever directory it is given.
	    try:
	        canonical_id = str(uuid.UUID(session_id))
	    except ValueError:
	        abort(404)
	    # uuid.UUID also accepts braces, "urn:uuid:" prefixes and hyphen-less hex;
	    # only the exact canonical spelling names a directory this app created.
	    if canonical_id != session_id:
	        abort(404)

	    directory = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
	    return send_from_directory(directory, filename, as_attachment=True)


	# --- Script entry point ---
	# When executed directly, start Flask's built-in server listening on all
	# network interfaces at port 7860.
	if __name__ == '__main__':
	    bind_address, bind_port = "0.0.0.0", 7860
	    app.run(host=bind_address, port=bind_port)