File size: 13,670 Bytes
91f8d48
e6d59c3
1b72949
86417c2
e6d59c3
91f8d48
86417c2
1b72949
 
 
 
 
 
 
91f8d48
 
 
 
 
1b72949
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
818e336
e4d42f1
 
86417c2
e4d42f1
 
 
 
 
 
 
1b72949
e4d42f1
 
 
 
91f8d48
86417c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4d42f1
91f8d48
 
86417c2
 
1b72949
86417c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b72949
 
 
 
 
 
 
 
e4d42f1
1b72949
 
 
 
 
 
 
 
 
 
 
 
 
224f399
1b72949
 
 
 
 
 
 
 
 
 
 
e4d42f1
1b72949
 
 
 
 
 
 
 
 
 
c463c7e
 
e4d42f1
1b72949
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86417c2
1b72949
 
 
 
 
 
 
 
 
 
 
 
86417c2
1b72949
86417c2
1b72949
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43fec16
1b72949
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43fec16
1b72949
 
 
 
e4d42f1
1b72949
 
 
 
 
 
 
 
 
91f8d48
1b72949
86417c2
1b72949
 
 
 
 
 
 
 
 
 
 
86417c2
1b72949
86417c2
1b72949
 
 
 
 
 
 
 
91f8d48
1b72949
 
 
 
 
 
 
 
 
 
 
 
 
 
7ad479d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
import os
import google.generativeai as genai
from moviepy.editor import VideoFileClip, AudioFileClip, CompositeVideoClip, TextClip
from moviepy.video.tools.subtitles import SubtitlesClip
import tempfile
import logging
import srt
import datetime
from gtts import gTTS
from flask import Flask, request, render_template, send_from_directory, url_for, flash, session
from werkzeug.utils import secure_filename
import uuid

# --- Configuration ---

# Suppress moviepy logs
logging.getLogger("moviepy").setLevel(logging.ERROR)

# Configure Gemini API
# IMPORTANT: Set your GEMINI_API_KEY as an environment variable
# For example, in your terminal: export GEMINI_API_KEY="YOUR_API_KEY"
try:
    # Only the environment lookup lives inside the ``try`` so that a KeyError
    # raised from within the client library is never mistaken for a missing key.
    _gemini_api_key = os.environ["GEMINI_API_KEY"]
except KeyError:
    # ``from None`` hides the uninformative KeyError so the startup failure
    # shows just the actionable message. RuntimeError is still an Exception,
    # so any existing ``except Exception`` handler keeps working.
    raise RuntimeError(
        "GEMINI_API_KEY environment variable not set. Please set it before running the app."
    ) from None

genai.configure(api_key=_gemini_api_key)


# --- Flask App Initialization ---
app = Flask(__name__)
app.config.update(
    # A fresh random key is generated on every start of the process.
    SECRET_KEY=os.urandom(24),
    # Per-request working directories are created under this folder.
    UPLOAD_FOLDER=os.path.join(os.getcwd(), 'temp_uploads'),
)

# Create the upload folder up front; an already existing folder is fine.
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)


# --- Model and Language Configuration ---

# Generation settings applied to every request sent through ``model``.
generation_config = dict(
    temperature=0.7,
    top_p=0.9,
    top_k=40,
    max_output_tokens=8192,
    response_mime_type="text/plain",
)

# Module-wide Gemini model handle shared by the transcription and translation helpers.
model = genai.GenerativeModel(
    generation_config=generation_config,
    model_name="gemini-1.5-pro-latest",
)

# Language code mapping for gTTS, keyed by the display name shown to the user.
# Insertion order is significant: it defines the order of SUPPORTED_LANGUAGES.
# NOTE(review): whether gTTS accepts every code below is not verified here.
LANGUAGE_CODES = {
    "English": "en", "Chinese": "zh", "German": "de",
    "Spanish": "es", "Russian": "ru", "Korean": "ko",
    "French": "fr", "Japanese": "ja", "Portuguese": "pt",
    "Turkish": "tr", "Polish": "pl", "Catalan": "ca",
    "Dutch": "nl", "Arabic": "ar", "Swedish": "sv",
    "Italian": "it", "Indonesian": "id", "Hindi": "hi",
    "Finnish": "fi", "Vietnamese": "vi", "Hebrew": "he",
    "Ukrainian": "uk", "Greek": "el", "Malay": "ms",
    "Czech": "cs", "Romanian": "ro", "Danish": "da",
    "Hungarian": "hu", "Tamil": "ta", "Norwegian": "no",
    "Thai": "th", "Urdu": "ur", "Croatian": "hr",
    "Bulgarian": "bg", "Lithuanian": "lt", "Latin": "la",
    "Maori": "mi", "Malayalam": "ml", "Welsh": "cy",
    "Slovak": "sk", "Telugu": "te", "Persian": "fa",
    "Latvian": "lv", "Bengali": "bn", "Serbian": "sr",
    "Azerbaijani": "az", "Slovenian": "sl", "Kannada": "kn",
    "Estonian": "et", "Macedonian": "mk", "Breton": "br",
    "Basque": "eu", "Icelandic": "is", "Armenian": "hy",
    "Nepali": "ne", "Mongolian": "mn", "Bosnian": "bs",
    "Kazakh": "kk", "Albanian": "sq", "Swahili": "sw",
    "Galician": "gl", "Marathi": "mr", "Punjabi": "pa",
    "Sinhala": "si", "Khmer": "km", "Shona": "sn",
    "Yoruba": "yo", "Somali": "so", "Afrikaans": "af",
    "Occitan": "oc", "Georgian": "ka", "Belarusian": "be",
    "Tajik": "tg", "Sindhi": "sd", "Gujarati": "gu",
    "Amharic": "am", "Yiddish": "yi", "Lao": "lo",
    "Uzbek": "uz", "Faroese": "fo", "Haitian Creole": "ht",
    "Pashto": "ps", "Turkmen": "tk", "Nynorsk": "nn",
    "Maltese": "mt", "Sanskrit": "sa", "Luxembourgish": "lb",
    "Burmese": "my", "Tibetan": "bo", "Tagalog": "tl",
    "Malagasy": "mg", "Assamese": "as", "Tatar": "tt",
    "Hawaiian": "haw", "Lingala": "ln", "Hausa": "ha",
    "Bashkir": "ba", "Javanese": "jv", "Sundanese": "su",
}

# List of all supported languages: the "Auto Detect" pseudo-entry followed by
# every language name that has a code above, in the same order.
SUPPORTED_LANGUAGES = ["Auto Detect", *LANGUAGE_CODES]


# --- Core Processing Functions ---

def time_to_seconds(time_obj):
    """Return the time of day held by *time_obj* as seconds since midnight.

    *time_obj* must expose ``hour``, ``minute``, ``second`` and ``microsecond``
    attributes (as ``datetime.time`` does). The result is a float whose
    fractional part carries the microseconds.
    """
    whole_seconds = time_obj.hour * 3600 + time_obj.minute * 60 + time_obj.second
    fraction = time_obj.microsecond / 1e6
    return whole_seconds + fraction

def extract_audio_from_video(video_path, audio_path):
    """Extract the audio track of a video and write it to *audio_path*.

    The audio is written at a 16 kHz sample rate with MoviePy's progress
    logger disabled.

    Args:
        video_path: Path of the video file to read.
        audio_path: Destination path for the extracted audio.

    Returns:
        *audio_path* on success, or ``None`` if the video could not be opened,
        has no audio track, or the audio could not be written. Failures are
        logged rather than raised.
    """
    video = None
    try:
        video = VideoFileClip(video_path)
        if video.audio is None:
            # Report the real cause instead of an AttributeError on ``None``.
            logging.error("Error extracting audio: the video has no audio track")
            return None
        video.audio.write_audiofile(audio_path, fps=16000, logger=None)
        return audio_path
    except Exception as e:
        logging.error(f"Error extracting audio: {e}")
        return None
    finally:
        # Always release the clip's readers, including on the failure paths.
        if video is not None:
            try:
                video.close()
            except Exception as close_error:
                logging.warning(f"Could not close video clip: {close_error}")

def transcribe_audio_with_gemini(audio_path, source_language):
    """Transcribe an audio file to SRT text using the module-level Gemini model.

    The audio is uploaded, sent to the model together with a prompt that asks
    for an SRT-formatted transcription, and the uploaded copy is deleted
    afterwards whether or not the request succeeded.

    Args:
        audio_path: Path of the audio file to upload.
        source_language: Spoken language name to mention in the prompt, or
            ``"Auto Detect"`` to leave the language unspecified.

    Returns:
        The model's response text with surrounding whitespace stripped, or
        ``None`` if the upload or the generation failed. Failures are logged
        rather than raised.
    """
    audio_file = None
    try:
        audio_file = genai.upload_file(path=audio_path)
        language_prompt = f"in {source_language}" if source_language != "Auto Detect" else ""

        prompt = f"""
        You are a professional transcriber. Transcribe this audio accurately and verbatim {language_prompt}.
        Include timestamps for each sentence in the SRT (SubRip) format.
        Example:
        1
        00:00:01,234 --> 00:00:05,678
        This is the first sentence.

        2
        00:00:06,123 --> 00:00:09,456
        This is the second sentence.

        Ensure the timestamps are precise and correspond to the start and end of each spoken sentence.
        Respond ONLY with the transcription in the SRT format. Do not add explanations or any extra text.
        """

        response = model.generate_content([prompt, audio_file])
        return response.text.strip()
    except Exception as e:
        logging.error(f"Error during Gemini transcription: {e}")
        return None
    finally:
        # Clean up the uploaded file on every path. A failed deletion is only
        # logged so that it cannot discard an otherwise successful transcription.
        if audio_file is not None:
            try:
                genai.delete_file(audio_file.name)
            except Exception as cleanup_error:
                logging.warning(f"Could not delete uploaded audio file: {cleanup_error}")


def translate_srt(srt_text, target_language):
    """Ask the module-level Gemini model to translate SRT subtitles.

    The prompt instructs the model to keep indices and timestamps untouched
    and to translate only the subtitle text into *target_language*.

    Returns the stripped response text, or ``None`` when the request fails
    (the error is logged, not raised).
    """
    try:
        instructions = f"""
        Translate the following SRT subtitles into {target_language}.
        Preserve the SRT format perfectly (index numbers, timestamps, and structure).
        Translate only the subtitle text on the lines after the timestamps.
        Do not add any explanations or extra text. Your output must be a valid SRT file.
        Here is the SRT file content:
        {srt_text}
        """
        reply = model.generate_content(instructions)
        translated = reply.text
        return translated.strip()
    except Exception as exc:
        logging.error(f"Error during translation: {exc}")
        return None

def generate_tts_audio(srt_text, language, tts_audio_path):
    """Synthesize speech for the text of an SRT document and save it.

    All subtitle contents are joined with single spaces into one string, which
    is passed to gTTS and saved to *tts_audio_path*. *language* is a display
    name looked up in ``LANGUAGE_CODES``; unknown names fall back to ``"en"``.

    Returns *tts_audio_path* on success, or ``None`` when parsing or synthesis
    fails (the error is logged, not raised).
    """
    try:
        # Timing information is ignored; only the spoken text is used.
        spoken_text = " ".join(entry.content for entry in srt.parse(srt_text))
        voice_code = LANGUAGE_CODES.get(language, "en")

        speech = gTTS(text=spoken_text, lang=voice_code, slow=False)
        speech.save(tts_audio_path)
    except Exception as exc:
        logging.error(f"Error generating TTS audio: {exc}")
        return None
    return tts_audio_path

def add_subtitles_to_video(video_path, srt_text, output_video_path):
    """Burn SRT subtitles into a video and write the result to a new file.

    The subtitle text is written to a temporary ``.srt`` file because
    ``SubtitlesClip`` is given a file path. The subtitles are composited at the
    bottom centre of the video and the result is encoded with libx264 / AAC.
    The temporary file and the opened video clip are released on every path,
    including failures.

    Args:
        video_path: Path of the source video.
        srt_text: Subtitle document in SRT format.
        output_video_path: Destination path for the subtitled video.

    Returns:
        *output_video_path* on success, or ``None`` on any failure. Failures
        are logged rather than raised.
    """
    def generator(txt):
        # Style of each rendered subtitle line: white text with a thin black outline.
        return TextClip(txt, font='Arial-Bold', fontsize=24, color='white',
                        stroke_color='black', stroke_width=1)

    srt_path = None
    video = None
    try:
        # MoviePy's SubtitlesClip requires a file path
        with tempfile.NamedTemporaryFile(mode='w', suffix='.srt', delete=False, encoding='utf-8') as temp_srt:
            # Record the path before writing so a failed write is still cleaned up.
            srt_path = temp_srt.name
            temp_srt.write(srt_text)

        video = VideoFileClip(video_path)
        subtitles = SubtitlesClip(srt_path, generator)

        result = CompositeVideoClip([video, subtitles.set_position(('center', 'bottom'))])

        # Write output with original audio
        result.write_videofile(output_video_path, codec='libx264', audio_codec='aac', threads=4, logger=None)
        return output_video_path
    except Exception as e:
        logging.error(f"Error adding subtitles to video: {e}")
        return None
    finally:
        # Clean up the temp srt file whether or not rendering succeeded.
        if srt_path is not None and os.path.exists(srt_path):
            try:
                os.remove(srt_path)
            except OSError as cleanup_error:
                logging.warning(f"Could not remove temporary subtitle file: {cleanup_error}")
        # Release the source clip's readers.
        if video is not None:
            try:
                video.close()
            except Exception as close_error:
                logging.warning(f"Could not close video clip: {close_error}")


# --- Flask Routes ---

@app.route('/')
def index():
    """Serve the upload form, starting from an empty session."""
    # Drop anything a previous visit left in the session.
    session.clear()
    landing_page = render_template('index.html', supported_languages=SUPPORTED_LANGUAGES)
    return landing_page

@app.route('/process', methods=['POST'])
def process():
    """Handle the video processing request.

    Pipeline: save the upload into a fresh per-request directory, extract the
    audio, transcribe it to SRT, then optionally translate the subtitles,
    generate TTS audio and burn the subtitles into the video.

    Form fields read: ``video`` (file), ``source_language``, ``translate_to``,
    and the ``add_tts`` / ``add_subtitles`` checkboxes.

    Returns the rendered ``index.html``. Extraction or transcription failures
    abort the request with an error flash; failures of the optional steps only
    produce a warning flash, and the remaining results are still returned.

    NOTE(review): per-request directories are never deleted here, because the
    download route serves files from them; a retention policy is still needed.
    """
    def render_form(**context):
        # Every response renders the same template with the language list.
        return render_template('index.html', supported_languages=SUPPORTED_LANGUAGES, **context)

    video_file = request.files.get('video')
    if video_file is None or video_file.filename == '':
        flash('No video file selected. Please upload a video.', 'error')
        return render_form()

    # secure_filename() can reduce a name to an empty string; saving to the
    # session directory itself would then fail, so reject it up front.
    filename = secure_filename(video_file.filename)
    if not filename:
        flash('The uploaded file name is not valid. Please rename the file and try again.', 'error')
        return render_form()

    # --- Get form options ---
    source_language = request.form.get('source_language', 'Auto Detect')
    translate_to = request.form.get('translate_to', 'None')
    add_tts = 'add_tts' in request.form
    add_subtitles = 'add_subtitles' in request.form

    # --- Setup a unique session directory for this request ---
    session_id = str(uuid.uuid4())
    session['session_id'] = session_id
    session_dir = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
    os.makedirs(session_dir, exist_ok=True)

    # Prefix the stored upload so it can never collide with a generated
    # artifact name such as "output_video.mp4" or "extracted_audio.wav".
    video_path = os.path.join(session_dir, f"input_{filename}")
    video_file.save(video_path)

    results = {}

    audio_path = os.path.join(session_dir, "extracted_audio.wav")
    try:
        # 1. Extract Audio
        if not extract_audio_from_video(video_path, audio_path):
            flash('Failed to extract audio from the video.', 'error')
            return render_form()

        # 2. Transcribe Audio
        original_srt_text = transcribe_audio_with_gemini(audio_path, source_language)
        if not original_srt_text:
            flash('Failed to transcribe the audio. The API call might have failed.', 'error')
            return render_form()
    finally:
        # The extracted audio is only needed for transcription; remove it on
        # every path, including the early error returns above.
        try:
            os.remove(audio_path)
        except FileNotFoundError:
            pass
        except OSError as cleanup_error:
            logging.warning(f"Could not remove extracted audio: {cleanup_error}")

    original_srt_path = os.path.join(session_dir, "original_subtitles.srt")
    with open(original_srt_path, "w", encoding="utf-8") as f:
        f.write(original_srt_text)
    results['original_srt_file'] = "original_subtitles.srt"

    # Keep track of the final SRT, and the language it is written in, to use
    # for TTS and video burn-in.
    final_srt_text = original_srt_text
    final_language = source_language

    # 3. Translate Subtitles (if requested)
    if translate_to and translate_to != "None":
        translated_srt_text = translate_srt(original_srt_text, translate_to)
        if translated_srt_text:
            translated_srt_path = os.path.join(session_dir, "translated_subtitles.srt")
            with open(translated_srt_path, "w", encoding="utf-8") as f:
                f.write(translated_srt_text)
            results['translated_srt_file'] = "translated_subtitles.srt"
            # Use translated text for next steps
            final_srt_text = translated_srt_text
            final_language = translate_to
        else:
            flash(f'Failed to translate subtitles to {translate_to}.', 'warning')

    # 4. Generate TTS Audio (if requested)
    if add_tts:
        # Speak in the language of the text actually being read: if the
        # translation failed, that is still the source language.
        tts_lang = final_language
        # If source was auto-detect, we can't reliably guess the TTS language. Default to English.
        if tts_lang == 'Auto Detect':
            flash('TTS language cannot be "Auto Detect". Defaulting to English. For better results, please specify the source language.', 'warning')
            tts_lang = 'English'

        tts_audio_path = os.path.join(session_dir, "tts_audio.mp3")
        if generate_tts_audio(final_srt_text, tts_lang, tts_audio_path):
            results['tts_audio_file'] = "tts_audio.mp3"
        else:
            flash('Failed to generate Text-to-Speech audio.', 'warning')

    # 5. Add Subtitles to Video (if requested)
    if add_subtitles:
        output_video_path = os.path.join(session_dir, "output_video.mp4")
        if add_subtitles_to_video(video_path, final_srt_text, output_video_path):
            results['output_video_file'] = "output_video.mp4"
        else:
            flash('Failed to add subtitles to the video.', 'warning')

    return render_form(results=results, session_id=session_id)

@app.route('/download/<session_id>/<path:filename>')
def download_file(session_id, filename):
    """Serve a file from a session directory as a download.

    Args:
        session_id: Identifier of the per-request directory. It must be a
            canonical UUID string, which is the form the processing route
            generates with ``str(uuid.uuid4())``.
        filename: Path of the file relative to the session directory.

    Returns:
        The file as an attachment. Responds with 404 when *session_id* is not
        a canonical UUID; ``send_from_directory`` handles *filename* itself.
    """
    # Imported locally because the module-level flask import omits ``abort``.
    from flask import abort

    # session_id becomes part of a filesystem path. Without this check a value
    # such as ".." would make the directory resolve outside the upload folder.
    try:
        is_canonical_uuid = str(uuid.UUID(session_id)) == session_id
    except ValueError:
        is_canonical_uuid = False
    if not is_canonical_uuid:
        abort(404)

    directory = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
    return send_from_directory(directory, filename, as_attachment=True)


# --- Run the App ---
# Start Flask's built-in server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    # "0.0.0.0" listens on all network interfaces rather than only localhost.
    # NOTE(review): port 7860 presumably matches the hosting platform's expected port; confirm.
    app.run(host="0.0.0.0", port=7860)