malvin noel committed on
Commit
76e06c0
·
1 Parent(s): 11210e5

Update the app

Browse files
Files changed (1) hide show
  1. app.py +197 -123
app.py CHANGED
@@ -1,9 +1,12 @@
1
  import gradio as gr
2
  import os
3
  import shutil
 
4
  from typing import List, Optional
5
- import spaces
6
 
 
 
 
7
  from scripts.generate_scripts import generate_script, generate_title, generate_description
8
  from scripts.generate_voice import generate_voice
9
  from scripts.get_footage import get_video_montage_from_folder
@@ -15,49 +18,51 @@ from scripts.generate_subtitles import (
15
  )
16
 
17
  # ──────────────────────────────────────────────────────────────────────────────
18
- # Constants & helper utils
19
  # ──────────────────────────────────────────────────────────────────────────────
20
-
21
  WORDS_PER_SECOND = 2.3 # ≃ 140 wpm
 
 
 
 
 
 
22
 
 
 
23
 
24
  def safe_copy(src: str, dst: str) -> str:
 
25
  if os.path.abspath(src) == os.path.abspath(dst):
26
  return src
27
  shutil.copy(src, dst)
28
  return dst
29
 
 
 
 
 
 
 
30
  # ──────────────────────────────────────────────────────────────────────────────
31
- # Core processing pipeline
32
  # ──────────────────────────────────────────────────────────────────────────────
33
- def process_video(
 
34
  context: str,
35
  instruction: str,
36
  target_duration: int,
37
  script_mode: str,
38
  custom_script: Optional[str],
39
- lum: float,
40
- contrast: float,
41
- gamma: float,
42
- add_subs: bool,
43
- accumulated_videos: List[str] | None = None,
44
- user_music: Optional[str] = None,
45
- show_progress_bar: bool = True,
46
  ):
47
- """Build the final video using user‑defined visual parameters (brightness, contrast, gamma)."""
48
-
49
- if not accumulated_videos:
50
- raise ValueError("❌ Please upload at least one background video (.mp4) before generating.")
51
 
52
  approx_words = int(target_duration * WORDS_PER_SECOND)
53
 
54
- # --- 1. Script (AI or custom) ---
55
  if script_mode == "Use my script":
56
  if not custom_script or not custom_script.strip():
57
- raise ValueError("❌ You selected 'Use my script' but the script field is empty!")
58
  script = custom_script.strip()
59
- title = generate_title(script)
60
- description = generate_description(script)
61
  else:
62
  prompt = (
63
  f"You are a video creation expert. Here is the context: {context.strip()}\n"
@@ -65,155 +70,224 @@ def process_video(
65
  f"πŸ”΄ Strict target duration: {target_duration}s β€” β‰ˆ {approx_words} words (must be respected)."
66
  )
67
  script = generate_script(prompt)
68
- title = generate_title(script)
69
- description = generate_description(script)
70
 
71
- # --- 2. Prepare folders ---
72
- for folder in ("./assets/audio", "./assets/backgrounds", "./assets/output"):
73
- os.makedirs(folder, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
74
 
75
- voice_path = "./assets/audio/voice.mp3"
76
- final_no_subs = "./assets/output/final_video.mp4"
77
- final_with_subs = "./assets/output/final_video_subtitles.mp4"
78
 
79
- # --- 3. Copy videos ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  for f in os.listdir("./assets/backgrounds"):
81
  if f.lower().endswith(".mp4"):
82
  os.remove(os.path.join("./assets/backgrounds", f))
83
  for idx, v in enumerate(accumulated_videos):
84
- if not os.path.isfile(v) or not v.lower().endswith(".mp4"):
85
- raise ValueError(f"❌ Invalid file: {v}")
86
  safe_copy(v, os.path.join("./assets/backgrounds", f"video_{idx:03d}.mp4"))
87
 
88
- # --- 4. AI voice ---
89
- generate_voice(script, voice_path)
90
-
91
- # --- 5. Video montage ---
92
- music_path = user_music if user_music and os.path.isfile(user_music) else None
93
- out_no_audio = get_video_montage_from_folder(
94
  folder_path="./assets/backgrounds",
95
  audio_path=voice_path,
96
  output_dir="./assets/video_music",
97
  lum=lum,
98
  contrast=contrast,
99
  gamma=gamma,
100
- show_progress_bar=show_progress_bar,
101
  )
 
 
 
 
102
 
103
- # --- 6. Mixing & subtitles ---
104
- edit_video(out_no_audio, voice_path, music_path, final_no_subs)
105
 
106
- if add_subs:
107
- segments = transcribe_audio_to_subs(voice_path)
108
- subs = chunk_text_by_words(segments, max_words=3)
109
- add_subtitles_to_video(final_no_subs, subs, final_with_subs)
110
- return script, title, description, final_with_subs
111
- else:
112
- return script, title, description, final_no_subs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
  # ──────────────────────────────────────────────────────────────────────────────
115
- # Upload helper
116
  # ──────────────────────────────────────────────────────────────────────────────
117
 
118
- def accumulate_files(new: List[str], state: List[str] | None):
119
- state = state or []
120
- for f in new or []:
121
- if isinstance(f, str) and os.path.isfile(f) and f.lower().endswith(".mp4") and f not in state:
122
- state.append(f)
123
- return state
124
 
125
- # ──────────────────────────────────────────────────────────────────────────────
126
- # Gradio UI
127
- # ──────────────────────────────────────────────────────────────────────────────
 
 
128
 
129
- with gr.Blocks(theme="gradio/soft") as demo:
130
- gr.Markdown("# 🎬 AI Video Generator β€” Advanced Controls")
 
 
 
131
 
132
- # ------------------- Parameters -------------------
133
- with gr.Tab("πŸ› οΈ Settings"):
134
  with gr.Row():
135
- context_input = gr.Textbox(label="🧠 Context", lines=4)
136
- instruction_input = gr.Textbox(label="🎯 Instruction", lines=4)
137
-
138
  duration_slider = gr.Slider(5, 120, 1, 60, label="⏱️ Target duration (s)")
139
-
140
  script_mode = gr.Radio([
141
  "Generate script with AI",
142
  "Use my script",
143
  ], value="Generate script with AI", label="Script mode")
 
144
 
145
- custom_script_input = gr.Textbox(label="✍️ My script", lines=8, interactive=False)
146
-
147
- def toggle_script_input(mode):
148
  return gr.update(interactive=(mode == "Use my script"))
149
 
150
- script_mode.change(toggle_script_input, inputs=script_mode, outputs=custom_script_input)
151
 
152
- with gr.Accordion("🎨 Video Settings (brightness/contrast/gamma)", open=False):
153
- lum_slider = gr.Slider(0, 20, 6, step=0.5, label="Brightness (0–20)")
154
- contrast_slider = gr.Slider(0.5, 2.0, 1.0, step=0.05, label="Contrast (0.5–2.0)")
155
- gamma_slider = gr.Slider(0.5, 2.0, 1.0, step=0.05, label="Gamma (0.5–2.0)")
156
 
157
- with gr.Row():
158
- add_subs_checkbox = gr.Checkbox(label="Add dynamic subtitles", value=True)
 
 
 
159
 
160
- with gr.Row():
161
- show_bar = gr.Checkbox(label="Show progress bar", value=True)
 
 
 
 
 
 
 
 
 
 
 
162
 
163
- # Upload videos
 
164
  videos_dropzone = gr.Files(label="🎞️ Background videos (MP4)", file_types=[".mp4"], type="filepath")
165
  videos_state = gr.State([])
166
- video_list_display = gr.Textbox(label="βœ… Selected videos", interactive=False, lines=4)
167
  videos_dropzone.upload(accumulate_files, [videos_dropzone, videos_state], videos_state, queue=False)
168
- videos_state.change(lambda s: "\n".join(os.path.basename(f) for f in s), videos_state, video_list_display, queue=False)
169
-
170
 
171
- user_music = gr.File(label="🎡 Background music (MP3, optional)", file_types=[".mp3"], type="filepath")
 
 
 
 
172
 
173
- generate_btn = gr.Button("πŸš€ Generate the video", variant="primary")
 
174
 
175
- with gr.Tab("πŸ“€ Results"):
176
- video_output = gr.Video(label="🎬 Generated Video")
 
 
 
177
 
178
- # Script + copy button
179
- script_output = gr.Textbox(label="πŸ“ Script", lines=6, interactive=False)
180
- copy_script_btn = gr.Button("πŸ“‹ Copy")
181
- copy_script_btn.click(
182
- None,
183
- inputs=[script_output],
184
- outputs=None,
185
- js="(text) => navigator.clipboard.writeText(text)"
 
 
 
 
 
 
 
 
 
 
 
 
186
  )
187
 
188
- # Title + copy button
189
- title_output = gr.Textbox(label="🎬 Title", lines=1, interactive=False)
190
- copy_title_btn = gr.Button("πŸ“‹ Copy")
191
- copy_title_btn.click(None, inputs=title_output, outputs=None, js="(text) => {navigator.clipboard.writeText(text);}")
192
-
193
- # Description + copy button
194
- desc_output = gr.Textbox(label="πŸ“„ Description", lines=3, interactive=False)
195
- copy_desc_btn = gr.Button("πŸ“‹ Copy")
196
- copy_desc_btn.click(None, inputs=desc_output, outputs=None, js="(text) => {navigator.clipboard.writeText(text);}")
197
-
198
- # ------------------- Generation Callback -------------------
199
- generate_btn.click(
200
- fn=process_video,
201
- inputs=[
202
- context_input,
203
- instruction_input,
204
- duration_slider,
205
- script_mode,
206
- custom_script_input,
207
- lum_slider,
208
- contrast_slider,
209
- gamma_slider,
210
- add_subs_checkbox,
211
- videos_state,
212
- user_music,
213
- show_bar,
214
- ],
215
- outputs=[script_output, title_output, desc_output, video_output],
216
- )
217
 
 
218
  demo.launch()
219
-
 
1
  import gradio as gr
2
  import os
3
  import shutil
4
+ import datetime
5
  from typing import List, Optional
 
6
 
7
+ # ──────────────────────────────────────────────────────────────────────────────
8
+ # Import project-specific helpers — unchanged from initial version
9
+ # ──────────────────────────────────────────────────────────────────────────────
10
  from scripts.generate_scripts import generate_script, generate_title, generate_description
11
  from scripts.generate_voice import generate_voice
12
  from scripts.get_footage import get_video_montage_from_folder
 
18
  )
19
 
20
  # ──────────────────────────────────────────────────────────────────────────────
21
+ # Constants & utilities
22
  # ──────────────────────────────────────────────────────────────────────────────
 
23
# Narration speed used to turn a target duration (seconds) into a word budget.
WORDS_PER_SECOND = 2.3  # ≃ 140 wpm

# Working directories used by the callbacks in this file.
ASSETS_DIRS = (
    "./assets/audio",
    "./assets/backgrounds",
    "./assets/output",
    "./assets/video_music",
)

# Create every working directory up front so later steps can write into them.
for d in ASSETS_DIRS:
    os.makedirs(d, exist_ok=True)
33
 
34
def safe_copy(src: str, dst: str) -> str:
    """Copy ``src`` to ``dst`` unless both already refer to the same file.

    Args:
        src: Path of the file to copy.
        dst: Path the file should be copied to.

    Returns:
        ``src`` when both arguments resolve to the same absolute path,
        otherwise ``dst``.
    """
    if os.path.abspath(src) == os.path.abspath(dst):
        return src
    try:
        shutil.copy(src, dst)
    except shutil.SameFileError:
        # The two paths are spelled differently but are one file on disk
        # (symlink or hard link), so there is nothing to copy.
        pass
    return dst
40
 
41
+ # Wrapper util to timestamp generated files so different runs don't overwrite each other
42
+
43
def timestamped_filename(prefix: str, ext: str) -> str:
    """Return a new, unique path under ``./assets/output`` for a generated file.

    The file name has the form ``<prefix>_<YYYYmmdd_HHMMSS>_<8 hex chars>.<ext>``.
    The random suffix keeps two calls made within the same second (for the
    same prefix) from producing the same path and overwriting each other.

    Args:
        prefix: Leading part of the file name (e.g. ``"voice"``).
        ext: File extension, with or without a leading dot.

    Returns:
        The path as a string; the file itself is not created.
    """
    import uuid  # local import: only this helper needs it

    ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    unique = uuid.uuid4().hex[:8]
    suffix = ext.lstrip(".")  # tolerate ".mp3" as well as "mp3"
    return os.path.join("./assets/output", f"{prefix}_{ts}_{unique}.{suffix}")
46
+
47
  # ──────────────────────────────────────────────────────────────────────────────
48
+ # Independent functional endpoints (Gradio callbacks)
49
  # ──────────────────────────────────────────────────────────────────────────────
50
+
51
+ def cb_generate_script(
52
  context: str,
53
  instruction: str,
54
  target_duration: int,
55
  script_mode: str,
56
  custom_script: Optional[str],
 
 
 
 
 
 
 
57
  ):
58
+ """Generate (or accept) a script + title/description."""
 
 
 
59
 
60
  approx_words = int(target_duration * WORDS_PER_SECOND)
61
 
 
62
  if script_mode == "Use my script":
63
  if not custom_script or not custom_script.strip():
64
+ raise gr.Error("❌ You selected 'Use my script' but the script field is empty!")
65
  script = custom_script.strip()
 
 
66
  else:
67
  prompt = (
68
  f"You are a video creation expert. Here is the context: {context.strip()}\n"
 
70
  f"πŸ”΄ Strict target duration: {target_duration}s β€” β‰ˆ {approx_words} words (must be respected)."
71
  )
72
  script = generate_script(prompt)
 
 
73
 
74
+ title = generate_title(script)
75
+ description = generate_description(script)
76
+ return script, title, description, script # last return for state update
77
+
78
+
79
def cb_generate_voice(script: str):
    """Synthesize narration audio for ``script``.

    Raises ``gr.Error`` when the script is missing or only whitespace.
    Otherwise asks ``generate_voice`` to write an MP3 at a timestamped path
    and returns that path twice: once for the audio preview and once for the
    shared voice state.
    """
    has_text = bool(script) and bool(script.strip())
    if not has_text:
        raise gr.Error("❌ Script text is empty – generate or paste a script first.")

    output_mp3 = timestamped_filename("voice", "mp3")
    generate_voice(script, output_mp3)
    # The same path is returned twice; the second copy updates the state.
    return output_mp3, output_mp3
87
 
 
 
 
88
 
89
+ def accumulate_files(new: List[str], state: List[str] | None):
90
+ """Append only new valid MP4 files to state."""
91
+ state = state or []
92
+ for f in new or []:
93
+ if (
94
+ isinstance(f, str)
95
+ and os.path.isfile(f)
96
+ and f.lower().endswith(".mp4")
97
+ and f not in state
98
+ ):
99
+ state.append(f)
100
+ return state
101
+
102
+
103
def cb_create_montage(
    accumulated_videos: List[str],
    voice_path: str,
    lum: float,
    contrast: float,
    gamma: float,
    show_bar: bool,
):
    """Build the background-video montage for the narration audio.

    Steps:
      1. Validate the inputs (at least one video, an existing narration file,
         and every video being an existing ``.mp4`` file).
      2. Remove the ``.mp4`` files currently in ``./assets/backgrounds`` and
         copy the selected videos there as ``video_000.mp4``, ``video_001.mp4``…
      3. Call ``get_video_montage_from_folder`` with the visual settings.

    Args:
        accumulated_videos: Paths of the uploaded background videos.
        voice_path: Path of the narration audio file.
        lum, contrast, gamma: Visual settings forwarded to the montage helper.
        show_bar: Forwarded as ``show_progress_bar``.

    Returns:
        The value returned by ``get_video_montage_from_folder``, twice (video
        preview + shared state). Per the original inline comment this is the
        path of the saved montage — confirm against the helper.

    Raises:
        gr.Error: On any failed validation.
    """
    if not accumulated_videos:
        raise gr.Error("❌ Please upload at least one background video (.mp4) before generating the montage.")
    if not voice_path or not os.path.isfile(voice_path):
        raise gr.Error("❌ A narration audio file (.mp3) is required – generate or upload one first.")

    # Validate every video BEFORE touching the backgrounds folder, so a stale
    # or invalid path cannot leave the folder half-emptied.
    for video in accumulated_videos:
        if not isinstance(video, str) or not os.path.isfile(video) or not video.lower().endswith(".mp4"):
            raise gr.Error(f"❌ Invalid file: {video}")

    backgrounds_dir = "./assets/backgrounds"

    # Clean previous backgrounds, then copy the new ones.
    for name in os.listdir(backgrounds_dir):
        if name.lower().endswith(".mp4"):
            os.remove(os.path.join(backgrounds_dir, name))
    for idx, video in enumerate(accumulated_videos):
        safe_copy(video, os.path.join(backgrounds_dir, f"video_{idx:03d}.mp4"))

    # The helper saves the montage itself; its return value is what we hand back.
    montage_path = get_video_montage_from_folder(
        folder_path=backgrounds_dir,
        audio_path=voice_path,
        output_dir="./assets/video_music",
        lum=lum,
        contrast=contrast,
        gamma=gamma,
        show_progress_bar=show_bar,
    )
    return montage_path, montage_path
139
 
 
 
140
 
141
def cb_mix_audio(
    montage_path: str,
    voice_path: str,
    music_file: Optional[str] = None,
):
    """Mix the montage video with the narration and optional background music.

    Raises ``gr.Error`` when the montage or the narration file is missing.
    ``music_file`` is used only if it names an existing file; otherwise
    ``None`` is passed to ``edit_video``. The output goes to a timestamped
    ``final_no_subs`` path, which is returned twice (preview + shared state).
    """
    montage_ok = bool(montage_path) and os.path.isfile(montage_path)
    if not montage_ok:
        raise gr.Error("❌ Please generate a montage video first.")

    voice_ok = bool(voice_path) and os.path.isfile(voice_path)
    if not voice_ok:
        raise gr.Error("❌ Narration audio missing – generate or upload it.")

    # Background music is optional: fall back to None when absent or not a file.
    music_path = None
    if music_file and os.path.isfile(music_file):
        music_path = music_file

    mixed_video = timestamped_filename("final_no_subs", "mp4")
    edit_video(montage_path, voice_path, music_path, mixed_video)
    return mixed_video, mixed_video
156
+
157
+
158
def cb_add_subtitles(final_no_subs: str, voice_path: str):
    """Overlay subtitles on the mixed video.

    Raises ``gr.Error`` when the mixed video or the narration file is missing.
    The narration is transcribed, the segments are regrouped into chunks of at
    most three words, and the result is written to a timestamped
    ``final_with_subs`` path, which is returned.
    """
    video_ok = bool(final_no_subs) and os.path.isfile(final_no_subs)
    if not video_ok:
        raise gr.Error("❌ Mixed video not found – run the 'Mix Audio/Video' step first.")

    audio_ok = bool(voice_path) and os.path.isfile(voice_path)
    if not audio_ok:
        raise gr.Error("❌ Narration audio missing.")

    subtitle_chunks = chunk_text_by_words(
        transcribe_audio_to_subs(voice_path),
        max_words=3,
    )
    subtitled_video = timestamped_filename("final_with_subs", "mp4")
    add_subtitles_to_video(final_no_subs, subtitle_chunks, subtitled_video)
    return subtitled_video
170
 
171
  # ──────────────────────────────────────────────────────────────────────────────
172
+ # Gradio UI – one tab per function
173
  # ──────────────────────────────────────────────────────────────────────────────
174
 
175
# The UI exposes one tab per pipeline step. Paths produced by one step are kept
# in gr.State objects so that later tabs can reuse them without a re-upload.
demo = gr.Blocks(theme="gradio/soft")

with demo:
    gr.Markdown("# 🎬 Modular AI Video Toolkit")
    gr.Markdown(
        "Each tab exposes **one single processing step** so you can mix & match them as you like. 💡"
    )

    # Shared state across tabs
    script_state = gr.State("")
    voice_state = gr.State("")
    montage_state = gr.State("")
    final_no_subs_state = gr.State("")

    # ───────────────────────── Script generation ─────────────────────────
    with gr.Tab("1️⃣ Generate Script"):
        with gr.Row():
            context_in = gr.Textbox(label="🧠 Context", lines=4)
            instruction_in = gr.Textbox(label="🎯 Instruction", lines=4)
        # 5–120 s range, 60 s default, 1 s steps. The values are passed by
        # keyword so that value and step cannot be swapped positionally.
        duration_slider = gr.Slider(5, 120, value=60, step=1, label="⏱️ Target duration (s)")
        script_mode = gr.Radio([
            "Generate script with AI",
            "Use my script",
        ], value="Generate script with AI", label="Script mode")
        custom_script_in = gr.Textbox(label="✍️ My script", lines=8, interactive=False)

        def _toggle(mode):
            # The custom-script box is editable only in "Use my script" mode.
            return gr.update(interactive=(mode == "Use my script"))

        script_mode.change(_toggle, inputs=script_mode, outputs=custom_script_in)

        gen_script_btn = gr.Button("📝 Create Script", variant="primary")
        script_out = gr.Textbox(label="Script", lines=8, interactive=False)
        title_out = gr.Textbox(label="Title", lines=1, interactive=False)
        desc_out = gr.Textbox(label="Description", lines=3, interactive=False)

        gen_script_btn.click(
            cb_generate_script,
            [context_in, instruction_in, duration_slider, script_mode, custom_script_in],
            [script_out, title_out, desc_out, script_state],
        )

    # ───────────────────────── Voice generation ─────────────────────────
    with gr.Tab("2️⃣ Generate Voice"):
        script_in_voice = gr.Textbox(label="Script (paste or use from previous step)", lines=8)
        gen_voice_btn = gr.Button("🔈 Synthesize Voice", variant="primary")
        voice_audio = gr.Audio(label="Generated voice", interactive=False)

        gen_voice_btn.click(
            cb_generate_voice,
            inputs=[script_in_voice],
            outputs=[voice_audio, voice_state],
        )
        # Auto-populate script textbox with state when it updates
        script_state.change(lambda s: s, script_state, script_in_voice, queue=False)

    # ───────────────────────── Montage creation ─────────────────────────
    with gr.Tab("3️⃣ Create Montage"):
        videos_dropzone = gr.Files(label="🎞️ Background videos (MP4)", file_types=[".mp4"], type="filepath")
        videos_state = gr.State([])
        videos_dropzone.upload(accumulate_files, [videos_dropzone, videos_state], videos_state, queue=False)
        videos_display = gr.Textbox(label="Selected videos", interactive=False)
        # Show only the base names of the accumulated files.
        videos_state.change(
            lambda s: "\n".join(os.path.basename(f) for f in s),
            videos_state,
            videos_display,
            queue=False,
        )

        with gr.Accordion("🎨 Visual settings", open=False):
            lum_slider = gr.Slider(0, 20, 6, step=0.5, label="Brightness (0–20)")
            contrast_slider = gr.Slider(0.5, 2.0, 1.0, step=0.05, label="Contrast (0.5–2.0)")
            gamma_slider = gr.Slider(0.5, 2.0, 1.0, step=0.05, label="Gamma (0.5–2.0)")
            show_bar = gr.Checkbox(label="Show progress bar", value=True)

        create_montage_btn = gr.Button("🎞️ Build Montage", variant="primary")
        montage_video = gr.Video(label="Montage Preview")

        create_montage_btn.click(
            cb_create_montage,
            [videos_state, voice_state, lum_slider, contrast_slider, gamma_slider, show_bar],
            [montage_video, montage_state],
        )

    # ───────────────────────── Mixing (voice + music) ─────────────────────────
    with gr.Tab("4️⃣ Mix Audio / Video"):
        voice_in = gr.File(label="Narration MP3 (optional – leave empty to use state)", file_types=[".mp3"], type="filepath")
        montage_in = gr.File(label="Montage MP4 (optional – leave empty to use state)", file_types=[".mp4"], type="filepath")
        music_in = gr.File(label="Background music (MP3 – optional)", file_types=[".mp3"], type="filepath")

        def _use_state(file, state):
            # An explicitly uploaded file wins over the value kept in state.
            return file if file else state

        mix_btn = gr.Button("🎚️ Mix", variant="primary")
        final_no_subs_vid = gr.Video(label="Mixed video (no subtitles)")

        mix_btn.click(
            lambda montage, voice, music, montage_state_val, voice_state_val: cb_mix_audio(
                _use_state(montage, montage_state_val),
                _use_state(voice, voice_state_val),
                music,
            ),
            [montage_in, voice_in, music_in, montage_state, voice_state],
            [final_no_subs_vid, final_no_subs_state],
        )

    # ───────────────────────── Subtitles ─────────────────────────
    with gr.Tab("5️⃣ Add Subtitles"):
        video_in_sub = gr.File(label="Video MP4 (optional – defaults to last mixed video)", type="filepath", file_types=[".mp4"])
        voice_in_sub = gr.File(label="Narration MP3 (optional – defaults to last generated voice)", type="filepath", file_types=[".mp3"])
        add_subs_btn = gr.Button("🔤 Add Subtitles", variant="primary")
        final_subs_video = gr.Video(label="Final video with subtitles")

        add_subs_btn.click(
            # Uploaded files take precedence; otherwise fall back to the state values.
            lambda v_in, a_in, v_state, a_state: cb_add_subtitles(
                v_in if v_in else v_state,
                a_in if a_in else a_state,
            ),
            [video_in_sub, voice_in_sub, final_no_subs_state, voice_state],
            final_subs_video,
        )

# Startup
demo.launch()