ginipick commited on
Commit
7b942c6
·
verified ·
1 Parent(s): 2cf6cdf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +276 -249
app.py CHANGED
@@ -71,18 +71,23 @@ audio_model_config.download_if_needed()
71
  setup_eval_logging()
72
 
73
  # Initialize NAG Video Model
74
- vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
75
- wan_path = hf_hub_download(repo_id=SUB_MODEL_ID, filename=SUB_MODEL_FILENAME)
76
- transformer = NagWanTransformer3DModel.from_single_file(wan_path, torch_dtype=torch.bfloat16)
77
- pipe = NAGWanPipeline.from_pretrained(
78
- MODEL_ID, vae=vae, transformer=transformer, torch_dtype=torch.bfloat16
79
- )
80
- pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=5.0)
81
- pipe.to("cuda")
 
82
 
83
- pipe.transformer.__class__.attn_processors = NagWanTransformer3DModel.attn_processors
84
- pipe.transformer.__class__.set_attn_processor = NagWanTransformer3DModel.set_attn_processor
85
- pipe.transformer.__class__.forward = NagWanTransformer3DModel.forward
 
 
 
 
86
 
87
  # Initialize MMAudio Model
88
  def get_mmaudio_model() -> tuple[MMAudio, FeaturesUtils, SequenceConfig]:
@@ -102,40 +107,53 @@ def get_mmaudio_model() -> tuple[MMAudio, FeaturesUtils, SequenceConfig]:
102
 
103
  return net, feature_utils, seq_cfg
104
 
105
- audio_net, audio_feature_utils, audio_seq_cfg = get_mmaudio_model()
 
 
 
 
 
106
 
107
  # Audio generation function
108
  @torch.inference_mode()
109
  def add_audio_to_video(video_path, prompt, audio_negative_prompt, audio_steps, audio_cfg_strength, duration):
110
  """Generate and add audio to video using MMAudio"""
111
- rng = torch.Generator(device=device)
112
- rng.seed() # Random seed for audio
113
- fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=audio_steps)
114
-
115
- video_info = load_video(video_path, duration)
116
- clip_frames = video_info.clip_frames
117
- sync_frames = video_info.sync_frames
118
- duration = video_info.duration_sec
119
- clip_frames = clip_frames.unsqueeze(0)
120
- sync_frames = sync_frames.unsqueeze(0)
121
- audio_seq_cfg.duration = duration
122
- audio_net.update_seq_lengths(audio_seq_cfg.latent_seq_len, audio_seq_cfg.clip_seq_len, audio_seq_cfg.sync_seq_len)
123
-
124
- audios = mmaudio_generate(clip_frames,
125
- sync_frames, [prompt],
126
- negative_text=[audio_negative_prompt],
127
- feature_utils=audio_feature_utils,
128
- net=audio_net,
129
- fm=fm,
130
- rng=rng,
131
- cfg_strength=audio_cfg_strength)
132
- audio = audios.float().cpu()[0]
133
-
134
- # Create video with audio
135
- video_with_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
136
- make_video(video_info, video_with_audio_path, audio, sampling_rate=audio_seq_cfg.sampling_rate)
137
-
138
- return video_with_audio_path
 
 
 
 
 
 
 
 
139
 
140
  # Combined generation function
141
  def get_duration(prompt, nag_negative_prompt, nag_scale, height, width, duration_seconds,
@@ -156,53 +174,65 @@ def generate_video_with_audio(
156
  enable_audio=True, audio_negative_prompt=DEFAULT_AUDIO_NEGATIVE_PROMPT,
157
  audio_steps=25, audio_cfg_strength=4.5,
158
  ):
159
- # Generate video first
160
- target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
161
- target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)
162
-
163
- num_frames = np.clip(int(round(int(duration_seconds) * FIXED_FPS) + 1), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
164
-
165
- current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
166
-
167
- with torch.inference_mode():
168
- nag_output_frames_list = pipe(
169
- prompt=prompt,
170
- nag_negative_prompt=nag_negative_prompt,
171
- nag_scale=nag_scale,
172
- nag_tau=3.5,
173
- nag_alpha=0.5,
174
- height=target_h, width=target_w, num_frames=num_frames,
175
- guidance_scale=0.,
176
- num_inference_steps=int(steps),
177
- generator=torch.Generator(device="cuda").manual_seed(current_seed)
178
- ).frames[0]
179
-
180
- # Save initial video without audio
181
- with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
182
- temp_video_path = tmpfile.name
183
- export_to_video(nag_output_frames_list, temp_video_path, fps=FIXED_FPS)
184
-
185
- # Add audio if enabled
186
- if enable_audio:
187
- try:
188
- final_video_path = add_audio_to_video(
189
- temp_video_path,
190
- prompt, # Use the same prompt for audio generation
191
- audio_negative_prompt,
192
- audio_steps,
193
- audio_cfg_strength,
194
- duration_seconds
195
- )
196
- # Clean up temp video
197
- if os.path.exists(temp_video_path):
198
- os.remove(temp_video_path)
199
- except Exception as e:
200
- log.error(f"Audio generation failed: {e}")
 
 
 
 
 
 
 
 
 
 
 
201
  final_video_path = temp_video_path
202
- else:
203
- final_video_path = temp_video_path
204
-
205
- return final_video_path, current_seed
 
206
 
207
  # Example generation function
208
  def generate_with_example(prompt, nag_negative_prompt, nag_scale):
@@ -224,11 +254,11 @@ def generate_with_example(prompt, nag_negative_prompt, nag_scale):
224
  # Examples with audio descriptions
225
  examples = [
226
  ["Midnight highway outside a neon-lit city. A black 1973 Porsche 911 Carrera RS speeds at 120 km/h. Inside, a stylish singer-guitarist sings while driving, vintage sunburst guitar on the passenger seat. Sodium streetlights streak over the hood; RGB panels shift magenta to blue on the driver. Camera: drone dive, Russian-arm low wheel shot, interior gimbal, FPV barrel roll, overhead spiral. Neo-noir palette, rain-slick asphalt reflections, roaring flat-six engine blended with live guitar.", DEFAULT_NAG_NEGATIVE_PROMPT, 11],
227
- ["Arena rock concert packed with 20 000 fans. A flamboyant lead guitarist in leather jacket and mirrored aviators shreds a cherry-red Flying V on a thrust stage. Pyro flames shoot up on every downbeat, COโ‚‚ jets burst behind. Moving-head spotlights swirl teal and amber, follow-spots rim-light the guitaristโ€™s hair. Steadicam 360-orbit, crane shot rising over crowd, ultra-slow-motion pick attack at 1 000 fps. Film-grain teal-orange grade, thunderous crowd roar mixes with screaming guitar solo.", DEFAULT_NAG_NEGATIVE_PROMPT, 11],
228
  ["Golden-hour countryside road winding through rolling wheat fields. A man and woman ride a vintage cafรฉ-racer motorcycle, hair and scarf fluttering in the warm breeze. Drone chase shot reveals endless patchwork farmland; low slider along rear wheel captures dust trail. Sun-flare back-lights the riders, lens blooms on highlights. Soft acoustic rock underscore; engine rumble mixed at โ€“8 dB. Warm pastel color grade, gentle film-grain for nostalgic vibe.", DEFAULT_NAG_NEGATIVE_PROMPT, 11],
229
  ]
230
 
231
- # CSS styling
232
  css = """
233
  .container {
234
  max-width: 1400px;
@@ -309,184 +339,181 @@ css = """
309
  }
310
  """
311
 
312
- # Gradio interface
313
  with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
314
- with gr.Column(elem_classes="container"):
315
- gr.HTML("""
316
  <h1 class="main-title">๐ŸŽฌ VEO3 Free</h1>
317
  <p class="subtitle">Wan2.1-T2V-14B + Fast 4-step with NAG + Automatic Audio Generation</p>
318
- """)
319
-
320
-
321
- gr.HTML(
322
- """
323
- <div class='container' style='display:flex; justify-content:center; gap:12px;'>
324
- <a href="https://huggingface.co/spaces/openfree/Best-AI" target="_blank">
325
- <img src="https://img.shields.io/static/v1?label=OpenFree&message=BEST%20AI%20Services&color=%230000ff&labelColor=%23000080&logo=huggingface&logoColor=%23ffa500&style=for-the-badge" alt="OpenFree badge">
326
- </a>
327
 
328
- <a href="https://discord.gg/openfreeai" target="_blank">
329
- <img src="https://img.shields.io/static/v1?label=Discord&message=Openfree%20AI&color=%230000ff&labelColor=%23800080&logo=discord&logoColor=white&style=for-the-badge" alt="Discord badge">
330
- </a>
331
- </div>
332
- """
333
- )
 
 
 
 
 
334
 
335
-
336
- with gr.Row():
337
- with gr.Column(scale=1):
338
- with gr.Group(elem_classes="prompt-container"):
339
- prompt = gr.Textbox(
340
- label="โœจ Video Prompt (also used for audio generation)",
341
- placeholder="Describe your video scene in detail...",
342
- lines=3,
343
- elem_classes="prompt-input"
 
 
 
 
 
 
344
  )
345
-
346
- with gr.Accordion("๐ŸŽจ Advanced Video Settings", open=False):
347
- nag_negative_prompt = gr.Textbox(
348
- label="Video Negative Prompt",
349
- value=DEFAULT_NAG_NEGATIVE_PROMPT,
350
- lines=2,
351
- )
352
- nag_scale = gr.Slider(
353
- label="NAG Scale",
354
- minimum=1.0,
355
- maximum=20.0,
356
- step=0.25,
357
- value=11.0,
358
- info="Higher values = stronger guidance"
359
- )
360
 
361
- with gr.Group(elem_classes="settings-panel"):
362
- gr.Markdown("### โš™๏ธ Video Settings")
363
-
364
- with gr.Row():
365
- duration_seconds_input = gr.Slider(
366
- minimum=1,
367
- maximum=8,
368
- step=1,
369
- value=DEFAULT_DURATION_SECONDS,
370
- label="๐Ÿ“ฑ Duration (seconds)",
371
- elem_classes="slider-container"
372
- )
373
- steps_slider = gr.Slider(
374
- minimum=1,
375
- maximum=8,
376
- step=1,
377
- value=DEFAULT_STEPS,
378
- label="๐Ÿ”„ Inference Steps",
379
- elem_classes="slider-container"
380
- )
381
-
382
- with gr.Row():
383
- height_input = gr.Slider(
384
- minimum=SLIDER_MIN_H,
385
- maximum=SLIDER_MAX_H,
386
- step=MOD_VALUE,
387
- value=DEFAULT_H_SLIDER_VALUE,
388
- label=f"๐Ÿ“ Height (ร—{MOD_VALUE})",
389
- elem_classes="slider-container"
390
- )
391
- width_input = gr.Slider(
392
- minimum=SLIDER_MIN_W,
393
- maximum=SLIDER_MAX_W,
394
- step=MOD_VALUE,
395
- value=DEFAULT_W_SLIDER_VALUE,
396
- label=f"๐Ÿ“ Width (ร—{MOD_VALUE})",
397
- elem_classes="slider-container"
398
- )
399
-
400
- with gr.Row():
401
- seed_input = gr.Slider(
402
- label="๐ŸŒฑ Seed",
403
- minimum=0,
404
- maximum=MAX_SEED,
405
- step=1,
406
- value=DEFAULT_SEED,
407
- interactive=True
408
- )
409
- randomize_seed_checkbox = gr.Checkbox(
410
- label="๐ŸŽฒ Random Seed",
411
- value=True,
412
- interactive=True
413
- )
414
 
415
- with gr.Group(elem_classes="audio-settings"):
416
- gr.Markdown("### ๐ŸŽต Audio Generation Settings")
417
-
418
- enable_audio = gr.Checkbox(
419
- label="๐Ÿ”Š Enable Automatic Audio Generation",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
  value=True,
421
  interactive=True
422
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
423
 
424
- with gr.Column(visible=True) as audio_settings_group:
425
- audio_negative_prompt = gr.Textbox(
426
- label="Audio Negative Prompt",
427
- value=DEFAULT_AUDIO_NEGATIVE_PROMPT,
428
- placeholder="Elements to avoid in audio (e.g., music, speech)",
 
 
 
 
 
 
 
 
 
 
 
429
  )
430
-
431
- with gr.Row():
432
- audio_steps = gr.Slider(
433
- minimum=10,
434
- maximum=50,
435
- step=5,
436
- value=25,
437
- label="๐ŸŽš๏ธ Audio Steps",
438
- info="More steps = better quality"
439
- )
440
- audio_cfg_strength = gr.Slider(
441
- minimum=1.0,
442
- maximum=10.0,
443
- step=0.5,
444
- value=4.5,
445
- label="๐ŸŽ›๏ธ Audio Guidance",
446
- info="Strength of prompt guidance"
447
- )
448
-
449
- # Toggle audio settings visibility
450
- enable_audio.change(
451
- fn=lambda x: gr.update(visible=x),
452
- inputs=[enable_audio],
453
- outputs=[audio_settings_group]
454
- )
455
 
456
- generate_button = gr.Button(
457
- "๐ŸŽฌ Generate Video with Audio",
458
- variant="primary",
459
- elem_classes="generate-btn"
 
460
  )
461
 
462
- with gr.Column(scale=1):
463
- video_output = gr.Video(
464
- label="Generated Video with Audio",
465
- autoplay=True,
466
- interactive=False,
467
- elem_classes="video-output"
468
- )
469
-
470
- gr.HTML("""
471
- <div style="text-align: center; margin-top: 20px; color: #6b7280;">
472
- <p>💡 Tip: The same prompt is used for both video and audio generation!</p>
473
- <p>🎧 Audio is automatically matched to the visual content</p>
474
- </div>
475
- """)
476
 
477
- gr.Markdown("### 🎯 Example Prompts")
478
- gr.Examples(
479
- examples=examples,
480
- fn=generate_with_example,
481
- inputs=[prompt, nag_negative_prompt, nag_scale],
482
- outputs=[
483
- video_output,
484
- height_input, width_input, duration_seconds_input,
485
- steps_slider, seed_input,
486
- enable_audio, audio_negative_prompt, audio_steps, audio_cfg_strength
487
- ],
488
- cache_examples="lazy"
489
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
490
 
491
  # Connect UI elements
492
  ui_inputs = [
 
71
  setup_eval_logging()
72
 
73
  # Initialize NAG Video Model
74
+ try:
75
+ vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
76
+ wan_path = hf_hub_download(repo_id=SUB_MODEL_ID, filename=SUB_MODEL_FILENAME)
77
+ transformer = NagWanTransformer3DModel.from_single_file(wan_path, torch_dtype=torch.bfloat16)
78
+ pipe = NAGWanPipeline.from_pretrained(
79
+ MODEL_ID, vae=vae, transformer=transformer, torch_dtype=torch.bfloat16
80
+ )
81
+ pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=5.0)
82
+ pipe.to("cuda")
83
 
84
+ pipe.transformer.__class__.attn_processors = NagWanTransformer3DModel.attn_processors
85
+ pipe.transformer.__class__.set_attn_processor = NagWanTransformer3DModel.set_attn_processor
86
+ pipe.transformer.__class__.forward = NagWanTransformer3DModel.forward
87
+ print("NAG Video Model loaded successfully!")
88
+ except Exception as e:
89
+ print(f"Error loading NAG Video Model: {e}")
90
+ pipe = None
91
 
92
  # Initialize MMAudio Model
93
  def get_mmaudio_model() -> tuple[MMAudio, FeaturesUtils, SequenceConfig]:
 
107
 
108
  return net, feature_utils, seq_cfg
109
 
110
+ try:
111
+ audio_net, audio_feature_utils, audio_seq_cfg = get_mmaudio_model()
112
+ print("MMAudio Model loaded successfully!")
113
+ except Exception as e:
114
+ print(f"Error loading MMAudio Model: {e}")
115
+ audio_net = None
116
 
117
  # Audio generation function
118
  @torch.inference_mode()
119
  def add_audio_to_video(video_path, prompt, audio_negative_prompt, audio_steps, audio_cfg_strength, duration):
120
  """Generate and add audio to video using MMAudio"""
121
+ if audio_net is None:
122
+ print("MMAudio model not loaded, returning video without audio")
123
+ return video_path
124
+
125
+ try:
126
+ rng = torch.Generator(device=device)
127
+ rng.seed() # Random seed for audio
128
+ fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=audio_steps)
129
+
130
+ video_info = load_video(video_path, duration)
131
+ clip_frames = video_info.clip_frames
132
+ sync_frames = video_info.sync_frames
133
+ duration = video_info.duration_sec
134
+ clip_frames = clip_frames.unsqueeze(0)
135
+ sync_frames = sync_frames.unsqueeze(0)
136
+ audio_seq_cfg.duration = duration
137
+ audio_net.update_seq_lengths(audio_seq_cfg.latent_seq_len, audio_seq_cfg.clip_seq_len, audio_seq_cfg.sync_seq_len)
138
+
139
+ audios = mmaudio_generate(clip_frames,
140
+ sync_frames, [prompt],
141
+ negative_text=[audio_negative_prompt],
142
+ feature_utils=audio_feature_utils,
143
+ net=audio_net,
144
+ fm=fm,
145
+ rng=rng,
146
+ cfg_strength=audio_cfg_strength)
147
+ audio = audios.float().cpu()[0]
148
+
149
+ # Create video with audio
150
+ video_with_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
151
+ make_video(video_info, video_with_audio_path, audio, sampling_rate=audio_seq_cfg.sampling_rate)
152
+
153
+ return video_with_audio_path
154
+ except Exception as e:
155
+ print(f"Error in audio generation: {e}")
156
+ return video_path
157
 
158
  # Combined generation function
159
  def get_duration(prompt, nag_negative_prompt, nag_scale, height, width, duration_seconds,
 
174
  enable_audio=True, audio_negative_prompt=DEFAULT_AUDIO_NEGATIVE_PROMPT,
175
  audio_steps=25, audio_cfg_strength=4.5,
176
  ):
177
+ if pipe is None:
178
+ return None, DEFAULT_SEED
179
+
180
+ try:
181
+ # Generate video first
182
+ target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
183
+ target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)
184
+
185
+ num_frames = np.clip(int(round(int(duration_seconds) * FIXED_FPS) + 1), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
186
+
187
+ current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
188
+
189
+ print(f"Generating video with: prompt='{prompt}', resolution={target_w}x{target_h}, frames={num_frames}")
190
+
191
+ with torch.inference_mode():
192
+ nag_output_frames_list = pipe(
193
+ prompt=prompt,
194
+ nag_negative_prompt=nag_negative_prompt,
195
+ nag_scale=nag_scale,
196
+ nag_tau=3.5,
197
+ nag_alpha=0.5,
198
+ height=target_h, width=target_w, num_frames=num_frames,
199
+ guidance_scale=0.,
200
+ num_inference_steps=int(steps),
201
+ generator=torch.Generator(device="cuda").manual_seed(current_seed)
202
+ ).frames[0]
203
+
204
+ # Save initial video without audio
205
+ with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
206
+ temp_video_path = tmpfile.name
207
+ export_to_video(nag_output_frames_list, temp_video_path, fps=FIXED_FPS)
208
+ print(f"Video saved to: {temp_video_path}")
209
+
210
+ # Add audio if enabled
211
+ if enable_audio:
212
+ try:
213
+ print("Adding audio to video...")
214
+ final_video_path = add_audio_to_video(
215
+ temp_video_path,
216
+ prompt, # Use the same prompt for audio generation
217
+ audio_negative_prompt,
218
+ audio_steps,
219
+ audio_cfg_strength,
220
+ duration_seconds
221
+ )
222
+ # Clean up temp video
223
+ if os.path.exists(temp_video_path) and final_video_path != temp_video_path:
224
+ os.remove(temp_video_path)
225
+ print(f"Final video with audio: {final_video_path}")
226
+ except Exception as e:
227
+ log.error(f"Audio generation failed: {e}")
228
+ final_video_path = temp_video_path
229
+ else:
230
  final_video_path = temp_video_path
231
+
232
+ return final_video_path, current_seed
233
+ except Exception as e:
234
+ print(f"Error in video generation: {e}")
235
+ return None, current_seed
236
 
237
  # Example generation function
238
  def generate_with_example(prompt, nag_negative_prompt, nag_scale):
 
254
  # Examples with audio descriptions
255
  examples = [
256
  ["Midnight highway outside a neon-lit city. A black 1973 Porsche 911 Carrera RS speeds at 120 km/h. Inside, a stylish singer-guitarist sings while driving, vintage sunburst guitar on the passenger seat. Sodium streetlights streak over the hood; RGB panels shift magenta to blue on the driver. Camera: drone dive, Russian-arm low wheel shot, interior gimbal, FPV barrel roll, overhead spiral. Neo-noir palette, rain-slick asphalt reflections, roaring flat-six engine blended with live guitar.", DEFAULT_NAG_NEGATIVE_PROMPT, 11],
257
+ ["Arena rock concert packed with 20 000 fans. A flamboyant lead guitarist in leather jacket and mirrored aviators shreds a cherry-red Flying V on a thrust stage. Pyro flames shoot up on every downbeat, COโ‚‚ jets burst behind. Moving-head spotlights swirl teal and amber, follow-spots rim-light the guitarist's hair. Steadicam 360-orbit, crane shot rising over crowd, ultra-slow-motion pick attack at 1 000 fps. Film-grain teal-orange grade, thunderous crowd roar mixes with screaming guitar solo.", DEFAULT_NAG_NEGATIVE_PROMPT, 11],
258
  ["Golden-hour countryside road winding through rolling wheat fields. A man and woman ride a vintage cafรฉ-racer motorcycle, hair and scarf fluttering in the warm breeze. Drone chase shot reveals endless patchwork farmland; low slider along rear wheel captures dust trail. Sun-flare back-lights the riders, lens blooms on highlights. Soft acoustic rock underscore; engine rumble mixed at โ€“8 dB. Warm pastel color grade, gentle film-grain for nostalgic vibe.", DEFAULT_NAG_NEGATIVE_PROMPT, 11],
259
  ]
260
 
261
+ # CSS styling - Fixed container structure
262
  css = """
263
  .container {
264
  max-width: 1400px;
 
339
  }
340
  """
341
 
342
+ # Gradio interface - Fixed structure
343
  with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
344
+ gr.HTML("""
345
+ <div class="container">
346
  <h1 class="main-title">🎬 VEO3 Free</h1>
347
  <p class="subtitle">Wan2.1-T2V-14B + Fast 4-step with NAG + Automatic Audio Generation</p>
348
+ </div>
349
+ """)
 
 
 
 
 
 
 
350
 
351
+ gr.HTML("""
352
+ <div class='container' style='display:flex; justify-content:center; gap:12px;'>
353
+ <a href="https://huggingface.co/spaces/openfree/Best-AI" target="_blank">
354
+ <img src="https://img.shields.io/static/v1?label=OpenFree&message=BEST%20AI%20Services&color=%230000ff&labelColor=%23000080&logo=huggingface&logoColor=%23ffa500&style=for-the-badge" alt="OpenFree badge">
355
+ </a>
356
+
357
+ <a href="https://discord.gg/openfreeai" target="_blank">
358
+ <img src="https://img.shields.io/static/v1?label=Discord&message=Openfree%20AI&color=%230000ff&labelColor=%23800080&logo=discord&logoColor=white&style=for-the-badge" alt="Discord badge">
359
+ </a>
360
+ </div>
361
+ """)
362
 
363
+ with gr.Row():
364
+ with gr.Column(scale=1):
365
+ with gr.Group(elem_classes="prompt-container"):
366
+ prompt = gr.Textbox(
367
+ label="โœจ Video Prompt (also used for audio generation)",
368
+ placeholder="Describe your video scene in detail...",
369
+ lines=3,
370
+ elem_classes="prompt-input"
371
+ )
372
+
373
+ with gr.Accordion("๐ŸŽจ Advanced Video Settings", open=False):
374
+ nag_negative_prompt = gr.Textbox(
375
+ label="Video Negative Prompt",
376
+ value=DEFAULT_NAG_NEGATIVE_PROMPT,
377
+ lines=2,
378
  )
379
+ nag_scale = gr.Slider(
380
+ label="NAG Scale",
381
+ minimum=1.0,
382
+ maximum=20.0,
383
+ step=0.25,
384
+ value=11.0,
385
+ info="Higher values = stronger guidance"
386
+ )
387
+
388
+ with gr.Group(elem_classes="settings-panel"):
389
+ gr.Markdown("### โš™๏ธ Video Settings")
 
 
 
 
390
 
391
+ with gr.Row():
392
+ duration_seconds_input = gr.Slider(
393
+ minimum=1,
394
+ maximum=8,
395
+ step=1,
396
+ value=DEFAULT_DURATION_SECONDS,
397
+ label="๐Ÿ“ฑ Duration (seconds)",
398
+ elem_classes="slider-container"
399
+ )
400
+ steps_slider = gr.Slider(
401
+ minimum=1,
402
+ maximum=8,
403
+ step=1,
404
+ value=DEFAULT_STEPS,
405
+ label="๐Ÿ”„ Inference Steps",
406
+ elem_classes="slider-container"
407
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
408
 
409
+ with gr.Row():
410
+ height_input = gr.Slider(
411
+ minimum=SLIDER_MIN_H,
412
+ maximum=SLIDER_MAX_H,
413
+ step=MOD_VALUE,
414
+ value=DEFAULT_H_SLIDER_VALUE,
415
+ label=f"๐Ÿ“ Height (ร—{MOD_VALUE})",
416
+ elem_classes="slider-container"
417
+ )
418
+ width_input = gr.Slider(
419
+ minimum=SLIDER_MIN_W,
420
+ maximum=SLIDER_MAX_W,
421
+ step=MOD_VALUE,
422
+ value=DEFAULT_W_SLIDER_VALUE,
423
+ label=f"๐Ÿ“ Width (ร—{MOD_VALUE})",
424
+ elem_classes="slider-container"
425
+ )
426
+
427
+ with gr.Row():
428
+ seed_input = gr.Slider(
429
+ label="๐ŸŒฑ Seed",
430
+ minimum=0,
431
+ maximum=MAX_SEED,
432
+ step=1,
433
+ value=DEFAULT_SEED,
434
+ interactive=True
435
+ )
436
+ randomize_seed_checkbox = gr.Checkbox(
437
+ label="๐ŸŽฒ Random Seed",
438
  value=True,
439
  interactive=True
440
  )
441
+
442
+ with gr.Group(elem_classes="audio-settings"):
443
+ gr.Markdown("### ๐ŸŽต Audio Generation Settings")
444
+
445
+ enable_audio = gr.Checkbox(
446
+ label="๐Ÿ”Š Enable Automatic Audio Generation",
447
+ value=True,
448
+ interactive=True
449
+ )
450
+
451
+ with gr.Column(visible=True) as audio_settings_group:
452
+ audio_negative_prompt = gr.Textbox(
453
+ label="Audio Negative Prompt",
454
+ value=DEFAULT_AUDIO_NEGATIVE_PROMPT,
455
+ placeholder="Elements to avoid in audio (e.g., music, speech)",
456
+ )
457
 
458
+ with gr.Row():
459
+ audio_steps = gr.Slider(
460
+ minimum=10,
461
+ maximum=50,
462
+ step=5,
463
+ value=25,
464
+ label="๐ŸŽš๏ธ Audio Steps",
465
+ info="More steps = better quality"
466
+ )
467
+ audio_cfg_strength = gr.Slider(
468
+ minimum=1.0,
469
+ maximum=10.0,
470
+ step=0.5,
471
+ value=4.5,
472
+ label="๐ŸŽ›๏ธ Audio Guidance",
473
+ info="Strength of prompt guidance"
474
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
475
 
476
+ # Toggle audio settings visibility
477
+ enable_audio.change(
478
+ fn=lambda x: gr.update(visible=x),
479
+ inputs=[enable_audio],
480
+ outputs=[audio_settings_group]
481
  )
482
 
483
+ generate_button = gr.Button(
484
+ "๐ŸŽฌ Generate Video with Audio",
485
+ variant="primary",
486
+ elem_classes="generate-btn"
487
+ )
 
 
 
 
 
 
 
 
 
488
 
489
+ with gr.Column(scale=1):
490
+ video_output = gr.Video(
491
+ label="Generated Video with Audio",
492
+ autoplay=True,
493
+ interactive=False,
494
+ elem_classes="video-output"
495
+ )
496
+
497
+ gr.HTML("""
498
+ <div style="text-align: center; margin-top: 20px; color: #6b7280;">
499
+ <p>💡 Tip: The same prompt is used for both video and audio generation!</p>
500
+ <p>🎧 Audio is automatically matched to the visual content</p>
501
+ </div>
502
+ """)
503
+
504
+ gr.Markdown("### 🎯 Example Prompts")
505
+ gr.Examples(
506
+ examples=examples,
507
+ fn=generate_with_example,
508
+ inputs=[prompt, nag_negative_prompt, nag_scale],
509
+ outputs=[
510
+ video_output,
511
+ height_input, width_input, duration_seconds_input,
512
+ steps_slider, seed_input,
513
+ enable_audio, audio_negative_prompt, audio_steps, audio_cfg_strength
514
+ ],
515
+ cache_examples=False # Changed from "lazy" to False
516
+ )
517
 
518
  # Connect UI elements
519
  ui_inputs = [