rocketmandrey committed on
Commit
3dcfbcf
·
verified ·
1 Parent(s): 8f567ff

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +68 -110
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,8 +1,7 @@
1
  import gradio as gr
2
  import numpy as np
3
  from PIL import Image
4
- import tempfile
5
- import os
6
 
7
  # Configuration
8
  MAX_SEED = np.iinfo(np.int32).max
@@ -10,14 +9,13 @@ MAX_SEED = np.iinfo(np.int32).max
10
  def generate_video(
11
  image,
12
  audio,
13
- prompt="A person talking",
14
- resolution="480p",
15
- audio_cfg=2.5,
16
- guidance_scale=5.0,
17
- num_inference_steps=25,
18
- seed=42,
19
- max_duration=10,
20
- progress=gr.Progress()
21
  ):
22
  """Generate talking video from image and audio"""
23
 
@@ -28,19 +26,9 @@ def generate_video(
28
  return None, "❌ Please upload an audio file"
29
 
30
  try:
31
- progress(0, "Initializing...")
32
-
33
- # For now, return a placeholder message since we need to implement the actual model
34
- # In a real implementation, you would load the MeiGen-MultiTalk model here
35
-
36
- progress(0.5, "Processing audio and image...")
37
-
38
  # Simulate processing time
39
- import time
40
  time.sleep(2)
41
 
42
- progress(1.0, "Video generation complete!")
43
-
44
  return None, f"""✅ Video generation request processed!
45
 
46
  **Settings:**
@@ -66,129 +54,103 @@ The model files are not included in this demo due to size constraints."""
66
  def randomize_seed():
67
  return np.random.randint(0, MAX_SEED)
68
 
69
- # Gradio Interface
70
- with gr.Blocks(
71
- theme=gr.themes.Soft(),
72
- title="MeiGen-MultiTalk Demo",
73
- css="""
74
- .main-header {
75
- text-align: center;
76
- background: linear-gradient(45deg, #ff6b6b, #4ecdc4);
77
- -webkit-background-clip: text;
78
- -webkit-text-fill-color: transparent;
79
- background-clip: text;
80
- font-size: 2.5em;
81
- font-weight: bold;
82
- margin-bottom: 0.5em;
83
- }
84
- .subtitle {
85
- text-align: center;
86
- color: #666;
87
- margin-bottom: 2em;
88
- }
89
- """
90
- ) as demo:
91
 
92
  gr.HTML("""
93
- <div class="main-header">🎬 MeiGen-MultiTalk Demo</div>
94
- <p class="subtitle">Generate talking videos from images and audio using AI</p>
 
 
95
  """)
96
 
97
  with gr.Row():
98
  # Input Column
99
- with gr.Column(scale=1):
100
  gr.Markdown("### πŸ“ Input Files")
101
 
102
  image_input = gr.Image(
103
  label="Reference Image",
104
- type="pil",
105
- height=300
106
  )
107
 
108
  audio_input = gr.Audio(
109
- label="Audio File",
110
- type="filepath"
111
  )
112
 
113
  prompt_input = gr.Textbox(
114
  label="Prompt",
115
  placeholder="A person talking naturally...",
116
- value="A person talking",
117
- lines=2
118
  )
119
 
120
  gr.Markdown("### βš™οΈ Generation Settings")
121
 
122
- with gr.Row():
123
- resolution = gr.Dropdown(
124
- choices=["480p", "720p"],
125
- value="480p",
126
- label="Resolution"
127
- )
128
-
129
- max_duration = gr.Slider(
130
- minimum=1,
131
- maximum=15,
132
- value=10,
133
- step=1,
134
- label="Max Duration (seconds)"
135
- )
136
 
137
- with gr.Row():
138
- audio_cfg = gr.Slider(
139
- minimum=1.0,
140
- maximum=5.0,
141
- value=2.5,
142
- step=0.1,
143
- label="Audio CFG Scale"
144
- )
145
-
146
- guidance_scale = gr.Slider(
147
- minimum=1.0,
148
- maximum=10.0,
149
- value=5.0,
150
- step=0.5,
151
- label="Guidance Scale"
152
- )
153
 
154
- with gr.Row():
155
- num_inference_steps = gr.Slider(
156
- minimum=10,
157
- maximum=50,
158
- value=25,
159
- step=1,
160
- label="Inference Steps"
161
- )
162
-
163
- seed = gr.Number(
164
- value=42,
165
- minimum=0,
166
- maximum=MAX_SEED,
167
- label="Seed"
168
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
  with gr.Row():
171
- randomize_btn = gr.Button("🎲 Randomize Seed", variant="secondary")
172
- generate_btn = gr.Button("🎬 Generate Video", variant="primary", size="lg")
173
 
174
  # Output Column
175
- with gr.Column(scale=1):
176
  gr.Markdown("### 🎥 Generated Video")
177
 
178
  video_output = gr.Video(
179
- label="Generated Video",
180
- height=400
181
  )
182
 
183
  result_text = gr.Textbox(
184
  label="Generation Log",
185
- lines=8,
186
- max_lines=15
187
  )
188
 
189
- # Examples
190
 - gr.Markdown("### 📋 Tips for Best Results")
191
  gr.Markdown("""
 
192
  - **Image**: Use clear, front-facing photos with good lighting
193
  - **Audio**: Ensure clean audio without background noise
194
  - **Prompt**: Be specific about the desired talking style
@@ -219,10 +181,6 @@ with gr.Blocks(
219
  )
220
 
221
  if __name__ == "__main__":
222
- demo.launch(
223
- share=False,
224
- server_port=7860,
225
- show_error=True
226
- )
227
 
228
  # Force restart - updated at 2025-01-23
 
1
  import gradio as gr
2
  import numpy as np
3
  from PIL import Image
4
+ import time
 
5
 
6
  # Configuration
7
  MAX_SEED = np.iinfo(np.int32).max
 
9
  def generate_video(
10
  image,
11
  audio,
12
+ prompt,
13
+ resolution,
14
+ audio_cfg,
15
+ guidance_scale,
16
+ num_inference_steps,
17
+ seed,
18
+ max_duration
 
19
  ):
20
  """Generate talking video from image and audio"""
21
 
 
26
  return None, "❌ Please upload an audio file"
27
 
28
  try:
 
 
 
 
 
 
 
29
  # Simulate processing time
 
30
  time.sleep(2)
31
 
 
 
32
  return None, f"""✅ Video generation request processed!
33
 
34
  **Settings:**
 
54
  def randomize_seed():
55
  return np.random.randint(0, MAX_SEED)
56
 
57
+ # Simplified Gradio Interface
58
+ with gr.Blocks(title="MeiGen-MultiTalk Demo") as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  gr.HTML("""
61
+ <div style="text-align: center; margin-bottom: 20px;">
62
+ <h1>🎬 MeiGen-MultiTalk Demo</h1>
63
+ <p>Generate talking videos from images and audio using AI</p>
64
+ </div>
65
  """)
66
 
67
  with gr.Row():
68
  # Input Column
69
+ with gr.Column():
70
  gr.Markdown("### πŸ“ Input Files")
71
 
72
  image_input = gr.Image(
73
  label="Reference Image",
74
+ type="pil"
 
75
  )
76
 
77
  audio_input = gr.Audio(
78
+ label="Audio File"
 
79
  )
80
 
81
  prompt_input = gr.Textbox(
82
  label="Prompt",
83
  placeholder="A person talking naturally...",
84
+ value="A person talking"
 
85
  )
86
 
87
  gr.Markdown("### βš™οΈ Generation Settings")
88
 
89
+ resolution = gr.Dropdown(
90
+ choices=["480p", "720p"],
91
+ value="480p",
92
+ label="Resolution"
93
+ )
 
 
 
 
 
 
 
 
 
94
 
95
+ max_duration = gr.Slider(
96
+ minimum=1,
97
+ maximum=15,
98
+ value=10,
99
+ step=1,
100
+ label="Max Duration (seconds)"
101
+ )
 
 
 
 
 
 
 
 
 
102
 
103
+ audio_cfg = gr.Slider(
104
+ minimum=1.0,
105
+ maximum=5.0,
106
+ value=2.5,
107
+ step=0.1,
108
+ label="Audio CFG Scale"
109
+ )
110
+
111
+ guidance_scale = gr.Slider(
112
+ minimum=1.0,
113
+ maximum=10.0,
114
+ value=5.0,
115
+ step=0.5,
116
+ label="Guidance Scale"
117
+ )
118
+
119
+ num_inference_steps = gr.Slider(
120
+ minimum=10,
121
+ maximum=50,
122
+ value=25,
123
+ step=1,
124
+ label="Inference Steps"
125
+ )
126
+
127
+ seed = gr.Number(
128
+ value=42,
129
+ minimum=0,
130
+ maximum=MAX_SEED,
131
+ label="Seed"
132
+ )
133
 
134
  with gr.Row():
135
+ randomize_btn = gr.Button("🎲 Randomize Seed")
136
+ generate_btn = gr.Button("🎬 Generate Video", variant="primary")
137
 
138
  # Output Column
139
+ with gr.Column():
140
  gr.Markdown("### 🎥 Generated Video")
141
 
142
  video_output = gr.Video(
143
+ label="Generated Video"
 
144
  )
145
 
146
  result_text = gr.Textbox(
147
  label="Generation Log",
148
+ lines=10
 
149
  )
150
 
151
+ # Tips
 
152
  gr.Markdown("""
153
 + ### 📋 Tips for Best Results
154
  - **Image**: Use clear, front-facing photos with good lighting
155
  - **Audio**: Ensure clean audio without background noise
156
  - **Prompt**: Be specific about the desired talking style
 
181
  )
182
 
183
  if __name__ == "__main__":
184
+ demo.launch(share=True)
 
 
 
 
185
 
186
  # Force restart - updated at 2025-01-23
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- gradio==4.44.1
2
  torch
3
  pillow
4
  numpy
 
1
+ gradio==4.36.1
2
  torch
3
  pillow
4
  numpy