Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- app.py +68 -110
- requirements.txt +1 -1
app.py
CHANGED
@@ -1,8 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
3 |
from PIL import Image
|
4 |
-
import
|
5 |
-
import os
|
6 |
|
7 |
# Configuration
|
8 |
MAX_SEED = np.iinfo(np.int32).max
|
@@ -10,14 +9,13 @@ MAX_SEED = np.iinfo(np.int32).max
|
|
10 |
def generate_video(
|
11 |
image,
|
12 |
audio,
|
13 |
-
prompt
|
14 |
-
resolution
|
15 |
-
audio_cfg
|
16 |
-
guidance_scale
|
17 |
-
num_inference_steps
|
18 |
-
seed
|
19 |
-
max_duration
|
20 |
-
progress=gr.Progress()
|
21 |
):
|
22 |
"""Generate talking video from image and audio"""
|
23 |
|
@@ -28,19 +26,9 @@ def generate_video(
|
|
28 |
return None, "❌ Please upload an audio file"
|
29 |
|
30 |
try:
|
31 |
-
progress(0, "Initializing...")
|
32 |
-
|
33 |
-
# For now, return a placeholder message since we need to implement the actual model
|
34 |
-
# In a real implementation, you would load the MeiGen-MultiTalk model here
|
35 |
-
|
36 |
-
progress(0.5, "Processing audio and image...")
|
37 |
-
|
38 |
# Simulate processing time
|
39 |
-
import time
|
40 |
time.sleep(2)
|
41 |
|
42 |
-
progress(1.0, "Video generation complete!")
|
43 |
-
|
44 |
return None, f"""✅ Video generation request processed!
|
45 |
|
46 |
**Settings:**
|
@@ -66,129 +54,103 @@ The model files are not included in this demo due to size constraints."""
|
|
66 |
def randomize_seed():
|
67 |
return np.random.randint(0, MAX_SEED)
|
68 |
|
69 |
-
# Gradio Interface
|
70 |
-
with gr.Blocks(
|
71 |
-
theme=gr.themes.Soft(),
|
72 |
-
title="MeiGen-MultiTalk Demo",
|
73 |
-
css="""
|
74 |
-
.main-header {
|
75 |
-
text-align: center;
|
76 |
-
background: linear-gradient(45deg, #ff6b6b, #4ecdc4);
|
77 |
-
-webkit-background-clip: text;
|
78 |
-
-webkit-text-fill-color: transparent;
|
79 |
-
background-clip: text;
|
80 |
-
font-size: 2.5em;
|
81 |
-
font-weight: bold;
|
82 |
-
margin-bottom: 0.5em;
|
83 |
-
}
|
84 |
-
.subtitle {
|
85 |
-
text-align: center;
|
86 |
-
color: #666;
|
87 |
-
margin-bottom: 2em;
|
88 |
-
}
|
89 |
-
"""
|
90 |
-
) as demo:
|
91 |
|
92 |
gr.HTML("""
|
93 |
-
<div
|
94 |
-
|
|
|
|
|
95 |
""")
|
96 |
|
97 |
with gr.Row():
|
98 |
# Input Column
|
99 |
-
with gr.Column(
|
100 |
gr.Markdown("### π Input Files")
|
101 |
|
102 |
image_input = gr.Image(
|
103 |
label="Reference Image",
|
104 |
-
type="pil"
|
105 |
-
height=300
|
106 |
)
|
107 |
|
108 |
audio_input = gr.Audio(
|
109 |
-
label="Audio File"
|
110 |
-
type="filepath"
|
111 |
)
|
112 |
|
113 |
prompt_input = gr.Textbox(
|
114 |
label="Prompt",
|
115 |
placeholder="A person talking naturally...",
|
116 |
-
value="A person talking"
|
117 |
-
lines=2
|
118 |
)
|
119 |
|
120 |
gr.Markdown("### ⚙️ Generation Settings")
|
121 |
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
)
|
128 |
-
|
129 |
-
max_duration = gr.Slider(
|
130 |
-
minimum=1,
|
131 |
-
maximum=15,
|
132 |
-
value=10,
|
133 |
-
step=1,
|
134 |
-
label="Max Duration (seconds)"
|
135 |
-
)
|
136 |
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
)
|
145 |
-
|
146 |
-
guidance_scale = gr.Slider(
|
147 |
-
minimum=1.0,
|
148 |
-
maximum=10.0,
|
149 |
-
value=5.0,
|
150 |
-
step=0.5,
|
151 |
-
label="Guidance Scale"
|
152 |
-
)
|
153 |
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
169 |
|
170 |
with gr.Row():
|
171 |
-
randomize_btn = gr.Button("🎲 Randomize Seed"
|
172 |
-
generate_btn = gr.Button("🎬 Generate Video", variant="primary"
|
173 |
|
174 |
# Output Column
|
175 |
-
with gr.Column(
|
176 |
gr.Markdown("### 🎥 Generated Video")
|
177 |
|
178 |
video_output = gr.Video(
|
179 |
-
label="Generated Video"
|
180 |
-
height=400
|
181 |
)
|
182 |
|
183 |
result_text = gr.Textbox(
|
184 |
label="Generation Log",
|
185 |
-
lines=
|
186 |
-
max_lines=15
|
187 |
)
|
188 |
|
189 |
-
#
|
190 |
-
gr.Markdown("### π Tips for Best Results")
|
191 |
gr.Markdown("""
|
|
|
192 |
- **Image**: Use clear, front-facing photos with good lighting
|
193 |
- **Audio**: Ensure clean audio without background noise
|
194 |
- **Prompt**: Be specific about the desired talking style
|
@@ -219,10 +181,6 @@ with gr.Blocks(
|
|
219 |
)
|
220 |
|
221 |
if __name__ == "__main__":
|
222 |
-
demo.launch(
|
223 |
-
share=False,
|
224 |
-
server_port=7860,
|
225 |
-
show_error=True
|
226 |
-
)
|
227 |
|
228 |
# Force restart - updated at 2025-01-23
|
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
3 |
from PIL import Image
|
4 |
+
import time
|
|
|
5 |
|
6 |
# Configuration
|
7 |
MAX_SEED = np.iinfo(np.int32).max
|
|
|
9 |
def generate_video(
|
10 |
image,
|
11 |
audio,
|
12 |
+
prompt,
|
13 |
+
resolution,
|
14 |
+
audio_cfg,
|
15 |
+
guidance_scale,
|
16 |
+
num_inference_steps,
|
17 |
+
seed,
|
18 |
+
max_duration
|
|
|
19 |
):
|
20 |
"""Generate talking video from image and audio"""
|
21 |
|
|
|
26 |
return None, "❌ Please upload an audio file"
|
27 |
|
28 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
# Simulate processing time
|
|
|
30 |
time.sleep(2)
|
31 |
|
|
|
|
|
32 |
return None, f"""✅ Video generation request processed!
|
33 |
|
34 |
**Settings:**
|
|
|
54 |
def randomize_seed():
|
55 |
return np.random.randint(0, MAX_SEED)
|
56 |
|
57 |
+
# Simplified Gradio Interface
|
58 |
+
with gr.Blocks(title="MeiGen-MultiTalk Demo") as demo:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
gr.HTML("""
|
61 |
+
<div style="text-align: center; margin-bottom: 20px;">
|
62 |
+
<h1>🎬 MeiGen-MultiTalk Demo</h1>
|
63 |
+
<p>Generate talking videos from images and audio using AI</p>
|
64 |
+
</div>
|
65 |
""")
|
66 |
|
67 |
with gr.Row():
|
68 |
# Input Column
|
69 |
+
with gr.Column():
|
70 |
gr.Markdown("### π Input Files")
|
71 |
|
72 |
image_input = gr.Image(
|
73 |
label="Reference Image",
|
74 |
+
type="pil"
|
|
|
75 |
)
|
76 |
|
77 |
audio_input = gr.Audio(
|
78 |
+
label="Audio File"
|
|
|
79 |
)
|
80 |
|
81 |
prompt_input = gr.Textbox(
|
82 |
label="Prompt",
|
83 |
placeholder="A person talking naturally...",
|
84 |
+
value="A person talking"
|
|
|
85 |
)
|
86 |
|
87 |
gr.Markdown("### ⚙️ Generation Settings")
|
88 |
|
89 |
+
resolution = gr.Dropdown(
|
90 |
+
choices=["480p", "720p"],
|
91 |
+
value="480p",
|
92 |
+
label="Resolution"
|
93 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
|
95 |
+
max_duration = gr.Slider(
|
96 |
+
minimum=1,
|
97 |
+
maximum=15,
|
98 |
+
value=10,
|
99 |
+
step=1,
|
100 |
+
label="Max Duration (seconds)"
|
101 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
|
103 |
+
audio_cfg = gr.Slider(
|
104 |
+
minimum=1.0,
|
105 |
+
maximum=5.0,
|
106 |
+
value=2.5,
|
107 |
+
step=0.1,
|
108 |
+
label="Audio CFG Scale"
|
109 |
+
)
|
110 |
+
|
111 |
+
guidance_scale = gr.Slider(
|
112 |
+
minimum=1.0,
|
113 |
+
maximum=10.0,
|
114 |
+
value=5.0,
|
115 |
+
step=0.5,
|
116 |
+
label="Guidance Scale"
|
117 |
+
)
|
118 |
+
|
119 |
+
num_inference_steps = gr.Slider(
|
120 |
+
minimum=10,
|
121 |
+
maximum=50,
|
122 |
+
value=25,
|
123 |
+
step=1,
|
124 |
+
label="Inference Steps"
|
125 |
+
)
|
126 |
+
|
127 |
+
seed = gr.Number(
|
128 |
+
value=42,
|
129 |
+
minimum=0,
|
130 |
+
maximum=MAX_SEED,
|
131 |
+
label="Seed"
|
132 |
+
)
|
133 |
|
134 |
with gr.Row():
|
135 |
+
randomize_btn = gr.Button("🎲 Randomize Seed")
|
136 |
+
generate_btn = gr.Button("🎬 Generate Video", variant="primary")
|
137 |
|
138 |
# Output Column
|
139 |
+
with gr.Column():
|
140 |
gr.Markdown("### 🎥 Generated Video")
|
141 |
|
142 |
video_output = gr.Video(
|
143 |
+
label="Generated Video"
|
|
|
144 |
)
|
145 |
|
146 |
result_text = gr.Textbox(
|
147 |
label="Generation Log",
|
148 |
+
lines=10
|
|
|
149 |
)
|
150 |
|
151 |
+
# Tips
|
|
|
152 |
gr.Markdown("""
|
153 |
+
### π Tips for Best Results
|
154 |
- **Image**: Use clear, front-facing photos with good lighting
|
155 |
- **Audio**: Ensure clean audio without background noise
|
156 |
- **Prompt**: Be specific about the desired talking style
|
|
|
181 |
)
|
182 |
|
183 |
if __name__ == "__main__":
|
184 |
+
demo.launch(share=True)
|
|
|
|
|
|
|
|
|
185 |
|
186 |
# Force restart - updated at 2025-01-23
|
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
gradio==4.
|
2 |
torch
|
3 |
pillow
|
4 |
numpy
|
|
|
1 |
+
gradio==4.36.1
|
2 |
torch
|
3 |
pillow
|
4 |
numpy
|