Upload folder using huggingface_hub
- README.md +4 -10
- app.py +101 -20
- requirements.txt +2 -4
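
The commit title above is the default message written by `huggingface_hub` when a folder is pushed with `upload_folder`. For reference, a minimal sketch of how this kind of upload is typically done (the `repo_id` below is a placeholder, not taken from this page):

```python
from huggingface_hub import HfApi

# Uses the token cached by `huggingface-cli login` (or the HF_TOKEN env var).
api = HfApi()

api.upload_folder(
    folder_path=".",                                # local folder with app.py, README.md, requirements.txt
    repo_id="your-username/meigen-multitalk-demo",  # placeholder Space id
    repo_type="space",                              # push to a Space rather than a model repo
)
```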
README.md
CHANGED
@@ -3,8 +3,8 @@ title: MeiGen MultiTalk Demo
 emoji: 🎬
 colorFrom: red
 colorTo: blue
-sdk:
-sdk_version:
+sdk: streamlit
+sdk_version: 1.28.1
 app_file: app.py
 pinned: false
 license: apache-2.0
@@ -27,20 +27,14 @@ This is a demo of MeiGen-MultiTalk, an audio-driven multi-person conversational
 1. Upload a reference image (photo of person(s) who will be speaking)
 2. Upload an audio file
 3. Enter a prompt describing the desired video
-4. Adjust the settings:
-   - Resolution: Video quality (480p or 720p)
-   - Audio CFG: Controls strength of audio influence
-   - Guidance Scale: Controls adherence to prompt
-   - Random Seed: For reproducible results
-   - Max Duration: Video length in seconds
-5. Click "Generate Video" and wait for the result
+4. Click "Generate Video" to process

 ## Tips

 - Use clear, front-facing photos for best results
 - Ensure good audio quality without background noise
 - Keep prompts clear and specific
-
+- Supported formats: PNG, JPG, JPEG for images; MP3, WAV, OGG for audio

 ## Limitations

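The removed README lines above documented advanced settings (resolution, audio CFG, guidance scale, random seed, max duration) that the simplified UI no longer exposes. If they are reintroduced later, a minimal sketch of how they could be collected with standard Streamlit widgets (the variable names, ranges, and defaults here are assumptions, not part of this commit):

```python
import streamlit as st

# Hypothetical "Advanced settings" panel; ranges and defaults are illustrative only.
with st.expander("Advanced settings"):
    resolution = st.selectbox("Resolution", ["480p", "720p"])
    audio_cfg = st.slider("Audio CFG (strength of audio influence)", 1.0, 10.0, 4.0)
    guidance_scale = st.slider("Guidance Scale (adherence to prompt)", 1.0, 15.0, 7.5)
    seed = st.number_input("Random Seed (-1 for random)", value=-1, step=1)
    max_duration = st.slider("Max Duration (seconds)", 1, 15, 5)
```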
app.py
CHANGED
@@ -1,30 +1,111 @@
+import streamlit as st
 import time
+from PIL import Image
+
+st.set_page_config(
+    page_title="MeiGen-MultiTalk Demo",
+    page_icon="🎬",
+    layout="centered"
+)
+
+def process_inputs(image, audio, prompt):
+    """Process the inputs and return a result"""
+
+    if image is None:
+        return "❌ Please upload an image"
+
+    if audio is None:
+        return "❌ Please upload an audio file"
+
+    if not prompt:
+        return "❌ Please enter a prompt"

+    # Simulate processing
+    with st.spinner("Processing..."):
+        time.sleep(2)

+    return f"""✅ Video generation request processed!

+**Input received:**
+- Image: ✅ Uploaded ({image.size} pixels)
+- Audio: ✅ Uploaded
+- Prompt: {prompt}

+**Note:** This is a demo interface. The actual video generation would require:
+1. Loading the MeiGen-MultiTalk model
+2. Processing the input image and audio
+3. Generating the video using the model
+4. Returning the generated video file

+The model files are not included in this demo due to size constraints.
+Ready for implementation! 🎬"""
+
+# Main app
+st.title("🎬 MeiGen-MultiTalk Demo")
+st.markdown("Generate talking videos from images and audio using AI")
+
+# Create columns for layout
+col1, col2 = st.columns(2)
+
+with col1:
+    st.header("📁 Input Files")
+
+    # Image upload
+    uploaded_image = st.file_uploader(
+        "Choose a reference image",
+        type=['png', 'jpg', 'jpeg'],
+        help="Upload a clear, front-facing photo"
+    )
+
+    if uploaded_image is not None:
+        image = Image.open(uploaded_image)
+        st.image(image, caption="Uploaded Image", use_column_width=True)
+
+    # Audio upload
+    uploaded_audio = st.file_uploader(
+        "Choose an audio file",
+        type=['mp3', 'wav', 'ogg'],
+        help="Upload clear audio without background noise"
+    )
+
+    if uploaded_audio is not None:
+        st.audio(uploaded_audio, format='audio/wav')
+
+    # Prompt input
+    prompt = st.text_area(
+        "Enter a prompt",
+        value="A person talking",
+        placeholder="Describe the desired video...",
+        help="Be specific about the desired talking style"
+    )
+
+with col2:
+    st.header("🎥 Results")
+
+    if st.button("🎬 Generate Video", type="primary"):
+        if uploaded_image is not None and uploaded_audio is not None and prompt:
+            result = process_inputs(
+                Image.open(uploaded_image),
+                uploaded_audio,
+                prompt
+            )
+            st.success("Processing complete!")
+            st.text_area("Generation Log", result, height=300)
+        else:
+            st.error("Please upload both image and audio files, and enter a prompt")
+
+# Tips section
+st.markdown("---")
+st.markdown("### 📝 Tips for Best Results")
+st.markdown("""
+- **Image**: Use clear, front-facing photos with good lighting
+- **Audio**: Ensure clean audio without background noise
+- **Prompt**: Be specific about the desired talking style
+- **Format**: Supported image formats: PNG, JPG, JPEG
+- **Audio**: Supported audio formats: MP3, WAV, OGG
+""")

+st.markdown("---")
+st.markdown("*This is a demo interface ready for model integration.*")

 # Minimal test version

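The **Note** block inside `process_inputs` lists the four steps a real implementation would need. A minimal sketch of how they could slot into this app, assuming hypothetical helpers (`load_multitalk_model` and `generate_talking_video` are placeholder names, not the actual MeiGen-MultiTalk API):

```python
import tempfile

import streamlit as st
from PIL import Image


@st.cache_resource  # keep the loaded model in memory across Streamlit reruns
def load_multitalk_model():
    # Step 1: download and load the MeiGen-MultiTalk checkpoint here (placeholder).
    return None


def generate_talking_video(model, image: Image.Image, audio_bytes: bytes, prompt: str) -> str:
    # Steps 2-3: preprocess the image/audio and run model inference (placeholder).
    # Step 4: write the generated video to a temporary .mp4 and return its path.
    out = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    out.close()
    return out.name


def process_inputs(image, audio, prompt):
    model = load_multitalk_model()
    video_path = generate_talking_video(model, image, audio.read(), prompt)
    return video_path  # the app could then display it with st.video(video_path)
```

Caching the model with `st.cache_resource` matters here because Streamlit re-runs the whole script on every widget interaction; without it, the checkpoint would be reloaded on each click.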
requirements.txt
CHANGED
@@ -1,5 +1,3 @@
-
-torch
+streamlit
 pillow
-numpy
-spaces
+numpy
|