rocketmandrey committed on
Commit
fd969a5
·
verified ·
1 Parent(s): dc9a364

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +4 -10
  2. app.py +101 -20
  3. requirements.txt +2 -4
README.md CHANGED
@@ -3,8 +3,8 @@ title: MeiGen MultiTalk Demo
3
  emoji: 🎬
4
  colorFrom: red
5
  colorTo: blue
6
- sdk: gradio
7
- sdk_version: 4.44.1
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
@@ -27,20 +27,14 @@ This is a demo of MeiGen-MultiTalk, an audio-driven multi-person conversational
27
  1. Upload a reference image (photo of person(s) who will be speaking)
28
  2. Upload an audio file
29
  3. Enter a prompt describing the desired video
30
- 4. Adjust generation parameters if needed:
31
- - Resolution: Video quality (480p or 720p)
32
- - Audio CFG: Controls strength of audio influence
33
- - Guidance Scale: Controls adherence to prompt
34
- - Random Seed: For reproducible results
35
- - Max Duration: Video length in seconds
36
- 5. Click "Generate Video" and wait for the result
37
 
38
  ## Tips
39
 
40
  - Use clear, front-facing photos for best results
41
  - Ensure good audio quality without background noise
42
  - Keep prompts clear and specific
43
- - For multi-person videos, ensure the reference image shows all speakers clearly
44
 
45
  ## Limitations
46
 
 
3
  emoji: 🎬
4
  colorFrom: red
5
  colorTo: blue
6
+ sdk: streamlit
7
+ sdk_version: 1.28.1
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
 
27
  1. Upload a reference image (photo of person(s) who will be speaking)
28
  2. Upload an audio file
29
  3. Enter a prompt describing the desired video
30
+ 4. Click "Generate Video" to process
 
 
 
 
 
 
31
 
32
  ## Tips
33
 
34
  - Use clear, front-facing photos for best results
35
  - Ensure good audio quality without background noise
36
  - Keep prompts clear and specific
37
+ - Supported formats: PNG, JPG, JPEG for images; MP3, WAV, OGG for audio
38
 
39
  ## Limitations
40
 
app.py CHANGED
@@ -1,30 +1,111 @@
1
- import gradio as gr
2
  import time
 
3
 
4
- def process_text(text):
5
- """Simple text processing function"""
6
- if not text:
7
- return "❌ Please enter some text"
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- time.sleep(1)
 
 
10
 
11
- return f"""✅ Text processed successfully!
12
 
13
- **Input received:** {text}
 
 
 
14
 
15
- **Note:** This is a basic demo interface for MeiGen-MultiTalk.
16
- Ready for image and audio integration! 🎬"""
 
 
 
17
 
18
- # Minimal interface
19
- iface = gr.Interface(
20
- fn=process_text,
21
- inputs=gr.Textbox(label="Enter text", placeholder="Type something..."),
22
- outputs=gr.Textbox(label="Result", lines=5),
23
- title="🎬 MeiGen-MultiTalk Demo",
24
- description="Basic demo interface - text processing test"
25
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- if __name__ == "__main__":
28
- iface.launch()
29
 
30
  # Minimal test version
 
1
+ import streamlit as st
2
  import time
3
+ from PIL import Image
4
 
5
+ st.set_page_config(
6
+ page_title="MeiGen-MultiTalk Demo",
7
+ page_icon="🎬",
8
+ layout="centered"
9
+ )
10
+
11
+ def process_inputs(image, audio, prompt):
12
+ """Process the inputs and return a result"""
13
+
14
+ if image is None:
15
+ return "❌ Please upload an image"
16
+
17
+ if audio is None:
18
+ return "❌ Please upload an audio file"
19
+
20
+ if not prompt:
21
+ return "❌ Please enter a prompt"
22
 
23
+ # Simulate processing
24
+ with st.spinner("Processing..."):
25
+ time.sleep(2)
26
 
27
+ return f"""✅ Video generation request processed!
28
 
29
+ **Input received:**
30
+ - Image: ✅ Uploaded ({image.size} pixels)
31
+ - Audio: ✅ Uploaded
32
+ - Prompt: {prompt}
33
 
34
+ **Note:** This is a demo interface. The actual video generation would require:
35
+ 1. Loading the MeiGen-MultiTalk model
36
+ 2. Processing the input image and audio
37
+ 3. Generating the video using the model
38
+ 4. Returning the generated video file
39
 
40
+ The model files are not included in this demo due to size constraints.
41
+ Ready for implementation! 🎬"""
42
+
43
+ # Main app
44
+ st.title("🎬 MeiGen-MultiTalk Demo")
45
+ st.markdown("Generate talking videos from images and audio using AI")
46
+
47
+ # Create columns for layout
48
+ col1, col2 = st.columns(2)
49
+
50
+ with col1:
51
+ st.header("📁 Input Files")
52
+
53
+ # Image upload
54
+ uploaded_image = st.file_uploader(
55
+ "Choose a reference image",
56
+ type=['png', 'jpg', 'jpeg'],
57
+ help="Upload a clear, front-facing photo"
58
+ )
59
+
60
+ if uploaded_image is not None:
61
+ image = Image.open(uploaded_image)
62
+ st.image(image, caption="Uploaded Image", use_column_width=True)
63
+
64
+ # Audio upload
65
+ uploaded_audio = st.file_uploader(
66
+ "Choose an audio file",
67
+ type=['mp3', 'wav', 'ogg'],
68
+ help="Upload clear audio without background noise"
69
+ )
70
+
71
+ if uploaded_audio is not None:
72
+ st.audio(uploaded_audio, format='audio/wav')
73
+
74
+ # Prompt input
75
+ prompt = st.text_area(
76
+ "Enter a prompt",
77
+ value="A person talking",
78
+ placeholder="Describe the desired video...",
79
+ help="Be specific about the desired talking style"
80
+ )
81
+
82
+ with col2:
83
+ st.header("🎥 Results")
84
+
85
+ if st.button("🎬 Generate Video", type="primary"):
86
+ if uploaded_image is not None and uploaded_audio is not None and prompt:
87
+ result = process_inputs(
88
+ Image.open(uploaded_image),
89
+ uploaded_audio,
90
+ prompt
91
+ )
92
+ st.success("Processing complete!")
93
+ st.text_area("Generation Log", result, height=300)
94
+ else:
95
+ st.error("Please upload both image and audio files, and enter a prompt")
96
+
97
+ # Tips section
98
+ st.markdown("---")
99
+ st.markdown("### 📋 Tips for Best Results")
100
+ st.markdown("""
101
+ - **Image**: Use clear, front-facing photos with good lighting
102
+ - **Audio**: Ensure clean audio without background noise
103
+ - **Prompt**: Be specific about the desired talking style
104
+ - **Format**: Supported image formats: PNG, JPG, JPEG
105
+ - **Audio**: Supported audio formats: MP3, WAV, OGG
106
+ """)
107
 
108
+ st.markdown("---")
109
+ st.markdown("*This is a demo interface ready for model integration.*")
110
 
111
  # Minimal test version
requirements.txt CHANGED
@@ -1,5 +1,3 @@
1
- gradio==3.35.2
2
- torch
3
  pillow
4
- numpy
5
- spaces
 
1
+ streamlit
 
2
  pillow
3
+ numpy