Upload folder using huggingface_hub
- README.md +4 -10
- app.py +101 -20
- requirements.txt +2 -4
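
The commit title above is the default message written by `huggingface_hub` when a folder is pushed with `upload_folder`. For reference, a minimal sketch of how this kind of upload is typically done (the `repo_id` below is a placeholder, not taken from this page):

```python
from huggingface_hub import HfApi

# Uses the token cached by `huggingface-cli login` (or the HF_TOKEN env var).
api = HfApi()

api.upload_folder(
    folder_path=".",                                # local folder with app.py, README.md, requirements.txt
    repo_id="your-username/meigen-multitalk-demo",  # placeholder Space id
    repo_type="space",                              # push to a Space rather than a model repo
)
```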
README.md
CHANGED
@@ -3,8 +3,8 @@ title: MeiGen MultiTalk Demo
 emoji: 🎬
 colorFrom: red
 colorTo: blue
-sdk:
-sdk_version:
+sdk: streamlit
+sdk_version: 1.28.1
 app_file: app.py
 pinned: false
 license: apache-2.0
@@ -27,20 +27,14 @@ This is a demo of MeiGen-MultiTalk, an audio-driven multi-person conversational
 1. Upload a reference image (photo of person(s) who will be speaking)
 2. Upload an audio file
 3. Enter a prompt describing the desired video
-4. Adjust the settings:
-   - Resolution: Video quality (480p or 720p)
-   - Audio CFG: Controls strength of audio influence
-   - Guidance Scale: Controls adherence to prompt
-   - Random Seed: For reproducible results
-   - Max Duration: Video length in seconds
-5. Click "Generate Video" and wait for the result
+4. Click "Generate Video" to process

 ## Tips

 - Use clear, front-facing photos for best results
 - Ensure good audio quality without background noise
 - Keep prompts clear and specific
-
+- Supported formats: PNG, JPG, JPEG for images; MP3, WAV, OGG for audio

 ## Limitations

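The removed README lines above documented advanced settings (resolution, audio CFG, guidance scale, random seed, max duration) that the simplified UI no longer exposes. If they are reintroduced later, a minimal sketch of how they could be collected with standard Streamlit widgets (the variable names, ranges, and defaults here are assumptions, not part of this commit):

```python
import streamlit as st

# Hypothetical "Advanced settings" panel; ranges and defaults are illustrative only.
with st.expander("Advanced settings"):
    resolution = st.selectbox("Resolution", ["480p", "720p"])
    audio_cfg = st.slider("Audio CFG (strength of audio influence)", 1.0, 10.0, 4.0)
    guidance_scale = st.slider("Guidance Scale (adherence to prompt)", 1.0, 15.0, 7.5)
    seed = st.number_input("Random Seed (-1 for random)", value=-1, step=1)
    max_duration = st.slider("Max Duration (seconds)", 1, 15, 5)
```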
app.py
CHANGED
@@ -1,30 +1,111 @@
+import streamlit as st
 import time
+from PIL import Image
+
+st.set_page_config(
+    page_title="MeiGen-MultiTalk Demo",
+    page_icon="🎬",
+    layout="centered"
+)
+
+def process_inputs(image, audio, prompt):
+    """Process the inputs and return a result"""
+
+    if image is None:
+        return "❌ Please upload an image"
+
+    if audio is None:
+        return "❌ Please upload an audio file"
+
+    if not prompt:
+        return "❌ Please enter a prompt"

+    # Simulate processing
+    with st.spinner("Processing..."):
+        time.sleep(2)

+    return f"""✅ Video generation request processed!

+**Input received:**
+- Image: ✅ Uploaded ({image.size} pixels)
+- Audio: ✅ Uploaded
+- Prompt: {prompt}

+**Note:** This is a demo interface. The actual video generation would require:
+1. Loading the MeiGen-MultiTalk model
+2. Processing the input image and audio
+3. Generating the video using the model
+4. Returning the generated video file

+The model files are not included in this demo due to size constraints.
+Ready for implementation! 🎬"""
+
+# Main app
+st.title("🎬 MeiGen-MultiTalk Demo")
+st.markdown("Generate talking videos from images and audio using AI")
+
+# Create columns for layout
+col1, col2 = st.columns(2)
+
+with col1:
+    st.header("📁 Input Files")
+
+    # Image upload
+    uploaded_image = st.file_uploader(
+        "Choose a reference image",
+        type=['png', 'jpg', 'jpeg'],
+        help="Upload a clear, front-facing photo"
+    )
+
+    if uploaded_image is not None:
+        image = Image.open(uploaded_image)
+        st.image(image, caption="Uploaded Image", use_column_width=True)
+
+    # Audio upload
+    uploaded_audio = st.file_uploader(
+        "Choose an audio file",
+        type=['mp3', 'wav', 'ogg'],
+        help="Upload clear audio without background noise"
+    )
+
+    if uploaded_audio is not None:
+        st.audio(uploaded_audio, format='audio/wav')
+
+    # Prompt input
+    prompt = st.text_area(
+        "Enter a prompt",
+        value="A person talking",
+        placeholder="Describe the desired video...",
+        help="Be specific about the desired talking style"
+    )
+
+with col2:
+    st.header("🎥 Results")
+
+    if st.button("🎬 Generate Video", type="primary"):
+        if uploaded_image is not None and uploaded_audio is not None and prompt:
+            result = process_inputs(
+                Image.open(uploaded_image),
+                uploaded_audio,
+                prompt
+            )
+            st.success("Processing complete!")
+            st.text_area("Generation Log", result, height=300)
+        else:
+            st.error("Please upload both image and audio files, and enter a prompt")
+
+# Tips section
+st.markdown("---")
+st.markdown("### 📝 Tips for Best Results")
+st.markdown("""
+- **Image**: Use clear, front-facing photos with good lighting
+- **Audio**: Ensure clean audio without background noise
+- **Prompt**: Be specific about the desired talking style
+- **Format**: Supported image formats: PNG, JPG, JPEG
+- **Audio**: Supported audio formats: MP3, WAV, OGG
+""")

+st.markdown("---")
+st.markdown("*This is a demo interface ready for model integration.*")

 # Minimal test version

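The **Note** block inside `process_inputs` lists the four steps a real implementation would need. A minimal sketch of how they could slot into this app, assuming hypothetical helpers (`load_multitalk_model` and `generate_talking_video` are placeholder names, not the actual MeiGen-MultiTalk API):

```python
import tempfile

import streamlit as st
from PIL import Image


@st.cache_resource  # keep the loaded model in memory across Streamlit reruns
def load_multitalk_model():
    # Step 1: download and load the MeiGen-MultiTalk checkpoint here (placeholder).
    return None


def generate_talking_video(model, image: Image.Image, audio_bytes: bytes, prompt: str) -> str:
    # Steps 2-3: preprocess the image/audio and run model inference (placeholder).
    # Step 4: write the generated video to a temporary .mp4 and return its path.
    out = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    out.close()
    return out.name


def process_inputs(image, audio, prompt):
    model = load_multitalk_model()
    video_path = generate_talking_video(model, image, audio.read(), prompt)
    return video_path  # the app could then display it with st.video(video_path)
```

Caching the model with `st.cache_resource` matters here because Streamlit re-runs the whole script on every widget interaction; without it, the checkpoint would be reloaded on each click.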
requirements.txt
CHANGED
@@ -1,5 +1,3 @@
-
-torch
+streamlit
 pillow
-numpy
-spaces
+numpy
|