rocketmandrey committed on
Commit 132fb5e · 1 Parent(s): f0b6296

Initial Space setup with MeiGen MultiTalk demo

Files changed (4):
  1. .DS_Store +0 -0
  2. README.md +54 -7
  3. app.py +140 -0
  4. requirements.txt +20 -0
.DS_Store ADDED
Binary file (6.15 kB).
 
README.md CHANGED
@@ -1,14 +1,61 @@
  ---
- title: Phunter Space
- emoji: 🏆
+ title: MeiGen MultiTalk Demo
+ emoji: 🎬
  colorFrom: blue
- colorTo: green
+ colorTo: red
  sdk: gradio
- sdk_version: 5.34.2
+ sdk_version: 4.19.2
  app_file: app.py
  pinned: false
- license: mit
- short_description: phunter_space
+ license: apache-2.0
+ hf_oauth: true
+ models:
+ - MeiGen-AI/MeiGen-MultiTalk
+ - TencentGameMate/chinese-wav2vec2-base
+ tags:
+ - audio
+ - video
+ - image
+ - text-to-video
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # MeiGen-MultiTalk
+
+ Audio-driven multi-person conversational video generation, based on [MeiGen-AI/MeiGen-MultiTalk](https://huggingface.co/MeiGen-AI/MeiGen-MultiTalk).
+
+ ## Features
+
+ - 💬 Realistic Conversations - supports single- and multi-person generation
+ - 👥 Interactive Character Control - direct virtual humans via text prompts
+ - 🎤 Strong Generalization - supports cartoon characters and singing
+ - 📺 Resolution Flexibility - 480p and 720p output at arbitrary aspect ratios
+ - ⏱️ Long Video Generation - supports videos up to 15 seconds
+
+ ## Setup
+
+ 1. Install dependencies:
+ ```bash
+ pip install -r requirements.txt
+ ```
+
+ 2. Download the required models:
+ ```bash
+ huggingface-cli download MeiGen-AI/MeiGen-MultiTalk --local-dir ./weights/MeiGen-MultiTalk
+ huggingface-cli download TencentGameMate/chinese-wav2vec2-base --local-dir ./weights/chinese-wav2vec2-base
+ ```
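
For convenience, the same weights can also be fetched from Python with `huggingface_hub.snapshot_download` (the `huggingface-hub` package is already in requirements.txt). A minimal sketch, assuming the same `./weights` layout as the CLI commands above:

```python
# Sketch: fetch both model snapshots from Python instead of the CLI.
# The ./weights layout mirrors the huggingface-cli commands above.
from huggingface_hub import snapshot_download

for repo_id, local_dir in [
    ("MeiGen-AI/MeiGen-MultiTalk", "./weights/MeiGen-MultiTalk"),
    ("TencentGameMate/chinese-wav2vec2-base", "./weights/chinese-wav2vec2-base"),
]:
    snapshot_download(repo_id=repo_id, local_dir=local_dir)
```
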
+
+ ## Usage
+
+ See the examples directory for sample configurations:
+ - `examples/single_example.json` - single-person video generation
+ - `examples/multi_example.json` - multi-person conversation generation
+
+ ## License
+
+ This project is licensed under the Apache License 2.0 - see the LICENSE file for details.
+
+ ## Configuration Options
+
+ - `image`: Path to the reference image
+ - `audio`: Path to the audio file(s)
+ - `prompt`: Text description of the desired video
+ - `resolution`: Output resolution, `480p` or `720p`
+ - `audio_cfg`: Audio guidance scale
+ - `cfg`: Classifier-free guidance scale
+ - `seed`: Random seed
+ - `max_duration`: Maximum video length in seconds
app.py ADDED
@@ -0,0 +1,140 @@
import os
import json
import shutil
import tempfile

import gradio as gr
import torch
from PIL import Image
from huggingface_hub import hf_hub_download

# Constants
MODEL_ID = "MeiGen-AI/MeiGen-MultiTalk"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


def load_models():
    """Load required models (placeholder until the weights are wired up)"""
    pass


def process_video(
    image,
    audio_files,
    prompt,
    resolution="480p",
    audio_cfg=4.0,
    cfg=7.5,
    seed=42,
    max_duration=15
):
    """Stage the inputs and build the generation config.

    Actual video generation is not implemented yet, so the function
    returns None (an empty video output) after writing config.json.
    """
    try:
        # Create a temporary directory for processing
        with tempfile.TemporaryDirectory() as temp_dir:
            # Save the uploaded reference image (a PIL Image from gr.Image)
            image_path = os.path.join(temp_dir, "reference.jpg")
            image.save(image_path)

            # Copy the uploaded audio files; gr.File(type="filepath")
            # passes a list of local file paths
            audio_paths = []
            for i, audio in enumerate(audio_files or []):
                audio_path = os.path.join(temp_dir, f"audio_{i}.wav")
                shutil.copy(audio, audio_path)
                audio_paths.append(audio_path)

            # Create the generation configuration
            config = {
                "image": image_path,
                "audio": audio_paths[0] if len(audio_paths) == 1 else audio_paths,
                "prompt": prompt,
                "resolution": resolution,
                "audio_cfg": float(audio_cfg),
                "cfg": float(cfg),
                "seed": int(seed),
                "max_duration": int(max_duration)
            }

            # Save the configuration
            config_path = os.path.join(temp_dir, "config.json")
            with open(config_path, "w") as f:
                json.dump(config, f, indent=2)

            # Video generation logic will go here; no video to return yet
            return None

    except Exception as e:
        # Surface the failure in the UI as an error toast
        raise gr.Error(f"Error: {e}")


# Create the Gradio interface
with gr.Blocks(title="MeiGen-MultiTalk Demo") as demo:
    gr.Markdown("""
    # MeiGen-MultiTalk Demo
    Generate talking-head videos from a reference image and audio files.
    """)

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(label="Reference Image", type="pil")
            # gr.Audio cannot accept multiple uploads, so use gr.File
            audio_input = gr.File(
                label="Audio File(s)",
                file_count="multiple",
                file_types=["audio"],
                type="filepath"
            )
            prompt_input = gr.Textbox(label="Prompt", placeholder="Describe the desired video...")

            with gr.Row():
                resolution_input = gr.Dropdown(
                    choices=["480p", "720p"],
                    value="480p",
                    label="Resolution"
                )
                audio_cfg_input = gr.Slider(
                    minimum=1.0,
                    maximum=10.0,
                    value=4.0,
                    step=0.1,
                    label="Audio CFG"
                )

            with gr.Row():
                cfg_input = gr.Slider(
                    minimum=1.0,
                    maximum=15.0,
                    value=7.5,
                    step=0.1,
                    label="Guidance Scale"
                )
                seed_input = gr.Number(
                    value=42,
                    label="Random Seed",
                    precision=0
                )

            max_duration_input = gr.Slider(
                minimum=1,
                maximum=15,
                value=10,
                step=1,
                label="Max Duration (seconds)"
            )

            generate_btn = gr.Button("Generate Video")

        with gr.Column():
            output = gr.Video(label="Generated Video")

    generate_btn.click(
        fn=process_video,
        inputs=[
            image_input,
            audio_input,
            prompt_input,
            resolution_input,
            audio_cfg_input,
            cfg_input,
            seed_input,
            max_duration_input
        ],
        outputs=output
    )

# Launch locally if running directly
if __name__ == "__main__":
    demo.launch()
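
Since generation is still stubbed out, the plumbing can be sanity-checked by calling `process_video` directly, bypassing the UI. A minimal sketch; the gray image and silent WAV are placeholder inputs generated on the spot (`numpy` and `soundfile` are already in requirements.txt):

```python
# Smoke test for process_video without launching the Gradio UI.
import numpy as np
import soundfile as sf
from PIL import Image

from app import process_video

# Placeholder inputs: a gray image and one second of silence at 16 kHz
img = Image.new("RGB", (512, 512), "gray")
sf.write("smoke_audio.wav", np.zeros(16000, dtype="float32"), 16000)

result = process_video(img, ["smoke_audio.wav"], prompt="A person speaking")
print(result)  # None until generation is implemented
```
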
requirements.txt ADDED
@@ -0,0 +1,20 @@
torch>=2.0.0
torchvision
torchaudio
transformers>=4.30.0
diffusers
accelerate
safetensors
opencv-python
numpy
scipy
tqdm
einops
omegaconf
huggingface-hub
moviepy
soundfile
librosa
gradio>=4.0.0
python-dotenv
pillow