Spaces:

rocketmandrey
/

phunter_space

Sleeping

File size: 4,246 Bytes

132fb5e

import os
import json
import gradio as gr
from PIL import Image
import torch
from huggingface_hub import hf_hub_download
import tempfile

# Module-wide settings
# Identifier of the MultiTalk model (looks like a Hugging Face Hub repo id — confirm).
MODEL_ID = "MeiGen-AI/MeiGen-MultiTalk"

# Use the GPU when torch reports CUDA as available, otherwise fall back to CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

def load_models():
    """Placeholder for model loading; currently does nothing and returns None."""
    # TODO: model loading logic goes here
    return None

def process_video(
    image,
    audio_files,
    prompt,
    resolution="480p",
    audio_cfg=4.0,
    cfg=7.5,
    seed=42,
    max_duration=15
):
    """Stage the inputs for a generation run and report the outcome as text.

    The reference image, the audio clips and a ``config.json`` describing the
    request are written into a temporary directory. Video generation itself
    is not implemented yet, so a placeholder message is returned. The
    temporary directory (and everything in it) is removed before returning.

    Args:
        image: Image object exposing ``save(path)`` (and ``mode``/``convert``
            when it is not in a JPEG-writable mode), e.g. a PIL image.
        audio_files: One ``bytes`` object or a sequence of ``bytes`` objects,
            each holding the raw content of one audio file.
        prompt: Text prompt stored in the configuration.
        resolution: Resolution label stored in the configuration.
        audio_cfg: Audio guidance value; coerced to ``float``.
        cfg: Guidance scale; coerced to ``float``.
        seed: Random seed; coerced to ``int``.
        max_duration: Maximum duration in seconds; coerced to ``int``.

    Returns:
        A status string. Failures never raise; they are reported as a string
        starting with ``"Error: "``.
    """
    # Validate up front so the user gets a readable message instead of an
    # AttributeError/TypeError text leaking out of the handler below.
    if image is None:
        return "Error: a reference image is required"
    if not audio_files:
        return "Error: at least one audio file is required"
    # A single upload may arrive as one bytes object; iterating it directly
    # would yield ints rather than audio payloads.
    if isinstance(audio_files, (bytes, bytearray)):
        audio_files = [audio_files]

    # Modes Pillow's JPEG writer accepts; anything else (RGBA, P, LA, ...)
    # must be converted before saving to a .jpg path.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    try:
        # Create temporary directory for processing
        with tempfile.TemporaryDirectory() as temp_dir:
            # Save uploaded image
            image_path = os.path.join(temp_dir, "reference.jpg")
            if getattr(image, "mode", "RGB") not in jpeg_modes:
                image = image.convert("RGB")
            image.save(image_path)

            # Save uploaded audio files as audio_0.wav, audio_1.wav, ...
            audio_paths = []
            for index, audio in enumerate(audio_files):
                audio_path = os.path.join(temp_dir, f"audio_{index}.wav")
                with open(audio_path, "wb") as f:
                    f.write(audio)
                audio_paths.append(audio_path)

            # Create configuration; a lone clip is stored as a plain string,
            # several clips as a list.
            config = {
                "image": image_path,
                "audio": audio_paths[0] if len(audio_paths) == 1 else audio_paths,
                "prompt": prompt,
                "resolution": resolution,
                "audio_cfg": float(audio_cfg),
                "cfg": float(cfg),
                "seed": int(seed),
                "max_duration": int(max_duration)
            }

            # Save configuration
            config_path = os.path.join(temp_dir, "config.json")
            with open(config_path, "w") as f:
                json.dump(config, f, indent=2)

            # Here we'll add video generation logic
            # For now, return a message
            return "Video generation will be implemented here"

    except Exception as e:
        # Top-level UI boundary: report the failure as text instead of raising.
        return f"Error: {str(e)}"

def _dispatch_generation(*inputs):
    """Call process_video and split its result across the two output widgets.

    process_video reports status and errors as plain text, which a Video
    component cannot display. Text goes to the status box; the result is sent
    to the video player only when it names an existing file.
    """
    result = process_video(*inputs)
    if isinstance(result, str) and os.path.isfile(result):
        return result, "Done"
    return None, result


# Create Gradio interface
with gr.Blocks(title="MeiGen-MultiTalk Demo") as demo:
    gr.Markdown("""
    # MeiGen-MultiTalk Demo
    Generate talking head videos from images and audio files.
    """)

    with gr.Row():
        # Left column: all user inputs
        with gr.Column():
            image_input = gr.Image(label="Reference Image", type="pil")
            # gr.Audio holds a single clip and offers neither a "binary" type
            # nor a `multiple` option. gr.File with file_count="multiple" and
            # type="binary" hands process_video the list of raw bytes it
            # writes to disk.
            audio_input = gr.File(
                label="Audio File(s)",
                file_count="multiple",
                file_types=["audio"],
                type="binary",
            )
            prompt_input = gr.Textbox(label="Prompt", placeholder="Describe the desired video...")

            with gr.Row():
                resolution_input = gr.Dropdown(
                    choices=["480p", "720p"],
                    value="480p",
                    label="Resolution"
                )
                audio_cfg_input = gr.Slider(
                    minimum=1.0,
                    maximum=10.0,
                    value=4.0,
                    step=0.1,
                    label="Audio CFG"
                )

            with gr.Row():
                cfg_input = gr.Slider(
                    minimum=1.0,
                    maximum=15.0,
                    value=7.5,
                    step=0.1,
                    label="Guidance Scale"
                )
                seed_input = gr.Number(
                    value=42,
                    label="Random Seed",
                    precision=0
                )

            max_duration_input = gr.Slider(
                minimum=1,
                maximum=15,
                value=10,
                step=1,
                label="Max Duration (seconds)"
            )

            generate_btn = gr.Button("Generate Video")

        # Right column: results
        with gr.Column():
            output = gr.Video(label="Generated Video")
            # Shows the textual status/error returned by process_video
            status_output = gr.Textbox(label="Status", interactive=False)

    # Input order must match process_video's positional parameters
    generate_btn.click(
        fn=_dispatch_generation,
        inputs=[
            image_input,
            audio_input,
            prompt_input,
            resolution_input,
            audio_cfg_input,
            cfg_input,
            seed_input,
            max_duration_input
        ],
        outputs=[output, status_output]
    )

# Launch locally if running directly
if __name__ == "__main__":
    demo.launch()