# phunter_space/app.py: initial Space setup with the MeiGen MultiTalk demo

import os
import json
import gradio as gr
from PIL import Image
import torch
from huggingface_hub import hf_hub_download
import tempfile
import shutil

# Constants
MODEL_ID = "MeiGen-AI/MeiGen-MultiTalk"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

def load_models():
"""Load required models"""
# Here we'll add model loading logic
pass
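
# Hedged sketch: one plausible way for load_models above to fetch the
# checkpoint. snapshot_download is a real huggingface_hub API, but wiring
# the downloaded files into the actual MultiTalk pipeline remains an
# assumption, so this helper is not called anywhere yet.
def _download_checkpoint():
    from huggingface_hub import snapshot_download

    # Download the full model repo into the local HF cache, return its path
    return snapshot_download(repo_id=MODEL_ID)
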
def process_video(
image,
audio_files,
prompt,
resolution="480p",
audio_cfg=4.0,
cfg=7.5,
seed=42,
max_duration=15
):
"""Process video generation"""
try:
# Create temporary directory for processing
with tempfile.TemporaryDirectory() as temp_dir:
# Save uploaded image
image_path = os.path.join(temp_dir, "reference.jpg")
image.save(image_path)
            # Copy the uploaded audio files (received as file paths from
            # gr.File) into the temp directory
            audio_paths = []
            for i, audio in enumerate(audio_files or []):
                audio_path = os.path.join(temp_dir, f"audio_{i}.wav")
                shutil.copy(audio, audio_path)
                audio_paths.append(audio_path)
# Create configuration
config = {
"image": image_path,
"audio": audio_paths[0] if len(audio_paths) == 1 else audio_paths,
"prompt": prompt,
"resolution": resolution,
"audio_cfg": float(audio_cfg),
"cfg": float(cfg),
"seed": int(seed),
"max_duration": int(max_duration)
}
# Save configuration
config_path = os.path.join(temp_dir, "config.json")
with open(config_path, "w") as f:
json.dump(config, f, indent=2)
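
            # Hedged sketch of the eventual generation step. The script name
            # and flags below are illustrative assumptions, not a confirmed
            # MultiTalk CLI:
            #
            #   import subprocess
            #   out_path = os.path.join(temp_dir, "output.mp4")
            #   subprocess.run(
            #       ["python", "generate.py", "--config", config_path,
            #        "--output", out_path],
            #       check=True,
            #   )
            #   return out_path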
            # Video generation is not implemented yet; notify the user and
            # return no video instead of handing gr.Video a non-path string
            gr.Info("Video generation will be implemented here")
            return None
    except Exception as e:
        # Surface failures in the UI instead of returning an error string
        # to the gr.Video output
        raise gr.Error(str(e))

# Create Gradio interface
with gr.Blocks(title="MeiGen-MultiTalk Demo") as demo:
gr.Markdown("""
# MeiGen-MultiTalk Demo
Generate talking head videos from images and audio files.
""")
with gr.Row():
with gr.Column():
image_input = gr.Image(label="Reference Image", type="pil")
            audio_input = gr.File(
                label="Audio File(s)",
                file_count="multiple",
                file_types=["audio"],
                type="filepath",
            )
prompt_input = gr.Textbox(label="Prompt", placeholder="Describe the desired video...")
with gr.Row():
resolution_input = gr.Dropdown(
choices=["480p", "720p"],
value="480p",
label="Resolution"
)
audio_cfg_input = gr.Slider(
minimum=1.0,
maximum=10.0,
value=4.0,
step=0.1,
label="Audio CFG"
)
with gr.Row():
cfg_input = gr.Slider(
minimum=1.0,
maximum=15.0,
value=7.5,
step=0.1,
label="Guidance Scale"
)
seed_input = gr.Number(
value=42,
label="Random Seed",
precision=0
)
max_duration_input = gr.Slider(
minimum=1,
maximum=15,
value=10,
step=1,
label="Max Duration (seconds)"
)
generate_btn = gr.Button("Generate Video")
with gr.Column():
output = gr.Video(label="Generated Video")
generate_btn.click(
fn=process_video,
inputs=[
image_input,
audio_input,
prompt_input,
resolution_input,
audio_cfg_input,
cfg_input,
seed_input,
max_duration_input
],
outputs=output
)
# Launch locally if running directly
if __name__ == "__main__":
demo.launch()
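
# On a GPU Space, enabling Gradio's request queue helps long-running
# generations avoid HTTP timeouts, e.g. demo.queue().launch(); omitted
# here while the generation step is still a stub.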