# Spaces: Agents-MCP-Hackathon / video_mcp (Running)
# File size: 6,389 Bytes

import gradio as gr
import os
import httpx
from typing import Dict, Any

# --- Backend Client Functions ---
# These functions call the Modal/backend endpoints.

async def call_video_analysis_backend(video_url: str) -> Dict[str, Any]:
    """Call the backend to analyze a single video.

    The endpoint is read from the ``BACKEND_VIDEO_URL`` environment variable;
    a placeholder URL is used when the variable is unset.

    :param video_url: URL of the video to analyze. Surrounding whitespace is
        stripped; an empty or whitespace-only value is rejected without
        contacting the backend.
    :return: The decoded JSON body of the backend response on success.
        Otherwise a dict with ``"status": "error"``, a ``"message"`` and,
        for request/response failures, ``"details"``.
    """
    # Default to a placeholder if the env var is not set, to avoid crashing.
    backend_url = os.getenv("BACKEND_VIDEO_URL", "https://your-backend-hf-space-for-video/process_video_analysis")

    # Treat None, "" and whitespace-only input alike: nothing to analyze.
    video_url = (video_url or "").strip()
    if not video_url:
        return {"status": "error", "message": "Video URL cannot be empty."}

    print(f"Sending request to backend for video: {video_url}")
    payload = {"video_url": video_url}
    try:
        async with httpx.AsyncClient(timeout=1800.0) as client:
            response = await client.post(backend_url, json=payload)
            response.raise_for_status()
    except httpx.HTTPStatusError as e:
        return {"status": "error", "message": f"Backend Error: {e.response.status_code}", "details": e.response.text}
    except Exception as e:
        return {"status": "error", "message": "Failed to connect to backend", "details": str(e)}

    # Decode outside the request try-block so a malformed body is not
    # misreported as a connection failure.
    try:
        return response.json()
    except ValueError as e:
        return {"status": "error", "message": "Backend returned a non-JSON response", "details": str(e)}

async def call_topic_analysis_backend(topic: str, max_videos: int) -> Dict[str, Any]:
    """Call the backend to analyze videos for a topic.

    The endpoint is read from the ``BACKEND_TOPIC_URL`` environment variable;
    a placeholder URL is used when the variable is unset.

    :param topic: Topic to search for. Surrounding whitespace is stripped; an
        empty or whitespace-only value is rejected without contacting the
        backend.
    :param max_videos: Forwarded to the backend as ``"max_videos"``.
    :return: The decoded JSON body of the backend response on success.
        Otherwise a dict with ``"status": "error"``, a ``"message"`` and,
        for request/response failures, ``"details"``.
    """
    backend_url = os.getenv("BACKEND_TOPIC_URL", "https://your-backend-hf-space-for-topic/analyze_topic")

    # Treat None, "" and whitespace-only input alike: nothing to search for.
    topic = (topic or "").strip()
    if not topic:
        return {"status": "error", "message": "Topic cannot be empty."}

    print(f"Sending request to backend for topic: {topic} ({max_videos} videos)")
    payload = {"topic": topic, "max_videos": max_videos}
    try:
        async with httpx.AsyncClient(timeout=3600.0) as client:
            response = await client.post(backend_url, json=payload)
            response.raise_for_status()
    except httpx.HTTPStatusError as e:
        return {"status": "error", "message": f"Backend Error: {e.response.status_code}", "details": e.response.text}
    except Exception as e:
        return {"status": "error", "message": "Failed to connect to backend", "details": str(e)}

    # Decode outside the request try-block so a malformed body is not
    # misreported as a connection failure.
    try:
        return response.json()
    except ValueError as e:
        return {"status": "error", "message": "Backend returned a non-JSON response", "details": str(e)}

# --- Gradio Tool Functions (Wrappers for MCP) ---

async def analyze_video(video_url: str):
    """
    Triggers a comprehensive analysis of a single video from a URL.

    This tool calls a backend service to perform multiple analyses:
    - Transcribes audio to text.
    - Generates a descriptive caption for the video content.
    - Recognizes main actions in the video.
    - Detects objects in keyframes.

    :param video_url: The public URL of the video to be processed (e.g., a YouTube link).
    :return: A ``(status, results)`` pair: a human-readable status string and
        the JSON-compatible payload returned by the backend call (the analysis
        results, or the error payload when the call failed).
    """
    results = await call_video_analysis_backend(video_url)

    # A dict without an "analysis" entry is treated as a failure.
    if isinstance(results, dict) and results.get("analysis") is None:
        # Failure payloads built by the backend client carry their text under
        # "message"; "error" is still checked first for payloads that use it.
        reason = results.get("error") or results.get("message") or "Unknown error"
        return f"Error analyzing video: {reason}", results

    return "Video analysis complete.", results

async def analyze_topic(topic: str, max_videos: int):
    """
    Finds and analyzes multiple videos based on a given topic.

    This tool calls a backend service that searches for videos related to the topic,
    then runs a comprehensive analysis on each video found.

    :param topic: The topic to search for (e.g., 'latest AI advancements').
    :param max_videos: The maximum number of videos to find and analyze (1-5).
    :return: A ``(status, results)`` pair: a human-readable status string and
        the JSON-compatible payload returned by the backend call (the
        aggregated results, or the error payload when the call failed).
    """
    results = await call_topic_analysis_backend(topic, max_videos)

    # A dict without a "results" entry is treated as a failure.
    if isinstance(results, dict) and results.get("results") is None:
        # Failure payloads built by the backend client carry their text under
        # "message"; "error" is still checked first for payloads that use it.
        reason = results.get("error") or results.get("message") or "Unknown error"
        return f"Error analyzing topic: {reason}", results

    return "Topic analysis complete.", results

# --- Gradio UI ---

# Two-tab Blocks app: one tab per tool. Each tab wires a button to its async
# tool function and shows the returned (status, results) pair in a read-only
# textbox and a JSON viewer.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# LLM Video Interpretation MCP")
    gr.Markdown("This Hugging Face Space provides tools for processing video context for AI agents. Use the tools below to analyze videos by URL or by topic.")

    # --- Tab 1: analyze one video by URL ---
    with gr.Tab("Single Video Analysis"):
        gr.Markdown("## Analyze a single video from a URL")
        with gr.Row():
            video_url_input = gr.Textbox(label="Video URL", placeholder="Enter a YouTube or direct video URL...", scale=4)
        submit_button = gr.Button("Analyze Video", variant="primary")
        status_text = gr.Textbox(label="Status", interactive=False)
        json_output = gr.JSON(label="Analysis Results")
        
        # Output order matches the (status, results) tuple returned by analyze_video.
        submit_button.click(
            analyze_video,
            inputs=[video_url_input],
            outputs=[status_text, json_output],
            api_name="analyze_video"
        )
        # NOTE(review): with fn/outputs supplied, Gradio may cache these examples
        # (reportedly the default on Hugging Face Spaces), which would call the
        # backend for each example — confirm this is intended.
        gr.Examples(
            examples=["https://www.youtube.com/watch?v=3wLg_t_H2Xw", "https://www.youtube.com/watch?v=h42dDpgE7g8"],
            inputs=video_url_input,
            fn=analyze_video,
            outputs=[status_text, json_output]
        )

    # --- Tab 2: search a topic and analyze several videos ---
    with gr.Tab("Topic Video Analysis"):
        gr.Markdown("## Analyze multiple videos based on a topic")
        with gr.Row():
            topic_input = gr.Textbox(label="Enter a topic", placeholder="e.g., 'Apple Vision Pro review'", scale=3)
            # The slider limits UI input to whole numbers from 1 to 5.
            max_videos_slider = gr.Slider(minimum=1, maximum=5, value=2, step=1, label="Number of Videos to Analyze")
        topic_submit_button = gr.Button("Analyze Topic", variant="primary")
        topic_status_text = gr.Textbox(label="Status", interactive=False)
        topic_json_output = gr.JSON(label="Analysis Results")
        
        # Output order matches the (status, results) tuple returned by analyze_topic.
        topic_submit_button.click(
            analyze_topic,
            inputs=[topic_input, max_videos_slider],
            outputs=[topic_status_text, topic_json_output],
            api_name="analyze_topic"
        )
        # NOTE(review): same example-caching caveat as the single-video tab;
        # topic requests use a 3600 s client timeout — confirm caching is intended.
        gr.Examples(
            examples=[["self-driving car technology", 2], ["open source large language models", 3]],
            inputs=[topic_input, max_videos_slider],
            fn=analyze_topic,
            outputs=[topic_status_text, topic_json_output]
        )

# Final launch of the Gradio app
# NOTE(review): launch() is called without an mcp_server argument; if these
# tools are meant to be served over MCP, confirm it is enabled elsewhere
# (e.g. via environment configuration).
demo.launch()