"""Gradio front-end for the LLM Video Interpretation MCP Space.

Provides two tools — single-video analysis and topic-based multi-video
analysis — each of which forwards the request to a Modal/backend HTTP
endpoint and renders the JSON response.
"""
import gradio as gr
import os
import httpx
from typing import Dict, Any
# --- Backend Client Functions ---
# These functions call the Modal/backend endpoints.
async def call_video_analysis_backend(video_url: str) -> Dict[str, Any]:
    """Send a single-video analysis request to the backend service.

    :param video_url: Public URL of the video to analyze.
    :return: The backend's parsed JSON response on success, otherwise an
        error dict with ``status``/``message`` (and optionally ``details``).
    """
    # Fall back to a placeholder endpoint so a missing env var doesn't crash the app.
    backend_url = os.getenv(
        "BACKEND_VIDEO_URL",
        "https://your-backend-hf-space-for-video/process_video_analysis",
    )
    if not video_url:
        return {"status": "error", "message": "Video URL cannot be empty."}
    print(f"Sending request to backend for video: {video_url}")
    try:
        # Generous 30-minute timeout: the backend downloads the video and
        # runs several model inferences before responding.
        async with httpx.AsyncClient(timeout=1800.0) as client:
            resp = await client.post(backend_url, json={"video_url": video_url})
            resp.raise_for_status()
            return resp.json()
    except httpx.HTTPStatusError as exc:
        return {
            "status": "error",
            "message": f"Backend Error: {exc.response.status_code}",
            "details": exc.response.text,
        }
    except Exception as exc:
        return {"status": "error", "message": "Failed to connect to backend", "details": str(exc)}
async def call_topic_analysis_backend(topic: str, max_videos: int) -> Dict[str, Any]:
    """Send a topic-analysis request (search + per-video analysis) to the backend.

    :param topic: Search topic used by the backend to find videos.
    :param max_videos: Maximum number of videos the backend should analyze.
    :return: The backend's parsed JSON response on success, otherwise an
        error dict with ``status``/``message`` (and optionally ``details``).
    """
    backend_url = os.getenv(
        "BACKEND_TOPIC_URL",
        "https://your-backend-hf-space-for-topic/analyze_topic",
    )
    if not topic:
        return {"status": "error", "message": "Topic cannot be empty."}
    print(f"Sending request to backend for topic: {topic} ({max_videos} videos)")
    try:
        # One-hour timeout: multiple videos are searched and analyzed sequentially.
        async with httpx.AsyncClient(timeout=3600.0) as client:
            resp = await client.post(backend_url, json={"topic": topic, "max_videos": max_videos})
            resp.raise_for_status()
            return resp.json()
    except httpx.HTTPStatusError as exc:
        return {
            "status": "error",
            "message": f"Backend Error: {exc.response.status_code}",
            "details": exc.response.text,
        }
    except Exception as exc:
        return {"status": "error", "message": "Failed to connect to backend", "details": str(exc)}
# --- Gradio Tool Functions (Wrappers for MCP) ---
async def analyze_video(video_url: str):
    """
    Triggers a comprehensive analysis of a single video from a URL.

    This tool calls a backend service to perform multiple analyses:
    - Transcribes audio to text.
    - Generates a descriptive caption for the video content.
    - Recognizes main actions in the video.
    - Detects objects in keyframes.

    :param video_url: The public URL of the video to be processed (e.g., a YouTube link).
    :return: A (status message, results JSON) tuple for the two Gradio outputs.
    """
    results = await call_video_analysis_backend(video_url)
    # Failure detection: error dicts built by call_video_analysis_backend carry
    # "status"/"message" keys; a successful response presumably includes an
    # "analysis" payload (TODO confirm against the backend schema).
    # Bug fix: the original read a nonexistent "error" key, so every failure
    # surfaced as "Unknown error" — read the "message" key instead.
    if isinstance(results, dict) and (
        results.get("status") == "error" or results.get("analysis") is None
    ):
        status_update = f"Error analyzing video: {results.get('message', 'Unknown error')}"
    else:
        status_update = "Video analysis complete."
    return status_update, results
async def analyze_topic(topic: str, max_videos: int):
    """
    Finds and analyzes multiple videos based on a given topic.

    This tool calls a backend service that searches for videos related to the topic,
    then runs a comprehensive analysis on each video found.

    :param topic: The topic to search for (e.g., 'latest AI advancements').
    :param max_videos: The maximum number of videos to find and analyze (1-5).
    :return: A (status message, results JSON) tuple for the two Gradio outputs.
    """
    results = await call_topic_analysis_backend(topic, max_videos)
    # Failure detection: error dicts built by call_topic_analysis_backend carry
    # "status"/"message" keys; a successful response presumably includes a
    # "results" payload (TODO confirm against the backend schema).
    # Bug fix: the original read a nonexistent "error" key, so every failure
    # surfaced as "Unknown error" — read the "message" key instead.
    if isinstance(results, dict) and (
        results.get("status") == "error" or results.get("results") is None
    ):
        status_update = f"Error analyzing topic: {results.get('message', 'Unknown error')}"
    else:
        status_update = "Topic analysis complete."
    return status_update, results
# --- Gradio UI ---
# UI definition. Fixes applied: restored the structural indentation that was
# lost in this file (every statement sat at column 0, which is invalid Python),
# and guarded demo.launch() behind __main__ so importing this module does not
# start the server.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# LLM Video Interpretation MCP")
    gr.Markdown("This Hugging Face Space provides tools for processing video context for AI agents. Use the tools below to analyze videos by URL or by topic.")

    with gr.Tab("Single Video Analysis"):
        gr.Markdown("## Analyze a single video from a URL")
        with gr.Row():
            video_url_input = gr.Textbox(label="Video URL", placeholder="Enter a YouTube or direct video URL...", scale=4)
            submit_button = gr.Button("Analyze Video", variant="primary")
        status_text = gr.Textbox(label="Status", interactive=False)
        json_output = gr.JSON(label="Analysis Results")
        # api_name exposes this handler as a callable API/MCP tool.
        submit_button.click(
            analyze_video,
            inputs=[video_url_input],
            outputs=[status_text, json_output],
            api_name="analyze_video",
        )
        gr.Examples(
            examples=["https://www.youtube.com/watch?v=3wLg_t_H2Xw", "https://www.youtube.com/watch?v=h42dDpgE7g8"],
            inputs=video_url_input,
        )

    with gr.Tab("Topic Video Analysis"):
        gr.Markdown("## Analyze multiple videos based on a topic")
        with gr.Row():
            topic_input = gr.Textbox(label="Enter a topic", placeholder="e.g., 'Apple Vision Pro review'", scale=3)
            max_videos_slider = gr.Slider(minimum=1, maximum=5, value=2, step=1, label="Number of Videos to Analyze")
        topic_submit_button = gr.Button("Analyze Topic", variant="primary")
        topic_status_text = gr.Textbox(label="Status", interactive=False)
        topic_json_output = gr.JSON(label="Analysis Results")
        topic_submit_button.click(
            analyze_topic,
            inputs=[topic_input, max_videos_slider],
            outputs=[topic_status_text, topic_json_output],
            api_name="analyze_topic",
        )
        gr.Examples(
            examples=[["self-driving car technology", 2], ["open source large language models", 3]],
            inputs=[topic_input, max_videos_slider],
        )

# Set environment variables in your Hugging Face Space settings, not here.
# BACKEND_VIDEO_URL = "https://your-modal-or-backend-url/process_video_analysis"
# BACKEND_TOPIC_URL = "https://your-modal-or-backend-url/analyze_topic"
if __name__ == "__main__":
    demo.launch()
# NOTE(review): Removed a second, conflicting copy of the UI that appeared
# after demo.launch(). It was apparent merge residue: it referenced the
# undefined names `demo_interface` and `call_topic_analysis_endpoint` (a
# NameError as soon as it executed), re-declared a "Topic Video Analysis" tab
# outside any gr.Blocks context, and duplicated widgets already defined above.
# NOTE(review): Removed a duplicate entry-point guard that called
# `app.launch(debug=True, server_name="0.0.0.0")` — `app` was never defined
# (NameError). The application is launched via `demo.launch()` above.