Spaces:

BladeSzaSza
/

gradio_labanmovementanalysis

Running on Zero

File size: 25,177 Bytes

# app.py  ─────────────────────────────────────────────────────────
"""
Laban Movement Analysis – modernised Gradio Space
Author: Csaba (BladeSzaSza)
"""
import gradio as gr
import os
from pathlib import Path
# from backend.gradio_labanmovementanalysis import LabanMovementAnalysis
from backend.gradio_labanmovementanalysis import LabanMovementAnalysis
from gradio_overlay_video import OverlayVideo

# Optional agent API: when the import or construction fails for any reason the
# app keeps running and the agent features report themselves as unavailable.
agent_api = None
try:
    from gradio_labanmovementanalysis.agent_api import (
        LabanAgentAPI,
        PoseModel,
        MovementDirection,
        MovementIntensity
    )
    agent_api = LabanAgentAPI()
except Exception as agent_error:
    print(f"Warning: Agent API not available: {agent_error}")
    agent_api = None
    HAS_AGENT_API = False
else:
    HAS_AGENT_API = True

# Shared analyzer used by every processing function in this module.
# First attempt enables visualization; on failure fall back to the
# component's default construction.
try:
    analyzer = LabanMovementAnalysis(enable_visualization=True)
    print("✅ Core features initialized successfully")
except Exception as init_error:
    print(f"Warning: Some features may not be available: {init_error}")
    analyzer = LabanMovementAnalysis()


def process_video_enhanced(video_input, model, enable_viz, include_keypoints):
    """Analyze a video (uploaded file object, path or URL) with the shared analyzer.

    Args:
        video_input: Object exposing a ``name`` attribute (its value is used as
            the video path) or a plain value passed through unchanged.
        model: Pose model identifier forwarded to the analyzer.
        enable_viz: Forwarded as ``enable_visualization``.
        include_keypoints: Forwarded as ``include_keypoints``.

    Returns:
        tuple: ``(json_result, viz_result)`` from ``analyzer.process_video``.
        For a falsy input or any exception raised during processing the
        tuple is ``({"error": <message>}, None)``.
    """
    if not video_input:
        return {"error": "No video provided"}, None

    try:
        # Upload objects carry their path in ``.name``; anything else is used as-is.
        source = getattr(video_input, "name", video_input)
        analysis, visualization = analyzer.process_video(
            source,
            model=model,
            enable_visualization=enable_viz,
            include_keypoints=include_keypoints,
        )
    except Exception as exc:
        # UI boundary: surface the failure as data instead of raising.
        return {"error": str(exc)}, None
    return analysis, visualization

def process_video_standard(video : str, model : str, include_keypoints : bool) -> dict:
    """
    Analyze a video with the chosen pose estimation model and return the movement analysis.

    Args:
        video (str): Path to the video file to be analyzed.
        model (str): The name of the pose estimation model to use (e.g., "mediapipe-full", "movenet-thunder", etc.).
        include_keypoints (bool): Whether to include raw keypoint data in the output.

    Returns:
        dict:
            - The movement analysis results in JSON-compatible form, or a dictionary
              with a single "error" key if processing fails.

    Notes:
        - Visualization is always disabled here; any visualization output is discarded.
        - If the input video is None, the function returns None.
        - Only RuntimeError, ValueError and OSError are converted to an "error"
          dictionary; other exceptions propagate to the caller.
    """
    if video is None:
        return None

    try:
        analysis, _unused_viz = analyzer.process_video(
            video,
            model=model,
            enable_visualization=False,
            include_keypoints=include_keypoints,
        )
    except (RuntimeError, ValueError, OSError) as exc:
        return {"error": str(exc)}
    else:
        return analysis

def process_video_for_agent(video, model, output_format="summary"):
    """Analyze a video through the agent API and shape the result for agents.

    Args:
        video: Video input handed to ``agent_api.analyze``.
        model: Value converted to a ``PoseModel`` enum member.
        output_format: ``"summary"`` wraps the movement summary,
            ``"structured"`` returns selected result fields, and any other
            value returns the result's ``raw_data``.

    Returns:
        dict: The formatted result, or ``{"error": <message>}`` when the agent
        API is unavailable, no video is given, or processing raises.
    """
    agent_ready = HAS_AGENT_API and agent_api is not None
    if not agent_ready:
        return {"error": "Agent API not available"}

    if not video:
        return {"error": "No video provided"}

    try:
        pose_model = PoseModel(model)
        analysis = agent_api.analyze(video, model=pose_model, generate_visualization=False)

        if output_format == "summary":
            return {"summary": agent_api.get_movement_summary(analysis)}

        if output_format != "structured":
            # Any other format falls back to the raw JSON payload.
            return analysis.raw_data

        return {
            "success": analysis.success,
            "direction": analysis.dominant_direction.value,
            "intensity": analysis.dominant_intensity.value,
            "speed": analysis.dominant_speed,
            "fluidity": analysis.fluidity_score,
            "expansion": analysis.expansion_score,
            "segments": len(analysis.movement_segments)
        }
    except Exception as exc:
        return {"error": str(exc)}

# Batch processing removed due to MediaPipe compatibility issues

# process_standard_for_agent is now imported from backend

# Movement filtering removed due to MediaPipe compatibility issues

# Import agentic analysis functions from backend
try:
    from gradio_labanmovementanalysis.agentic_analysis import (
        generate_agentic_analysis,
        process_standard_for_agent
    )
except ImportError:
    # Backend module missing: install same-signature stand-ins so the UI
    # wiring below still works and reports the problem as data.
    def generate_agentic_analysis(
        json_data,
        analysis_type,
        filter_direction="any",
        filter_intensity="any",
        filter_min_fluidity=0.0,
        filter_min_expansion=0.0,
    ):
        """Fallback: always report that the agentic analysis backend is missing."""
        return dict(error="Agentic analysis backend not available")

    def process_standard_for_agent(json_data, output_format="summary"):
        """Fallback: always report that the agent conversion backend is missing."""
        return dict(error="Agent conversion backend not available")

# ── 4.  Build UI ─────────────────────────────────────────────────
def create_demo() -> gr.Blocks:
    """Build the Gradio Blocks UI for the Laban Movement Analysis Space.

    The layout has four tabs:

    * Standard Analysis - upload a video or enter a URL, pick a pose model and
      run ``process_video_enhanced``; shows the annotated video and raw JSON.
    * Overlayed Visualisation - an ``OverlayVideo`` component refreshed whenever
      the analysis JSON changes.
    * Agentic Analysis - runs ``generate_agentic_analysis`` on the JSON
      produced by the first tab.
    * About - static project description.

    Returns:
        gr.Blocks: The assembled (not yet launched) application.
    """
    with gr.Blocks(
        title="Laban Movement Analysis",
        theme='gstaff/sketch',
        fill_width=True,
    ) as demo:
        # gr.api(process_video_standard, api_name="process_video") 
        # ── Hero banner ──
        gr.Markdown(
            """
            # 🩰 Laban Movement Analysis 
            
            Pose estimation • AI action recognition • Movement Analysis 
            """
        )
        with gr.Tabs():
            # Tab 1: Standard Analysis
            with gr.Tab("🎭 Standard Analysis"):
                gr.Markdown("""
                ### Upload a video file to analyze movement using traditional LMA metrics with pose estimation.
                """)
                # ── Workspace ──
                with gr.Row(equal_height=True):
                    # Input column
                    with gr.Column(scale=1, min_width=260):
                        
                        analyze_btn_enh = gr.Button("🚀 Analyze Movement", variant="primary", size="lg")
                        video_in = gr.Video(label="Upload Video", sources=["upload"], format="mp4")
                        # URL input option
                        url_input_enh = gr.Textbox(
                            label="Or Enter Video URL",
                            placeholder="YouTube URL, Vimeo URL, or direct video URL",
                            info="Leave file upload empty to use URL"
                        )
                       
                        gr.Markdown("**Model Selection**")
                        
                        model_sel = gr.Dropdown(
                            choices=[
                                # MediaPipe variants
                                "mediapipe-lite", "mediapipe-full", "mediapipe-heavy",
                                # MoveNet variants
                                "movenet-lightning", "movenet-thunder",
                                # YOLO v8 variants
                                "yolo-v8-n", "yolo-v8-s", "yolo-v8-m", "yolo-v8-l", "yolo-v8-x",
                                # YOLO v11 variants
                                "yolo-v11-n", "yolo-v11-s", "yolo-v11-m", "yolo-v11-l", "yolo-v11-x"
                            ],
                            value="mediapipe-full",
                            label="Advanced Pose Models",
                            info="15 model variants available"
                        )
                        
                        with gr.Accordion("Analysis Options", open=False):
                            # Radio values are the ints 1/0, forwarded as truthy/falsy flags.
                            enable_viz = gr.Radio([("Create", 1), ("Dismiss", 0)], value=1, label="Visualization")
                            include_kp = gr.Radio([("Include", 1), ("Exclude", 0)], value=1, label="Raw Keypoints")

                        gr.Examples(
                            examples=[
                                ["examples/balette.mp4"],
                                ["https://www.youtube.com/shorts/RX9kH2l3L8U"],
                                ["https://vimeo.com/815392738"],
                                ["https://vimeo.com/548964931"],
                                ["https://videos.pexels.com/video-files/5319339/5319339-uhd_1440_2560_25fps.mp4"],
                            ],
                            inputs=url_input_enh,
                            label="Examples"
                        )


                    # Output column
                    with gr.Column(scale=2, min_width=320):
                        viz_out = gr.Video(label="Annotated Video", scale=1, height=400)
                        with gr.Accordion("Raw JSON", open=True):
                            json_out = gr.JSON(label="Movement Analysis", elem_classes=["json-output"])

                # Wiring
                def process_enhanced_input(file_input, url_input, model, enable_viz, include_keypoints):
                    """Process either file upload or URL input.

                    The uploaded file wins when both are present. Returns
                    ``[analysis_json, annotated_video]`` for the JSON and video outputs.
                    """
                    video_source = file_input if file_input else url_input
                    analysis_json, annotated_video = process_video_enhanced(
                        video_source, model, enable_viz, include_keypoints
                    )
                    # The overlay tab is refreshed by the json_out.change listener.
                    # Do not assign overlay_video.value here: that would mutate the
                    # shared server-side component instead of this user's view.
                    return [analysis_json, annotated_video]
                
                analyze_btn_enh.click(
                    fn=process_enhanced_input,
                    inputs=[video_in, url_input_enh, model_sel, enable_viz, include_kp],
                    outputs=[json_out, viz_out],
                    api_name="analyze_enhanced"
                )

            with gr.Tab("🎬 Overlayed Visualisation"):
                gr.Markdown(
                    "# 🩰 Interactive Pose Visualization\n"
                    "## See the movement analysis in action with an interactive overlay. "
                    "Analyze video @ 🎬 Standard Analysis tab"
                )
                with gr.Row(equal_height=True, min_height=240):
                    with gr.Column(scale=1):
                        # NOTE(review): the initial value passes the gr.JSON component
                        # itself rather than analysis data - confirm OverlayVideo
                        # is meant to accept a component here.
                        overlay_video = OverlayVideo(
                                value=(None, json_out),
                                autoplay=True,
                                interactive=False
                            )
                
                       
                # Update overlay when JSON changes
                def update_overlay(json_source):
                    """Update overlay video with JSON data from analysis or upload."""
                    if json_source:
                        return OverlayVideo(value=("", json_source), autoplay=True, interactive=False)
                    return OverlayVideo(value=("", None), autoplay=True, interactive=False)
                
                # Connect JSON output from analysis to overlay
                json_out.change(
                    fn=update_overlay,
                    inputs=[json_out],
                    outputs=[overlay_video]
                )

            # Tab 3: Agentic Analysis
            with gr.Tab("🤖 Agentic Analysis"):
                gr.Markdown("""
                ### Intelligent Movement Interpretation
                AI-powered analysis using the processed data from the Standard Analysis tab.
                """)
                
                with gr.Row(equal_height=True):
                    # Left column - Video display (sourced from first tab)
                    with gr.Column(scale=1, min_width=400):
                        gr.Markdown("**Source Video** *(from Standard Analysis)*")
                        agentic_video_display = gr.Video(
                            label="Analyzed Video", 
                            interactive=False,
                            height=350
                        )
                        
                        # Model info display (sourced from first tab)
                        gr.Markdown("**Model Used** *(from Standard Analysis)*")
                        agentic_model_display = gr.Textbox(
                            label="Pose Model",
                            interactive=False,
                            value="No analysis completed yet"
                        )
                    
                    # Right column - Analysis options and output
                    with gr.Column(scale=1, min_width=400):
                        gr.Markdown("**Analysis Type**")
                        agentic_analysis_type = gr.Radio(
                            choices=[
                                ("🎯 SUMMARY", "summary"),
                                ("📊 STRUCTURED", "structured"), 
                                ("🔍 MOVEMENT FILTERS", "movement_filters")
                            ],
                            value="summary",
                            label="Choose Analysis",
                            info="Select the type of intelligent analysis"
                        )
                        
                        # Movement filters options (shown when movement_filters is selected)
                        with gr.Group(visible=False) as movement_filter_options:
                            gr.Markdown("**Filter Criteria**")
                            filter_direction = gr.Dropdown(
                                choices=["any", "up", "down", "left", "right", "forward", "backward", "stationary"],
                                value="any",
                                label="Dominant Direction"
                            )
                            filter_intensity = gr.Dropdown(
                                choices=["any", "low", "medium", "high"],
                                value="any", 
                                label="Movement Intensity"
                            )
                            filter_min_fluidity = gr.Slider(0.0, 1.0, 0.0, label="Minimum Fluidity Score")
                            filter_min_expansion = gr.Slider(0.0, 1.0, 0.0, label="Minimum Expansion Score")
                        
                        analyze_agentic_btn = gr.Button("🚀 Generate Analysis", variant="primary", size="lg")
                        
                        # Output display
                        with gr.Accordion("Analysis Results", open=True):
                            agentic_output = gr.JSON(label="Intelligent Analysis Results")

                # Show/hide movement filter options based on selection
                def toggle_filter_options(analysis_type):
                    """Show the filter group only for the "movement_filters" analysis type."""
                    return gr.Group(visible=(analysis_type == "movement_filters"))
                
                agentic_analysis_type.change(
                    fn=toggle_filter_options,
                    inputs=[agentic_analysis_type],
                    outputs=[movement_filter_options]
                )
                
                # Update video display when standard analysis completes
                def update_agentic_video_display(video_input, url_input, model):
                    """Update agentic tab with video and model from standard analysis."""
                    # Prefer the upload, then the URL; hand the Video component None
                    # (not an empty string) when neither is set.
                    video_source = video_input or url_input or None
                    return video_source, f"Model: {model}"
                
                # Link to standard analysis inputs
                video_in.change(
                    fn=update_agentic_video_display,
                    inputs=[video_in, url_input_enh, model_sel],
                    outputs=[agentic_video_display, agentic_model_display]
                )
                
                url_input_enh.change(
                    fn=update_agentic_video_display,
                    inputs=[video_in, url_input_enh, model_sel],
                    outputs=[agentic_video_display, agentic_model_display]
                )
                
                model_sel.change(
                    fn=update_agentic_video_display,
                    inputs=[video_in, url_input_enh, model_sel],
                    outputs=[agentic_video_display, agentic_model_display]
                )
                
                # Hook up the Generate Analysis button
                def process_agentic_analysis(json_data, analysis_type, filter_direction, filter_intensity, filter_min_fluidity, filter_min_expansion):
                    """Process agentic analysis based on user selection."""
                    return generate_agentic_analysis(
                        json_data, 
                        analysis_type, 
                        filter_direction, 
                        filter_intensity, 
                        filter_min_fluidity, 
                        filter_min_expansion
                    )
                
                analyze_agentic_btn.click(
                    fn=process_agentic_analysis,
                    inputs=[
                        json_out,  # JSON data from standard analysis
                        agentic_analysis_type,
                        filter_direction,
                        filter_intensity, 
                        filter_min_fluidity,
                        filter_min_expansion
                    ],
                    outputs=[agentic_output],
                    api_name="analyze_agentic"
                )
                
                # Auto-update agentic analysis when JSON changes and analysis type is summary
                def auto_update_summary(json_data, analysis_type):
                    """Auto-update with summary when new analysis is available."""
                    if json_data and analysis_type == "summary":
                        return generate_agentic_analysis(json_data, "summary")
                    # Any other case clears the agentic output.
                    return None
                
                json_out.change(
                    fn=auto_update_summary,
                    inputs=[json_out, agentic_analysis_type],
                    outputs=[agentic_output]
                )

            # Tab 4: About
            with gr.Tab("ℹ️ About"):
                gr.Markdown("""
                # 🩰 Developer Journey: Laban Movement Analysis
                
                ## 🎯 Project Vision
                
                Created to bridge the gap between traditional **Laban Movement Analysis (LMA)** principles and modern **AI-powered pose estimation**, this platform represents a comprehensive approach to understanding human movement through technology.
                
                ## 🛠️ Technical Architecture
                
                ### **Core Foundation**
                - **15 Pose Estimation Models** from diverse sources and frameworks
                - **Multi-format Video Processing** with URL support (YouTube, Vimeo, direct links)
                - **Real-time Analysis Pipeline** with configurable model selection
                - **MCP-Compatible API** for AI agent integration
                
                ### **Pose Model Ecosystem**
                ```
                📊 MediaPipe Family (Google)     → 3 variants (lite/full/heavy)
                ⚡ MoveNet Family (TensorFlow)   → 2 variants (lightning/thunder)  
                🎯 YOLO v8 Family (Ultralytics) → 5 variants (n/s/m/l/x)
                🔥 YOLO v11 Family (Ultralytics)→ 5 variants (n/s/m/l/x)
                ```
                
                ## 🎨 Innovation Highlights
                
                ### **1. Custom Gradio Component: `gradio_overlay_video`**
                - **Layered Visualization**: Controlled overlay of pose data on original video
                - **Interactive Controls**: Frame-by-frame analysis with movement metrics
                - **Synchronized Playback**: Real-time correlation between video and data
                
                ### **2. Agentic Analysis Engine**
                Beyond raw pose detection, we've developed intelligent interpretation layers:
                
                - **🎯 SUMMARY**: Narrative movement interpretation with temporal pattern analysis
                - **📊 STRUCTURED**: Comprehensive quantitative breakdowns with statistical insights
                - **🔍 MOVEMENT FILTERS**: Advanced pattern detection with customizable criteria
                
                ### **3. Temporal Pattern Recognition**
                - **Movement Consistency Tracking**: Direction and intensity variation analysis
                - **Complexity Scoring**: Multi-dimensional movement sophistication metrics
                - **Sequence Detection**: Continuous movement pattern identification
                - **Laban Integration**: Professional movement quality assessment using LMA principles
                
                ## 📈 Processing Pipeline
                
                ```mermaid
                Video Input → Pose Detection → LMA Analysis → JSON Output
                     ↓              ↓              ↓           ↓
                URL/Upload → 15 Models → Temporal → Visualization
                     ↓              ↓        Patterns    ↓
                Preprocessing → Keypoints → Metrics → Agentic Analysis
                ```
                
                ## 🎭 Laban Movement Analysis Integration
                
                Our implementation translates raw pose coordinates into meaningful movement qualities:
                
                - **Effort Qualities**: Intensity, speed, and flow characteristics
                - **Space Usage**: Expansion patterns and directional preferences  
                - **Temporal Dynamics**: Rhythm, acceleration, and movement consistency
                - **Quality Assessment**: Fluidity scores and movement sophistication
                
                ## 🔬 Technical Achievements
                
                ### **Multi-Source Model Integration**
                Successfully unified models from different frameworks:
                - Google's MediaPipe (BlazePose architecture)
                - TensorFlow's MoveNet (lightweight and accurate variants)
                - Ultralytics' YOLO ecosystem (object detection adapted for pose)
                
                ### **Real-Time Processing Capabilities**
                - **Streaming Support**: Frame-by-frame processing with temporal continuity
                - **Memory Optimization**: Efficient handling of large video files
                - **Error Recovery**: Graceful handling of pose detection failures
                
                ### **Agent-Ready Architecture**
                - **MCP Server Integration**: Compatible with AI agent workflows
                - **Structured API**: RESTful endpoints for programmatic access
                - **Flexible Output Formats**: JSON, visualization videos, and metadata
                
                ## 🌟 Future Roadmap
                
                - **3D Pose Integration**: Depth-aware movement analysis
                - **Multi-Person Tracking**: Ensemble and group movement dynamics
                - **Real-Time Streaming**: Live movement analysis capabilities
                - **Machine Learning Enhancement**: Custom models trained on movement data
                
                ## 🔧 Built With
                
                - **Frontend**: Gradio 5.33+ with custom Svelte components
                - **Backend**: Python with FastAPI and async processing
                - **Computer Vision**: MediaPipe, TensorFlow, PyTorch, Ultralytics
                - **Analysis**: NumPy, OpenCV, custom Laban algorithms
                - **Deployment**: Hugging Face Spaces with Docker support
                
                ---
                
                ### 👨‍💻 Created by **Csaba Bolyós**
                
                *Combining classical movement analysis with cutting-edge AI to unlock new possibilities in human movement understanding.*
                
                **Connect:**  
                [GitHub](https://github.com/bladeszasza) • [Hugging Face](https://huggingface.co/BladeSzaSza) • [LinkedIn](https://www.linkedin.com/in/csaba-bolyós-00a11767/)
                
                ---
                
                > *"Movement is a language. Technology helps us understand what the body is saying."*
                """)
         
        # Footer
        with gr.Row():
            gr.Markdown(
                """
                **Built by Csaba Bolyós**  
                [GitHub](https://github.com/bladeszasza) • [HF](https://huggingface.co/BladeSzaSza) • [LinkedIn](https://www.linkedin.com/in/csaba-bolyós-00a11767/)
                """
            )
    return demo


if __name__ == "__main__":
    demo = create_demo()
    # Honour a PORT environment variable when set; otherwise use 7860.
    listen_port = int(os.getenv("PORT", 7860))
    demo.launch(
        server_name="0.0.0.0",
        share=True,
        server_port=listen_port,
        mcp_server=True,
    )