import streamlit as st
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import torch
import json
import os
from pathlib import Path

class VideoRetrieval:
    """Rank video clips against a free-text query.

    The query is embedded with a SentenceTransformer text model and compared,
    by cosine similarity, with several per-clip feature matrices. The weighted
    sum of those similarities is the clip's relevance score.

    Instance attributes (populated by ``create_dummy_data`` / ``load_data``):
        features: dict mapping a feature-type name to a 2-D array holding one
            row per clip.
        clips_df: DataFrame holding one metadata row per clip, in the same
            row order as the feature matrices.
    """

    # Contribution of each feature type to the combined relevance score.
    FEATURE_WEIGHTS = {
        'visual_features': 0.4,
        'scene_features': 0.3,
        'object_features': 0.3,
    }

    def __init__(self, use_dummy_data=True):
        """Load the text encoder and the clip data.

        Args:
            use_dummy_data: When true, generate random demo data; otherwise
                read pre-computed features and metadata via ``load_data``.
        """
        self.text_model = SentenceTransformer('all-MiniLM-L6-v2')
        if use_dummy_data:
            self.create_dummy_data()
        else:
            self.load_data()

    def create_dummy_data(self):
        """Create dummy features and metadata for demonstration.

        The feature matrices are random noise, so the resulting rankings are
        arbitrary and unrelated to the clip descriptions.
        """
        n_clips = 20
        feature_dim = 384  # matching the dimension of all-MiniLM-L6-v2

        self.features = {
            'visual_features': np.random.randn(n_clips, feature_dim),
            'scene_features': np.random.randn(n_clips, feature_dim),
            'object_features': np.random.randn(n_clips, feature_dim)
        }

        movie_titles = [
            "The Matrix", "Inception", "The Dark Knight", "Pulp Fiction",
            "The Shawshank Redemption", "Forrest Gump", "The Godfather",
            "Fight Club", "Interstellar", "The Silence of the Lambs"
        ]

        descriptions = [
            "A dramatic confrontation in a dark room where the truth is revealed",
            "A high-stakes chase through a crowded city street",
            "An emotional reunion between long-lost friends",
            "A tense negotiation that determines the fate of many",
            "A quiet moment of reflection before a life-changing decision"
        ]

        # Sample YouTube clips (famous movie scenes)
        youtube_clips = [
            "https://www.youtube.com/watch?v=kcsNbQRU5TI",  # Matrix - Red Pill Blue Pill
            "https://www.youtube.com/watch?v=YoHD9XEInc0",  # Inception - Hallway Fight
            "https://www.youtube.com/watch?v=ZWCAf-xLV2k",  # Dark Knight - Interrogation
            "https://www.youtube.com/watch?v=Jomr9SAjcyw",  # Pulp Fiction - Restaurant
            "https://www.youtube.com/watch?v=SQ7_5MMbPYs",  # Shawshank - Hope Speech
        ]

        # The sample lists are shorter than n_clips, so cycle through them.
        records = [
            {
                'clip_id': f'clip_{i}',
                'movie_title': movie_titles[i % len(movie_titles)],
                'description': descriptions[i % len(descriptions)],
                'timestamp': f'{(i*5):02d}:00 - {(i*5+3):02d}:00',
                'duration': '3:00',
                'youtube_url': youtube_clips[i % len(youtube_clips)]
            }
            for i in range(n_clips)
        ]

        self.clips_df = pd.DataFrame(records)

    def load_data(self):
        """Load actual pre-computed features and metadata.

        If any of the files is missing, an error is shown in the Streamlit UI
        and dummy data is generated instead. Other exceptions propagate.
        """
        try:
            self.features = {
                'visual_features': np.load('path_to_visual_features.npy'),
                'scene_features': np.load('path_to_scene_features.npy'),
                'object_features': np.load('path_to_object_features.npy')
            }
            self.clips_df = pd.read_csv('clips_metadata.csv')
        except FileNotFoundError as e:
            st.error(f"Error loading data: {e}. Falling back to dummy data.")
            # Regenerates both attributes, so a partial load is never kept.
            self.create_dummy_data()

    def encode_query(self, query_text):
        """Encode the text query into embeddings."""
        return self.text_model.encode(query_text)

    def compute_similarity(self, query_embedding, feature_type='visual_features'):
        """Compute cosine similarity between the query and one feature matrix.

        Args:
            query_embedding: Embedding of the query; reshaped to a single row.
            feature_type: Key into ``self.features`` selecting the matrix.

        Returns:
            The first (only) row of the similarity matrix: one score per row
            of the selected feature matrix.
        """
        similarities = cosine_similarity(
            query_embedding.reshape(1, -1),
            self.features[feature_type]
        )
        return similarities[0]

    @staticmethod
    def _top_k_indices(scores, top_k):
        """Return the indices of the ``top_k`` largest scores, best first.

        ``top_k`` is clamped to ``[0, len(scores)]``. Without the clamp a
        request for 0 results would slice with ``[-0:]`` and return every
        index, and a negative request would return an arbitrary subset.
        """
        scores = np.asarray(scores)
        limit = max(0, min(int(top_k), scores.shape[0]))
        if limit == 0:
            return np.empty(0, dtype=int)
        return np.argsort(scores)[::-1][:limit]

    def retrieve_clips(self, query_text, top_k=3):
        """Retrieve top-k most relevant clips based on query.

        Args:
            query_text: Natural-language description of the wanted scene.
            top_k: Maximum number of clips to return. Values of 0 or less
                yield an empty list; values above the number of clips
                return every clip.

        Returns:
            A list of dicts, best match first, each with the keys
            ``clip_id``, ``movie_title``, ``description``, ``timestamp``,
            ``youtube_url`` and ``similarity_score``.
        """
        query_embedding = self.encode_query(query_text)

        # Weighted sum of the per-feature-type similarities.
        combined_similarities = sum(
            self.compute_similarity(query_embedding, feat_type) * weight
            for feat_type, weight in self.FEATURE_WEIGHTS.items()
        )

        results = []
        for idx in self._top_k_indices(combined_similarities, top_k):
            row = self.clips_df.iloc[idx]  # fetch the metadata row once
            results.append({
                'clip_id': row['clip_id'],
                'movie_title': row['movie_title'],
                'description': row['description'],
                'timestamp': row['timestamp'],
                'youtube_url': row['youtube_url'],
                'similarity_score': float(combined_similarities[idx])  # Convert to float for JSON serialization
            })

        return results

def _has_url(url):
    """Return True when *url* is a non-blank string.

    Metadata read from a CSV can contain NaN (a truthy float) or an empty
    string where a URL is missing, so plain truthiness is not a safe check.
    """
    return isinstance(url, str) and bool(url.strip())


def _render_result(result):
    """Draw one retrieved clip: details and player left, score and link right."""
    url = result['youtube_url']
    playable = _has_url(url)

    with st.container():
        st.subheader(f"{result['movie_title']}")
        details_col, score_col = st.columns([2, 1])

        with details_col:
            st.markdown("**Scene Description:**")
            st.write(result['description'])
            st.text(f"⏱️ Timestamp: {result['timestamp']}")

            # Add video player
            if playable:
                st.video(url)

        with score_col:
            st.markdown("**Relevance Score:**")
            # st.progress needs a value in [0, 1]; cosine-based scores can be negative.
            score = min(1.0, max(0.0, result['similarity_score']))
            st.progress(score)
            st.text(f"{score:.2%} match")

            # Add direct YouTube link, under the same guard as the player so
            # that a missing URL never produces a dead link.
            if playable:
                st.markdown(f"[🔗 Watch on YouTube]({url})")
                st.text("Click to open in a new tab")

        st.divider()


def _render_sidebar():
    """Draw the static "About" and "Feature Weights" sidebar panels."""
    with st.sidebar:
        st.header("ℹ️ About")
        st.write("""
        This demo system simulates a video retrieval engine that uses:
        
        - 🎥 Visual scene understanding
        - 👥 Character interaction analysis
        - 🎯 Object detection
        - 🎭 Action recognition
        
        In a production system, these features would be pre-computed
        from actual movie clips using state-of-the-art AI models.
        """)

        st.header("⚙️ Feature Weights")
        st.write("Current weights used for similarity computation:")
        st.write("- 🎬 Visual Features: 40%")
        st.write("- 🏞️ Scene Features: 30%")
        st.write("- 📦 Object Features: 30%")


def main():
    """Build the Streamlit page: search form, ranked results and sidebar."""
    st.set_page_config(
        page_title="Movie Scene Retrieval System",
        page_icon="🎬",
        layout="wide"
    )

    st.title("🎬 Movie Scene Retrieval System")
    st.write("""
    Search for movie scenes using natural language descriptions.
    The system will retrieve the most relevant 2-3 minute clips based on your query.
    
    *Note: This is a demo version using simulated data.*
    """)

    # Build the retrieval system once per session; Streamlit reruns this
    # script on every interaction and the text model is costly to load.
    if "retrieval_system" not in st.session_state:
        st.session_state.retrieval_system = VideoRetrieval(use_dummy_data=True)
    retrieval_system = st.session_state.retrieval_system

    # Search interface
    col1, col2 = st.columns([3, 1])

    with col1:
        query = st.text_input(
            "Enter your scene description:",
            placeholder="e.g., A dramatic confrontation between two characters in a dark room"
        )

    with col2:
        num_results = st.slider("Number of results:", min_value=1, max_value=5, value=3)

    if st.button("🔍 Search", type="primary"):
        if not query:
            st.warning("Please enter a scene description.")
        else:
            # Only the retrieval itself runs under the spinner.
            with st.spinner("Searching for relevant clips..."):
                results = retrieval_system.retrieve_clips(query, top_k=num_results)

            for result in results:
                _render_result(result)

    # Sidebar with additional information
    _render_sidebar()

# Entry point: build the app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()