#!/usr/bin/env python3
"""
LLM Compatibility Advisor - Streamlined Version
Author: Assistant
Description: Provides device-based LLM recommendations with popular models
Requirements: streamlit, pandas, plotly, openpyxl
"""

import streamlit as st
import pandas as pd
import re
import plotly.graph_objects as go
from typing import Optional, Tuple, Dict, List

# Must be first Streamlit command
st.set_page_config(
    page_title="LLM Compatibility Advisor", 
    layout="wide",
    page_icon="🧠"
)

@st.cache_data
def load_data():
    paths = [
        "src/BITS_INTERNS.xlsx",
        "src/Summer of AI - ICFAI  (Responses) (3).xlsx"
    ]

    combined_df = pd.DataFrame()
    for path in paths:
        try:
            df = pd.read_excel(path, sheet_name="Form Responses 1")
            df.columns = df.columns.str.strip()
            combined_df = pd.concat([combined_df, df], ignore_index=True)
        except FileNotFoundError:
            return None, f"Excel file '{path}' not found. Please upload the file."
        except Exception as e:
            return None, f"Error loading '{path}': {str(e)}"

    if combined_df.empty:
        return None, "No data found in either file."

    return combined_df, None

def extract_numeric_ram(ram) -> Optional[int]:
    if pd.isna(ram):
        return None

    ram_str = str(ram).lower().replace(" ", "")

    gb_match = re.search(r"(\d+(?:\.\d+)?)(?:gb|g)", ram_str)
    if gb_match:
        return int(float(gb_match.group(1)))

    mb_match = re.search(r"(\d+)(?:mb|m)", ram_str)
    if mb_match:
        return max(1, int(int(mb_match.group(1)) / 1024))

    plain_match = re.search(r"(\d+)", ram_str)
    if plain_match:
        return int(plain_match.group(1))

    return None

LLM_DATABASE = {
    "ultra_low": {
        "general": [
            {"name": "TinyLlama-1.1B-Chat", "size": "637MB", "description": "Compact chat model"},
            {"name": "all-MiniLM-L6-v2", "size": "91MB", "description": "Sentence embeddings"}
        ],
        "code": [
            {"name": "CodeT5-small", "size": "242MB", "description": "Code generation"}
        ]
    },
    "low": {
        "general": [
            {"name": "Phi-1.5", "size": "2.8GB", "description": "Microsoft's efficient model"},
            {"name": "Gemma-2B", "size": "1.4GB", "description": "Google's compact model"}
        ],
        "code": [
            {"name": "CodeGen-2B", "size": "1.8GB", "description": "Salesforce code model"}
        ]
    },
    "moderate": {
        "general": [
            {"name": "Llama-2-7B-Chat", "size": "3.5GB", "description": "Meta's popular chat model"},
            {"name": "Mistral-7B-Instruct-v0.2", "size": "4.1GB", "description": "Latest Mistral instruct"}
        ],
        "code": [
            {"name": "CodeLlama-7B-Instruct", "size": "3.8GB", "description": "Instruction-tuned CodeLlama"}
        ]
    },
    "good": {
        "general": [
            {"name": "Llama-2-13B-Chat", "size": "7.3GB", "description": "Larger Llama variant"},
            {"name": "OpenChat-3.5", "size": "7.1GB", "description": "High-quality chat model"}
        ],
        "code": [
            {"name": "CodeLlama-13B-Instruct", "size": "7.3GB", "description": "Larger code model"}
        ]
    },
    "high": {
        "general": [
            {"name": "Mixtral-8x7B-Instruct-v0.1", "size": "26.9GB", "description": "Mixture of experts"},
            {"name": "Yi-34B-Chat", "size": "19.5GB", "description": "01.AI's large model"}
        ],
        "code": [
            {"name": "CodeLlama-34B-Instruct", "size": "19.0GB", "description": "Large code specialist"}
        ]
    },
    "ultra_high": {
        "general": [
            {"name": "Llama-2-70B", "size": "130GB", "description": "Full precision"},
            {"name": "Mixtral-8x22B", "size": "176GB", "description": "Latest mixture model"}
        ]
    }
}

def recommend_llm(ram_str) -> Tuple[str, str, Dict[str, List[Dict]]]:
    ram = extract_numeric_ram(ram_str)
    if ram is None:
        return "⚪ Check exact specs", "Unknown", {}
    if ram <= 2:
        return "🔸 Ultra-lightweight models", "Ultra Low", LLM_DATABASE["ultra_low"]
    elif ram <= 4:
        return "🔸 Small language models", "Low", LLM_DATABASE["low"]
    elif ram <= 8:
        return "🟠 7B models - excellent capabilities", "Moderate", LLM_DATABASE["moderate"]
    elif ram <= 16:
        return "🟢 High-quality models", "Good", LLM_DATABASE["good"]
    elif ram <= 32:
        return "🔵 Premium models", "High", LLM_DATABASE["high"]
    else:
        return "🔵 Top-tier models", "Ultra High", LLM_DATABASE["ultra_high"]

def get_os_info(os_name) -> Tuple[str, str]:
    if pd.isna(os_name):
        return "🖼", "Not specified"
    os = str(os_name).lower()
    if "windows" in os:
        return "🪟", os_name
    elif "mac" in os:
        return "🍎", os_name
    elif "linux" in os or "ubuntu" in os:
        return "🐧", os_name
    elif "android" in os:
        return "🤖", os_name
    elif "ios" in os:
        return "📱", os_name
    else:
        return "🖼", os_name

def create_performance_chart(df):
    laptop_rams = df["Laptop RAM"].apply(extract_numeric_ram).dropna()
    mobile_rams = df["Mobile RAM"].apply(extract_numeric_ram).dropna()
    fig = go.Figure()
    fig.add_trace(go.Histogram(x=laptop_rams, name="Laptop RAM", opacity=0.7))
    fig.add_trace(go.Histogram(x=mobile_rams, name="Mobile RAM", opacity=0.7))
    fig.update_layout(
        title="RAM Distribution",
        xaxis_title="RAM (GB)",
        yaxis_title="Students",
        barmode='overlay',
        height=400
    )
    return fig

def display_models(models_dict: Dict[str, List[Dict]]):
    if not models_dict:
        return
    for category, model_list in models_dict.items():
        if model_list:
            st.markdown(f"**{category.title()} Models:**")
            for model in model_list[:5]:
                st.write(f"• {model['name']} ({model['size']}) - {model['description']}")

st.title("🧠 LLM Compatibility Advisor")
st.markdown("Get personalized AI model recommendations with download sizes!")

df, error = load_data()
if error:
    st.error(error)
    st.stop()
if df is None or df.empty:
    st.error("No data found.")
    st.stop()

with st.sidebar:
    st.header("📊 Quick Stats")
    st.metric("Total Students", len(df))
    avg_laptop_ram = df["Laptop RAM"].apply(extract_numeric_ram).mean()
    avg_mobile_ram = df["Mobile RAM"].apply(extract_numeric_ram).mean()
    if not pd.isna(avg_laptop_ram):
        st.metric("Avg Laptop RAM", f"{avg_laptop_ram:.1f} GB")
    if not pd.isna(avg_mobile_ram):
        st.metric("Avg Mobile RAM", f"{avg_mobile_ram:.1f} GB")

st.subheader("👤 Individual Student Analysis")
student_names = list(df["Full Name"].unique())
student_options = ["Select a student..."] + student_names

selected_name = st.selectbox(
    "Choose a student:",
    options=student_options,
)

if selected_name != "Select a student...":
    selected_user = selected_name
    user_data = df[df["Full Name"] == selected_user].iloc[0]

    col1, col2 = st.columns(2)

    with col1:
        st.markdown("### 💻 Laptop")
        laptop_os_icon, laptop_os_name = get_os_info(user_data.get('Laptop Operating System'))
        laptop_ram = user_data.get('Laptop RAM', 'Not specified')
        laptop_rec, _, laptop_models = recommend_llm(laptop_ram)
        st.markdown(f"**OS:** {laptop_os_icon} {laptop_os_name}")
        st.markdown(f"**RAM:** {laptop_ram}")
        st.success(f"**Recommendation:** {laptop_rec}")
        display_models(laptop_models)

    with col2:
        st.markdown("### 📱 Mobile")
        mobile_os_icon, mobile_os_name = get_os_info(user_data.get('Mobile Operating System'))
        mobile_ram = user_data.get('Mobile RAM', 'Not specified')
        mobile_rec, _, mobile_models = recommend_llm(mobile_ram)
        st.markdown(f"**OS:** {mobile_os_icon} {mobile_os_name}")
        st.markdown(f"**RAM:** {mobile_ram}")
        st.success(f"**Recommendation:** {mobile_rec}")
        display_models(mobile_models)

st.markdown("---")
st.header("📊 Batch Analysis")
df_display = df[["Full Name", "Laptop RAM", "Mobile RAM"]].copy()
df_display["Laptop Recommendation"] = df["Laptop RAM"].apply(lambda x: recommend_llm(x)[0])
df_display["Mobile Recommendation"] = df["Mobile RAM"].apply(lambda x: recommend_llm(x)[0])
st.dataframe(df_display, use_container_width=True)

if len(df) > 1:
    st.subheader("📈 RAM Distribution")
    fig = create_performance_chart(df)
    st.plotly_chart(fig, use_container_width=True)

st.markdown("---")
st.header("🔍 Model Explorer")
selected_ram_range = st.selectbox(
    "Select RAM range:",
    ["\u22642GB (Ultra Low)", "3-4GB (Low)", "5-8GB (Moderate)", 
     "9-16GB (Good)", "17-32GB (High)", ">32GB (Ultra High)"]
)

ram_mapping = {
    "≤2GB (Ultra Low)": "ultra_low",
    "3-4GB (Low)": "low", 
    "5-8GB (Moderate)": "moderate",
    "9-16GB (Good)": "good",
    "17-32GB (High)": "high",
    ">32GB (Ultra High)": "ultra_high"
}

selected_key = ram_mapping[selected_ram_range]
if selected_key in LLM_DATABASE:
    st.subheader(f"Models for {selected_ram_range}")
    display_models(LLM_DATABASE[selected_key])

with st.expander("📘 Quick Reference"):
    st.markdown("""
    ## Popular Models by Category

    **General Purpose:**
    - Llama-2 Series (7B, 13B, 70B)
    - Mistral Series
    - Gemma (2B, 7B)

    **Code Specialists:**
    - CodeLlama
    - CodeGen

    **Where to Download:**
    - 🤗 Hugging Face Hub
    - 🧙️ Ollama
    - 📦 LM Studio
    """)

st.markdown("---")
st.markdown("*Built for BITS Pilani Interns*")