#!/usr/bin/env python3
"""
LLM Compatibility Advisor - Streamlined with Download Sizes
Author: Assistant
Description: Provides device-based LLM recommendations with popular models and download sizes
Requirements: streamlit, pandas, plotly, openpyxl
"""

import streamlit as st
import pandas as pd
import numpy as np
import re
import plotly.express as px
import plotly.graph_objects as go
from typing import Optional, Tuple, List, Dict

# ✅ MUST be the first Streamlit command
st.set_page_config(
    page_title="LLM Compatibility Advisor", 
    layout="wide",
    page_icon="🧠",
    initial_sidebar_state="expanded"
)

# Enhanced data loading with error handling
@st.cache_data
def load_data():
    paths = [
        "src/BITS_INTERNS.xlsx",
        "src/Summer of AI - ICFAI  (Responses) (3).xlsx"
    ]

    combined_df = pd.DataFrame()
    for path in paths:
        try:
            df = pd.read_excel(path, sheet_name="Form Responses 1")
            df.columns = df.columns.str.strip()
            combined_df = pd.concat([combined_df, df], ignore_index=True)
        except FileNotFoundError:
            return None, f"Excel file '{path}' not found. Please upload the file."
        except Exception as e:
            return None, f"Error loading '{path}': {str(e)}"
    
    # Return success case - this was missing!
    if combined_df.empty:
        return None, "No data found in Excel files."
    else:
        return combined_df, None

# Enhanced RAM extraction with better parsing
def extract_numeric_ram(ram) -> Optional[int]:
    if pd.isna(ram):
        return None
    
    ram_str = str(ram).lower().replace(" ", "")
    
    # Handle various formats: "8GB", "8 GB", "8gb", "8192MB", etc.
    gb_match = re.search(r"(\d+(?:\.\d+)?)(?:gb|g)", ram_str)
    if gb_match:
        return int(float(gb_match.group(1)))
    
    # Handle MB format
    mb_match = re.search(r"(\d+)(?:mb|m)", ram_str)
    if mb_match:
        return max(1, int(int(mb_match.group(1)) / 1024))  # Convert MB to GB
    
    # Handle plain numbers (assume GB)
    plain_match = re.search(r"(\d+)", ram_str)
    if plain_match:
        return int(plain_match.group(1))
    
    return None

# Streamlined LLM database with popular models and download sizes
LLM_DATABASE = {
    "ultra_low": {  # ≤2GB
        "general": [
            {"name": "TinyLlama-1.1B-Chat", "size": "637MB", "description": "Compact chat model"},
            {"name": "DistilBERT-base", "size": "268MB", "description": "Efficient BERT variant"},
            {"name": "all-MiniLM-L6-v2", "size": "91MB", "description": "Sentence embeddings"}
        ],
        "code": [
            {"name": "CodeT5-small", "size": "242MB", "description": "Code generation"},
            {"name": "Replit-code-v1-3B", "size": "1.2GB", "description": "Code completion"}
        ]
    },
    "low": {  # 3-4GB
        "general": [
            {"name": "Phi-1.5", "size": "2.8GB", "description": "Microsoft's efficient model"},
            {"name": "Gemma-2B", "size": "1.4GB", "description": "Google's compact model"},
            {"name": "OpenLLaMA-3B", "size": "2.1GB", "description": "Open source LLaMA"}
        ],
        "code": [
            {"name": "CodeGen-2B", "size": "1.8GB", "description": "Salesforce code model"},
            {"name": "StarCoder-1B", "size": "1.1GB", "description": "BigCode project"}
        ],
        "chat": [
            {"name": "Alpaca-3B", "size": "2.0GB", "description": "Stanford's instruction model"},
            {"name": "Vicuna-3B", "size": "2.1GB", "description": "ChatGPT-style training"}
        ]
    },
    "moderate_low": {  # 5-6GB
        "general": [
            {"name": "Phi-2", "size": "5.2GB", "description": "Microsoft's 2.7B model"},
            {"name": "Gemma-7B-it", "size": "4.2GB", "description": "Google instruction tuned"},
            {"name": "Mistral-7B-v0.1", "size": "4.1GB", "description": "Mistral AI base model"}
        ],
        "code": [
            {"name": "CodeLlama-7B", "size": "3.8GB", "description": "Meta's code specialist"},
            {"name": "StarCoder-7B", "size": "4.0GB", "description": "Code generation expert"}
        ],
        "chat": [
            {"name": "Zephyr-7B-beta", "size": "4.2GB", "description": "HuggingFace chat model"},
            {"name": "Neural-Chat-7B", "size": "4.1GB", "description": "Intel optimized"}
        ]
    },
    "moderate": {  # 7-8GB
        "general": [
            {"name": "Llama-2-7B-Chat", "size": "3.5GB", "description": "Meta's popular chat model"},
            {"name": "Mistral-7B-Instruct-v0.2", "size": "4.1GB", "description": "Latest Mistral instruct"},
            {"name": "Qwen-7B-Chat", "size": "4.0GB", "description": "Alibaba's multilingual"}
        ],
        "code": [
            {"name": "CodeLlama-7B-Instruct", "size": "3.8GB", "description": "Instruction-tuned CodeLlama"},
            {"name": "WizardCoder-7B", "size": "4.0GB", "description": "Enhanced coding abilities"},
            {"name": "Phind-CodeLlama-34B-v2", "size": "4.2GB", "description": "4-bit quantized version"}
        ],
        "reasoning": [
            {"name": "WizardMath-7B", "size": "4.0GB", "description": "Mathematical reasoning"},
            {"name": "MetaMath-7B", "size": "3.9GB", "description": "Math problem solving"}
        ]
    },
    "good": {  # 9-16GB
        "general": [
            {"name": "Llama-2-13B-Chat", "size": "7.3GB", "description": "Larger Llama variant"},
            {"name": "Vicuna-13B-v1.5", "size": "7.2GB", "description": "Enhanced Vicuna"},
            {"name": "OpenChat-3.5", "size": "7.1GB", "description": "High-quality chat model"}
        ],
        "code": [
            {"name": "CodeLlama-13B-Instruct", "size": "7.3GB", "description": "Larger code model"},
            {"name": "WizardCoder-15B", "size": "8.2GB", "description": "Advanced coding"},
            {"name": "StarCoder-15B", "size": "8.5GB", "description": "Large code model"}
        ],
        "multimodal": [
            {"name": "LLaVA-7B", "size": "7.0GB", "description": "Vision + language"},
            {"name": "MiniGPT-4-7B", "size": "6.8GB", "description": "Multimodal chat"}
        ],
        "reasoning": [
            {"name": "WizardMath-13B", "size": "7.3GB", "description": "Advanced math"},
            {"name": "Orca-2-13B", "size": "7.4GB", "description": "Microsoft reasoning"}
        ]
    },
    "high": {  # 17-32GB
        "general": [
            {"name": "Mixtral-8x7B-Instruct-v0.1", "size": "26.9GB", "description": "Mixture of experts"},
            {"name": "Llama-2-70B-Chat", "size": "38.0GB", "description": "8-bit quantized"},
            {"name": "Yi-34B-Chat", "size": "19.5GB", "description": "01.AI's large model"}
        ],
        "code": [
            {"name": "CodeLlama-34B-Instruct", "size": "19.0GB", "description": "Large code specialist"},
            {"name": "DeepSeek-Coder-33B", "size": "18.5GB", "description": "DeepSeek's coder"},
            {"name": "WizardCoder-34B", "size": "19.2GB", "description": "Enterprise coding"}
        ],
        "reasoning": [
            {"name": "WizardMath-70B", "size": "38.5GB", "description": "8-bit quantized math"},
            {"name": "MetaMath-70B", "size": "38.0GB", "description": "8-bit math reasoning"}
        ]
    },
    "ultra_high": {  # >32GB
        "general": [
            {"name": "Llama-2-70B", "size": "130GB", "description": "Full precision"},
            {"name": "Mixtral-8x22B", "size": "176GB", "description": "Latest mixture model"},
            {"name": "Qwen-72B", "size": "145GB", "description": "Alibaba's flagship"}
        ],
        "code": [
            {"name": "CodeLlama-34B", "size": "68GB", "description": "Full precision code"},
            {"name": "DeepSeek-Coder-33B", "size": "66GB", "description": "Full precision coding"}
        ],
        "reasoning": [
            {"name": "WizardMath-70B", "size": "130GB", "description": "Full precision math"},
            {"name": "Goat-70B", "size": "132GB", "description": "Arithmetic reasoning"}
        ]
    }
}

# Enhanced LLM recommendation with performance tiers
def recommend_llm(ram_str) -> Tuple[str, str, str, Dict[str, List[Dict]]]:
    """Returns (recommendation, performance_tier, additional_info, detailed_models)"""
    ram = extract_numeric_ram(ram_str)
    
    if ram is None:
        return ("⚪ Check exact specs or test with quantized models.", 
                "Unknown", 
                "Verify RAM specifications",
                {})
    
    if ram <= 2:
        models = LLM_DATABASE["ultra_low"]
        return ("🔸 Ultra-lightweight models - basic NLP tasks", 
                "Ultra Low", 
                "Mobile-optimized, simple tasks, limited context",
                models)
    elif ram <= 4:
        models = LLM_DATABASE["low"]
        return ("🔸 Small language models - decent capabilities", 
                "Low", 
                "Basic chat, simple reasoning, text classification",
                models)
    elif ram <= 6:
        models = LLM_DATABASE["moderate_low"]
        return ("🟠 Mid-range models - good general performance", 
                "Moderate-Low", 
                "Solid reasoning, coding help, longer conversations",
                models)
    elif ram <= 8:
        models = LLM_DATABASE["moderate"]
        return ("🟠 Strong 7B models - excellent capabilities", 
                "Moderate", 
                "Professional use, coding assistance, complex reasoning",
                models)
    elif ram <= 16:
        models = LLM_DATABASE["good"]
        return ("🟢 High-quality models - premium performance", 
                "Good", 
                "Advanced tasks, multimodal support, research use",
                models)
    elif ram <= 32:
        models = LLM_DATABASE["high"]
        return ("🔵 Premium models - professional grade", 
                "High", 
                "Enterprise ready, complex reasoning, specialized tasks",
                models)
    else:
        models = LLM_DATABASE["ultra_high"]
        return ("🔵 Top-tier models - enterprise capabilities", 
                "Ultra High", 
                "Research grade, maximum performance, domain expertise",
                models)

# Enhanced OS detection with better icons
def get_os_info(os_name) -> Tuple[str, str]:
    """Returns (icon, clean_name)"""
    if pd.isna(os_name):
        return "💻", "Not specified"
    
    os = str(os_name).lower()
    if "windows" in os:
        return "🪟", os_name
    elif "mac" in os or "darwin" in os:
        return "🍎", os_name
    elif "linux" in os or "ubuntu" in os:
        return "🐧", os_name
    elif "android" in os:
        return "🤖", os_name
    elif "ios" in os:
        return "📱", os_name
    else:
        return "💻", os_name

# Performance visualization
def create_performance_chart(df):
    """Create a performance distribution chart"""
    laptop_rams = df["Laptop RAM"].apply(extract_numeric_ram).dropna()
    mobile_rams = df["Mobile RAM"].apply(extract_numeric_ram).dropna()
    
    fig = go.Figure()
    
    fig.add_trace(go.Histogram(
        x=laptop_rams,
        name="Laptop RAM",
        opacity=0.7,
        nbinsx=10
    ))
    
    fig.add_trace(go.Histogram(
        x=mobile_rams,
        name="Mobile RAM",
        opacity=0.7,
        nbinsx=10
    ))
    
    fig.update_layout(
        title="RAM Distribution Across Devices",
        xaxis_title="RAM (GB)",
        yaxis_title="Number of Students",
        barmode='overlay',
        height=400
    )
    
    return fig

# Enhanced model details display function
def display_model_categories(models_dict: Dict[str, List[Dict]], ram_gb: int):
    """Display models organized by category with download sizes"""
    if not models_dict:
        return
    
    st.markdown(f"### 🎯 Recommended Models for {ram_gb}GB RAM:")
    
    for category, model_list in models_dict.items():
        if model_list:
            with st.expander(f"📂 {category.replace('_', ' ').title()} Models"):
                for model in model_list[:8]:  # Limit to top 8 per category
                    col1, col2, col3 = st.columns([3, 1, 2])
                    with col1:
                        st.markdown(f"**{model['name']}**")
                    with col2:
                        st.markdown(f"`{model['size']}`")
                    with col3:
                        st.markdown(f"*{model['description']}*")

# Demo data generator for when Excel files are not available
def generate_demo_data():
    """Generate demo data for testing when Excel files are missing"""
    demo_data = {
        "Full Name": [
            "Demo Student 1", "Demo Student 2", "Demo Student 3", "Demo Student 4",
            "Demo Student 5", "Demo Student 6", "Demo Student 7", "Demo Student 8"
        ],
        "Laptop RAM": ["8GB", "16GB", "4GB", "32GB", "6GB", "12GB", "2GB", "24GB"],
        "Mobile RAM": ["4GB", "8GB", "3GB", "12GB", "6GB", "4GB", "2GB", "8GB"],
        "Laptop Operating System": [
            "Windows 11", "macOS Monterey", "Ubuntu 22.04", "Windows 10",
            "macOS Big Sur", "Fedora 36", "Windows 11", "macOS Ventura"
        ],
        "Mobile Operating System": [
            "Android 13", "iOS 16", "Android 12", "iOS 15",
            "Android 14", "iOS 17", "Android 11", "iOS 16"
        ]
    }
    return pd.DataFrame(demo_data)

# Function to safely prepare user options
def prepare_user_options(df):
    """Safely prepare user options for selectbox, handling NaN values and mixed types"""
    try:
        # Get unique names and filter out NaN values
        unique_names = df["Full Name"].dropna().unique()
        
        # Convert to strings and filter out any remaining non-string values
        valid_names = []
        for name in unique_names:
            try:
                str_name = str(name).strip()
                if str_name and str_name.lower() != 'nan':
                    valid_names.append(str_name)
            except:
                continue
        
        # Create options list with proper string concatenation
        options = ["Select a student..."] + sorted(valid_names)
        return options
    except Exception as e:
        st.error(f"Error preparing user options: {e}")
        return ["Select a student..."]

# Main App
st.title("🧠 LLM Compatibility Advisor")
st.markdown("Get personalized recommendations from **150+ popular open source AI models** with download sizes!")

# Load data with better error handling
df, error = load_data()

if error or df is None or df.empty:
    st.warning("⚠️ Excel files not found. Running with demo data for testing.")
    st.info("📁 To use real data, place 'BITS_INTERNS.xlsx' and 'Summer of AI - ICFAI  (Responses) (3).xlsx' in the 'src/' directory.")
    df = generate_demo_data()
    
    with st.expander("📋 Expected Data Format"):
        st.markdown("""
        The app expects Excel files with the following columns:
        - **Full Name**: Student name
        - **Laptop RAM**: RAM specification (e.g., "8GB", "16 GB", "8192MB")
        - **Mobile RAM**: Mobile device RAM
        - **Laptop Operating System**: OS name
        - **Mobile Operating System**: Mobile OS name
        """)

# Verify required columns exist
required_columns = ["Full Name", "Laptop RAM", "Mobile RAM"]
missing_columns = [col for col in required_columns if col not in df.columns]

if missing_columns:
    st.error(f"Missing required columns: {missing_columns}")
    st.info("Please ensure your Excel file contains the required columns.")
    st.stop()

# Clean the dataframe
df = df.copy()
df["Full Name"] = df["Full Name"].astype(str).str.strip()

# Sidebar filters and info
with st.sidebar:
    st.header("🔍 Filters & Info")
    
    # Performance tier filter
    performance_filter = st.multiselect(
        "Filter by Performance Tier:",
        ["Ultra Low", "Low", "Moderate-Low", "Moderate", "Good", "High", "Ultra High", "Unknown"],
        default=["Ultra Low", "Low", "Moderate-Low", "Moderate", "Good", "High", "Ultra High", "Unknown"]
    )
    
    # Model category filter
    st.subheader("Model Categories")
    show_categories = st.multiselect(
        "Show specific categories:",
        ["general", "code", "chat", "reasoning", "multimodal"],
        default=["general", "code", "chat"]
    )
    
    st.markdown("---")
    st.markdown("### 📊 Quick Stats")
    st.metric("Total Students", len(df))
    st.metric("Popular Models", "150+")
    
    # Calculate average RAM
    avg_laptop_ram = df["Laptop RAM"].apply(extract_numeric_ram).mean()
    avg_mobile_ram = df["Mobile RAM"].apply(extract_numeric_ram).mean()
    
    if not pd.isna(avg_laptop_ram):
        st.metric("Avg Laptop RAM", f"{avg_laptop_ram:.1f} GB")
    if not pd.isna(avg_mobile_ram):
        st.metric("Avg Mobile RAM", f"{avg_mobile_ram:.1f} GB")

# User selection with search - FIXED VERSION
st.subheader("👤 Individual Student Analysis")

# Prepare options safely
user_options = prepare_user_options(df)

selected_user = st.selectbox(
    "Choose a student:",
    options=user_options,
    index=0  # Default to first option ("Select a student...")
)

if selected_user and selected_user != "Select a student...":
    # Find user data with safe lookup
    user_data_mask = df["Full Name"].astype(str).str.strip() == selected_user
    if user_data_mask.any():
        user_data = df[user_data_mask].iloc[0]
        
        # Enhanced user display
        col1, col2 = st.columns(2)
        
        with col1:
            st.markdown("### 💻 Laptop Configuration")
            laptop_os_icon, laptop_os_name = get_os_info(user_data.get('Laptop Operating System'))
            laptop_ram = user_data.get('Laptop RAM', 'Not specified')
            laptop_rec, laptop_tier, laptop_info, laptop_models = recommend_llm(laptop_ram)
            laptop_ram_gb = extract_numeric_ram(laptop_ram) or 0
            
            st.markdown(f"**OS:** {laptop_os_icon} {laptop_os_name}")
            st.markdown(f"**RAM:** {laptop_ram}")
            st.markdown(f"**Performance Tier:** {laptop_tier}")
            
            st.success(f"**💡 Recommendation:** {laptop_rec}")
            st.info(f"**ℹ️ Notes:** {laptop_info}")
            
            # Display detailed models for laptop
            if laptop_models:
                filtered_models = {k: v for k, v in laptop_models.items() if k in show_categories}
                display_model_categories(filtered_models, laptop_ram_gb)
        
        with col2:
            st.markdown("### 📱 Mobile Configuration")
            mobile_os_icon, mobile_os_name = get_os_info(user_data.get('Mobile Operating System'))
            mobile_ram = user_data.get('Mobile RAM', 'Not specified')
            mobile_rec, mobile_tier, mobile_info, mobile_models = recommend_llm(mobile_ram)
            mobile_ram_gb = extract_numeric_ram(mobile_ram) or 0
            
            st.markdown(f"**OS:** {mobile_os_icon} {mobile_os_name}")
            st.markdown(f"**RAM:** {mobile_ram}")
            st.markdown(f"**Performance Tier:** {mobile_tier}")
            
            st.success(f"**💡 Recommendation:** {mobile_rec}")
            st.info(f"**ℹ️ Notes:** {mobile_info}")
            
            # Display detailed models for mobile
            if mobile_models:
                filtered_models = {k: v for k, v in mobile_models.items() if k in show_categories}
                display_model_categories(filtered_models, mobile_ram_gb)

# Batch Analysis Section
st.markdown("---")
st.header("📊 Batch Analysis & Insights")

# Create enhanced batch table
df_display = df[["Full Name", "Laptop RAM", "Mobile RAM"]].copy()

# Add recommendations and performance tiers
laptop_recommendations = df["Laptop RAM"].apply(lambda x: recommend_llm(x)[0])
mobile_recommendations = df["Mobile RAM"].apply(lambda x: recommend_llm(x)[0])
laptop_tiers = df["Laptop RAM"].apply(lambda x: recommend_llm(x)[1])
mobile_tiers = df["Mobile RAM"].apply(lambda x: recommend_llm(x)[1])

df_display["Laptop LLM"] = laptop_recommendations
df_display["Mobile LLM"] = mobile_recommendations
df_display["Laptop Tier"] = laptop_tiers
df_display["Mobile Tier"] = mobile_tiers

# Filter based on sidebar selections
mask = (laptop_tiers.isin(performance_filter) | mobile_tiers.isin(performance_filter))
df_filtered = df_display[mask]

# Display filtered table
st.subheader(f"📋 Student Recommendations ({len(df_filtered)} students)")
st.dataframe(
    df_filtered, 
    use_container_width=True,
    column_config={
        "Full Name": st.column_config.TextColumn("Student Name", width="medium"),
        "Laptop RAM": st.column_config.TextColumn("Laptop RAM", width="small"),
        "Mobile RAM": st.column_config.TextColumn("Mobile RAM", width="small"),
        "Laptop LLM": st.column_config.TextColumn("Laptop Recommendation", width="large"),
        "Mobile LLM": st.column_config.TextColumn("Mobile Recommendation", width="large"),
        "Laptop Tier": st.column_config.TextColumn("L-Tier", width="small"),
        "Mobile Tier": st.column_config.TextColumn("M-Tier", width="small"),
    }
)

# Performance distribution chart
if len(df) > 1:
    st.subheader("📈 RAM Distribution Analysis")
    fig = create_performance_chart(df)
    st.plotly_chart(fig, use_container_width=True)

# Performance tier summary
st.subheader("🎯 Performance Tier Summary")
tier_col1, tier_col2 = st.columns(2)

with tier_col1:
    st.markdown("**Laptop Performance Tiers:**")
    laptop_tier_counts = laptop_tiers.value_counts()
    for tier, count in laptop_tier_counts.items():
        percentage = (count / len(laptop_tiers)) * 100
        st.write(f"• {tier}: {count} students ({percentage:.1f}%)")

with tier_col2:
    st.markdown("**Mobile Performance Tiers:**")
    mobile_tier_counts = mobile_tiers.value_counts()
    for tier, count in mobile_tier_counts.items():
        percentage = (count / len(mobile_tier_counts)) * 100
        st.write(f"• {tier}: {count} students ({percentage:.1f}%)")

# Model Explorer Section
st.markdown("---")
st.header("🔍 Popular Model Explorer")

explorer_col1, explorer_col2 = st.columns(2)

with explorer_col1:
    selected_ram_range = st.selectbox(
        "Select RAM range to explore models:",
        ["≤2GB (Ultra Low)", "3-4GB (Low)", "5-6GB (Moderate-Low)", 
         "7-8GB (Moderate)", "9-16GB (Good)", "17-32GB (High)", ">32GB (Ultra High)"]
    )

with explorer_col2:
    selected_category = st.selectbox(
        "Select model category:",
        ["general", "code", "chat", "reasoning", "multimodal"]
    )

# Map selection to database key
ram_mapping = {
    "≤2GB (Ultra Low)": "ultra_low",
    "3-4GB (Low)": "low", 
    "5-6GB (Moderate-Low)": "moderate_low",
    "7-8GB (Moderate)": "moderate",
    "9-16GB (Good)": "good",
    "17-32GB (High)": "high",
    ">32GB (Ultra High)": "ultra_high"
}

selected_ram_key = ram_mapping[selected_ram_range]
if selected_ram_key in LLM_DATABASE and selected_category in LLM_DATABASE[selected_ram_key]:
    models = LLM_DATABASE[selected_ram_key][selected_category]
    
    st.subheader(f"🎯 {selected_category.title()} Models for {selected_ram_range}")
    
    # Display models in a detailed table
    for model in models:
        with st.container():
            col1, col2, col3 = st.columns([3, 1, 3])
            with col1:
                st.markdown(f"### {model['name']}")
            with col2:
                st.markdown(f"**{model['size']}**")
                st.caption("Download Size")
            with col3:
                st.markdown(f"*{model['description']}*")
                # Add download suggestion
                if "Llama" in model['name']:
                    st.caption("🔗 Available on Hugging Face & Ollama")
                elif "Mistral" in model['name']:
                    st.caption("🔗 Available on Hugging Face & Mistral AI")
                elif "Gemma" in model['name']:
                    st.caption("🔗 Available on Hugging Face & Google")
                else:
                    st.caption("🔗 Available on Hugging Face")
            st.markdown("---")
else:
    st.info(f"No {selected_category} models available for {selected_ram_range}")

# Enhanced reference guide
with st.expander("📘 Model Guide & Download Information"):
    st.markdown("""
    ## 🚀 Popular Models by Category
    
    ### 🎯 **General Purpose Champions**
    - **Llama-2 Series**: Meta's flagship models (7B, 13B, 70B)
    - **Mistral Series**: Excellent efficiency and performance
    - **Gemma**: Google's efficient models (2B, 7B)
    - **Phi**: Microsoft's compact powerhouses
    
    ### 💻 **Code Specialists** 
    - **CodeLlama**: Meta's dedicated coding models
    - **StarCoder**: BigCode's programming experts
    - **WizardCoder**: Enhanced coding capabilities
    - **DeepSeek-Coder**: Chinese tech giant's coder
    
    ### 💬 **Chat Optimized**
    - **Vicuna**: UC Berkeley's ChatGPT alternative
    - **Zephyr**: HuggingFace's chat specialist
    - **OpenChat**: High-quality conversation models
    - **Neural-Chat**: Intel-optimized chat models
 
    ### 🧮 **Reasoning Masters**
    - **WizardMath**: Mathematical problem solving
    - **MetaMath**: Advanced arithmetic reasoning
    - **Orca-2**: Microsoft's reasoning specialist
    - **Goat**: Specialized arithmetic model
    
    ### 👁️ **Multimodal Models**
    - **LLaVA**: Large Language and Vision Assistant
    - **MiniGPT-4**: Multimodal conversational AI
    
    ## 💾 Download Size Reference
    
    | Model Size | FP16 | 8-bit | 4-bit | Use Case |
    |------------|------|-------|-------|----------|
    | **1-3B** | 2-6GB | 1-3GB | 0.5-1.5GB | Mobile, Edge |
    | **7B** | 13GB | 7GB | 3.5GB | Desktop, Laptop |
    | **13B** | 26GB | 13GB | 7GB | Workstation |
    | **30-34B** | 60GB | 30GB | 15GB | Server, Cloud |
    | **70B** | 140GB | 70GB | 35GB | High-end Server |
    
    ## 🛠️ Where to Download
    
    ### **Primary Sources**
    - **🤗 Hugging Face**: Largest repository with 400,000+ models
    - **🦙 Ollama**: Simple CLI tool for local deployment
    - **📦 LM Studio**: User-friendly GUI for model management
    
    ### **Quantized Formats**
    - **GGUF**: Best for CPU inference (llama.cpp)
    - **GPTQ**: GPU-optimized quantization
    - **AWQ**: Advanced weight quantization
    
    ### **Download Tips**
    - Use `git lfs` for large models from Hugging Face
    - Consider bandwidth and storage before downloading
    - Start with 4-bit quantized versions for testing
    - Use `ollama pull model_name` for easiest setup
    
    ## 🔧 Optimization Strategies
    
    ### **Memory Reduction**
    - **4-bit quantization**: 75% memory reduction
    - **8-bit quantization**: 50% memory reduction
    - **CPU offloading**: Use system RAM for overflow
    
    ### **Speed Optimization**
    - **GPU acceleration**: CUDA, ROCm, Metal
    - **Batch processing**: Process multiple requests
    - **Context caching**: Reuse computations
    """)

# Footer with updated resources
st.markdown("---")
st.markdown("""
### 🔗 Essential Download & Deployment Tools
**📦 Easy Model Deployment:**
- [**Ollama**](https://ollama.ai/) – `curl -fsSL https://ollama.ai/install.sh | sh`
- [**LM Studio**](https://lmstudio.ai/) – Drag-and-drop GUI for running models locally
- [**GPT4All**](https://gpt4all.io/) – Cross-platform desktop app for local LLMs
**🤗 Model Repositories:**
- [**Hugging Face Hub**](https://huggingface.co/models) – Filter by model size, task, and license
- [**TheBloke's Quantizations**](https://huggingface.co/TheBloke) – Pre-quantized models in GGUF/GPTQ format
- [**Awesome LLM**](https://github.com/Hannibal046/Awesome-LLMs) – Curated list of models and resources
---
""")