qwerty45-uiop committed on
Commit 19b0de3 · verified · 1 Parent(s): 7977841

Update src/streamlit_app.py

Files changed (1)
  1. src/streamlit_app.py +236 -197
src/streamlit_app.py CHANGED
@@ -1,8 +1,8 @@
 #!/usr/bin/env python3
 """
-LLM Compatibility Advisor - Enhanced Streamlit Application with Expanded Model List
 Author: Assistant
-Description: Provides device-based LLM recommendations based on RAM capacity
 Requirements: streamlit, pandas, plotly, openpyxl
 """

@@ -26,7 +26,6 @@ st.set_page_config(
 def load_data():
     try:
         df = pd.read_excel("src/BITS_INTERNS.xlsx", sheet_name="Form Responses 1")
-
         df.columns = df.columns.str.strip()
         return df, None
     except FileNotFoundError:

@@ -58,70 +57,120 @@ def extract_numeric_ram(ram) -> Optional[int]:

     return None

-# Comprehensive LLM database with categories
 LLM_DATABASE = {
     "ultra_low": { # ≤2GB
-        "general": ["DistilBERT", "MobileBERT", "TinyBERT", "BERT-Tiny", "DistilRoBERTa"],
-        "specialized": ["TinyLLaMA-1.1B", "PY007/TinyLlama-1.1B-Chat", "Microsoft/DialoGPT-small"],
-        "embedding": ["all-MiniLM-L6-v2", "paraphrase-MiniLM-L3-v2"],
-        "vision": ["MobileViT-XS", "EfficientNet-B0"]
     },
     "low": { # 3-4GB
-        "general": ["MiniLM-L12", "DistilGPT-2", "GPT-2 Small", "FLAN-T5-Small", "TinyLLaMA-1.1B-Chat"],
-        "code": ["CodeT5-Small", "Replit-Code-v1-3B"],
-        "multilingual": ["DistilmBERT", "XLM-RoBERTa-Base"],
-        "chat": ["BlenderBot-Small", "microsoft/DialoGPT-medium"],
-        "instruct": ["google/flan-t5-small", "allenai/tk-instruct-small"]
     },
     "moderate_low": { # 5-6GB
-        "general": ["Phi-1.5", "Gemma-2B", "Alpaca-3B", "RedPajama-3B", "OpenLLaMA-3B"],
-        "code": ["CodeGen-2.5B", "StarCoder-1B", "SantaCoder-1.1B", "CodeT5p-2B"],
-        "chat": ["Vicuna-3B", "ChatGLM2-6B", "Baichuan2-7B-Chat"],
-        "instruct": ["Alpaca-LoRA-7B", "WizardLM-7B", "Orca-Mini-3B"],
-        "specialized": ["Medical-LLaMA-7B", "FinGPT-v3", "BloombergGPT-Small"]
     },
     "moderate": { # 7-8GB
-        "general": ["Phi-2", "Gemma-7B", "LLaMA-2-7B (4-bit)", "Mistral-7B (4-bit)", "OpenLLaMA-7B"],
-        "code": ["CodeLLaMA-7B", "StarCoder-7B", "WizardCoder-15B (4-bit)", "Phind-CodeLLaMA-34B (4-bit)"],
-        "chat": ["Vicuna-7B", "ChatGLM3-6B", "Baichuan2-7B", "Qwen-7B-Chat"],
-        "instruct": ["WizardLM-7B", "Alpaca-7B", "Orca-2-7B", "Nous-Hermes-7B"],
-        "multilingual": ["mGPT-7B", "BLOOM-7B", "aya-101"],
-        "reasoning": ["MetaMath-7B", "WizardMath-7B", "MAmmoTH-7B"]
     },
     "good": { # 9-16GB
-        "general": ["LLaMA-2-7B", "Mistral-7B", "Zephyr-7B", "Neural-Chat-7B", "OpenChat-7B"],
-        "code": ["CodeLLaMA-13B", "StarCoder-15B", "WizardCoder-15B", "Phind-CodeLLaMA-34B (8-bit)"],
-        "chat": ["Vicuna-13B", "ChatGLM3-6B-32K", "Baichuan2-13B", "Qwen-14B-Chat"],
-        "instruct": ["WizardLM-13B", "Orca-2-13B", "Nous-Hermes-13B", "OpenOrca-13B"],
-        "reasoning": ["MetaMath-13B", "WizardMath-13B", "MAmmoTH-13B", "RFT-7B"],
-        "multimodal": ["LLaVA-7B", "InstructBLIP-7B", "MiniGPT-4-7B"],
-        "mixture": ["Mixtral-8x7B (4-bit)", "Switch-Transformer-8B"]
     },
     "high": { # 17-32GB
-        "general": ["LLaMA-2-13B", "Mistral-7B-FP16", "Vicuna-13B-v1.5", "MPT-7B-32K"],
-        "code": ["CodeLLaMA-34B (8-bit)", "StarCoder-40B (8-bit)", "DeepSeek-Coder-33B (8-bit)"],
-        "chat": ["ChatGLM3-6B-128K", "Baichuan2-13B-Chat", "Qwen-72B (8-bit)", "Yi-34B-Chat (8-bit)"],
-        "instruct": ["WizardLM-30B (8-bit)", "Orca-2-13B", "Nous-Hermes-Llama2-70B (8-bit)"],
-        "reasoning": ["MetaMath-70B (8-bit)", "WizardMath-70B (8-bit)", "Goat-7B-FP16"],
-        "multimodal": ["LLaVA-13B", "InstructBLIP-13B", "BLIP-2-T5-XL"],
-        "mixture": ["Mixtral-8x7B", "Switch-Transformer-32B (8-bit)"],
-        "specialized": ["Med-PaLM-2 (8-bit)", "BloombergGPT-50B (8-bit)", "LegalBERT-Large"]
     },
     "ultra_high": { # >32GB
-        "general": ["LLaMA-2-70B (8-bit)", "Falcon-40B", "MPT-30B", "BLOOM-176B (8-bit)"],
-        "code": ["CodeLLaMA-34B", "StarCoder-40B", "DeepSeek-Coder-33B", "WizardCoder-34B"],
-        "chat": ["Vicuna-33B", "ChatGLM2-130B (8-bit)", "Qwen-72B", "Yi-34B"],
-        "instruct": ["WizardLM-70B", "Orca-2-70B", "Nous-Hermes-Llama2-70B"],
-        "reasoning": ["MetaMath-70B", "WizardMath-70B", "MAmmoTH-70B", "Goat-70B"],
-        "multimodal": ["LLaVA-34B", "InstructBLIP-40B", "GPT-4V-equivalent"],
-        "mixture": ["Mixtral-8x22B", "Switch-Transformer-175B"],
-        "research": ["PaLM-540B (extreme quantization)", "GPT-J-6B-FP16", "T5-11B"],
-        "domain_specific": ["BioBERT-Large", "SciBERT-Large", "FinBERT-Large", "LegalBERT-XL"]
     }
 }

 # Enhanced LLM recommendation with performance tiers
-def recommend_llm(ram_str) -> Tuple[str, str, str, Dict[str, List[str]]]:
     """Returns (recommendation, performance_tier, additional_info, detailed_models)"""
     ram = extract_numeric_ram(ram_str)

@@ -133,45 +182,45 @@ def recommend_llm(ram_str) -> Tuple[str, str, str, Dict[str, List[str]]]:

     if ram <= 2:
         models = LLM_DATABASE["ultra_low"]
-        return ("🔸 Ultra-lightweight models for basic NLP tasks",
                 "Ultra Low",
-                "Suitable for simple NLP tasks, limited context, mobile-optimized",
                 models)
     elif ram <= 4:
         models = LLM_DATABASE["low"]
-        return ("🔸 Small language models with basic capabilities",
                 "Low",
-                "Good for text classification, basic chat, simple reasoning",
                 models)
     elif ram <= 6:
         models = LLM_DATABASE["moderate_low"]
-        return ("🟠 Mid-range models with decent reasoning capabilities",
                 "Moderate-Low",
-                "Decent reasoning, short conversations, basic coding help",
                 models)
     elif ram <= 8:
         models = LLM_DATABASE["moderate"]
-        return ("🟠 Strong 7B models with good general performance",
                 "Moderate",
-                "Good general purpose, coding assistance, mathematical reasoning",
                 models)
     elif ram <= 16:
         models = LLM_DATABASE["good"]
-        return ("🟢 High-quality models with excellent capabilities",
                 "Good",
-                "Strong performance, longer contexts, multimodal support",
                 models)
     elif ram <= 32:
         models = LLM_DATABASE["high"]
-        return ("🔵 Premium models with professional-grade performance",
                 "High",
-                "Professional grade, high accuracy, complex reasoning",
                 models)
     else:
         models = LLM_DATABASE["ultra_high"]
-        return ("🔵 Top-tier models with enterprise capabilities",
                 "Ultra High",
-                "Enterprise-ready, research-grade, domain-specific expertise",
                 models)

 # Enhanced OS detection with better icons
@@ -226,9 +275,9 @@ def create_performance_chart(df):

     return fig

-# Model details display function
-def display_model_categories(models_dict: Dict[str, List[str]], ram_gb: int):
-    """Display models organized by category"""
     if not models_dict:
         return

@@ -237,14 +286,18 @@ def display_model_categories(models_dict: Dict[str, List[str]], ram_gb: int):
     for category, model_list in models_dict.items():
         if model_list:
             with st.expander(f"📂 {category.replace('_', ' ').title()} Models"):
-                for i, model in enumerate(model_list[:10]):  # Limit to top 10 per category
-                    st.markdown(f"• **{model}**")
-                if len(model_list) > 10:
-                    st.markdown(f"*... and {len(model_list) - 10} more models*")

 # Main App
-st.title("🧠 Enhanced LLM Compatibility Advisor")
-st.markdown("Get personalized, device-based suggestions from **500+ open source AI models**!")

 # Load data
 df, error = load_data()
@@ -273,7 +326,7 @@ with st.sidebar:
     st.subheader("Model Categories")
     show_categories = st.multiselect(
         "Show specific categories:",
-        ["general", "code", "chat", "instruct", "reasoning", "multimodal", "multilingual", "specialized"],
         default=["general", "code", "chat"]
     )

@@ -285,7 +338,7 @@ with st.sidebar:
     st.markdown("---")
     st.markdown("### 📊 Quick Stats")
     st.metric("Total Students", len(df))
-    st.metric("Total Models Available", "500+")

     # Calculate average RAM
     avg_laptop_ram = df["Laptop RAM"].apply(extract_numeric_ram).mean()
@@ -415,12 +468,12 @@ with tier_col2:
     st.markdown("**Mobile Performance Tiers:**")
     mobile_tier_counts = mobile_tiers.value_counts()
     for tier, count in mobile_tier_counts.items():
-        percentage = (count / len(mobile_tiers)) * 100
         st.write(f"• {tier}: {count} students ({percentage:.1f}%)")

 # Model Explorer Section
 st.markdown("---")
-st.header("🔍 Model Explorer")

 explorer_col1, explorer_col2 = st.columns(2)

@@ -434,8 +487,7 @@ with explorer_col1:

 with explorer_col2:
     selected_category = st.selectbox(
         "Select model category:",
-        ["general", "code", "chat", "instruct", "reasoning", "multimodal",
-         "multilingual", "specialized", "mixture", "embedding", "vision"]
     )

 # Map selection to database key
@@ -455,132 +507,119 @@ if selected_ram_key in LLM_DATABASE and selected_category in LLM_DATABASE[selected_ram_key]:

     st.subheader(f"🎯 {selected_category.title()} Models for {selected_ram_range}")

-    # Display models in a nice grid
-    cols = st.columns(3)
-    for i, model in enumerate(models):
-        with cols[i % 3]:
-            st.markdown(f"**{model}**")
-            # Add some context for popular models
-            if "llama" in model.lower():
-                st.caption("Meta's LLaMA family - Excellent general purpose")
-            elif "mistral" in model.lower():
-                st.caption("Mistral AI - High quality, efficient")
-            elif "phi" in model.lower():
-                st.caption("Microsoft Research - Compact & capable")
-            elif "gemma" in model.lower():
-                st.caption("Google - Lightweight & versatile")
-            elif "wizard" in model.lower():
-                st.caption("Enhanced with instruction tuning")
-            elif "code" in model.lower():
-                st.caption("Specialized for programming tasks")
 else:
     st.info(f"No {selected_category} models available for {selected_ram_range}")

-# Enhanced reference table
-with st.expander("📘 Comprehensive LLM Reference Guide & Categories"):
     st.markdown("""
-    ## 🚀 Model Categories Explained
-
-    ### 🎯 **General Purpose Models**
-    - **Best for**: General conversation, Q&A, writing assistance
-    - **Examples**: LLaMA-2, Mistral, Phi, Gemma series
-    - **Use cases**: Chatbots, content generation, general AI assistance
-
-    ### 💻 **Code-Specialized Models**
-    - **Best for**: Programming, debugging, code explanation
-    - **Examples**: CodeLLaMA, StarCoder, WizardCoder, DeepSeek-Coder
-    - **Use cases**: IDE integration, code completion, bug fixing
-
-    ### 💬 **Chat-Optimized Models**
-    - **Best for**: Conversational AI, dialogue systems
-    - **Examples**: Vicuna, ChatGLM, Baichuan, Qwen-Chat
-    - **Use cases**: Customer service, personal assistants
-
-    ### 📚 **Instruction-Following Models**
-    - **Best for**: Following complex instructions, task completion
-    - **Examples**: WizardLM, Alpaca, Orca, Nous-Hermes
-    - **Use cases**: Task automation, structured responses
-
-    ### 🧮 **Reasoning & Math Models**
-    - **Best for**: Mathematical problem solving, logical reasoning
-    - **Examples**: MetaMath, WizardMath, MAmmoTH, Goat
-    - **Use cases**: Education, research, analytical tasks

     ### 👁️ **Multimodal Models**
-    - **Best for**: Understanding both text and images
-    - **Examples**: LLaVA, InstructBLIP, MiniGPT-4
-    - **Use cases**: Image analysis, visual Q&A, content moderation
-
-    ### 🌐 **Multilingual Models**
-    - **Best for**: Multiple language support
-    - **Examples**: mGPT, BLOOM, XLM-RoBERTa, aya-101
-    - **Use cases**: Translation, global applications
-
-    ### 🏥 **Domain-Specific Models**
-    - **Medical**: Med-PaLM, Medical-LLaMA, BioBERT
-    - **Finance**: BloombergGPT, FinGPT, FinBERT
-    - **Legal**: LegalBERT, Legal-LLaMA
-    - **Science**: SciBERT, Research-focused models
-
-    ## 💾 RAM-to-Performance Matrix
-
-    | RAM Size | Model Examples | Capabilities | Best Use Cases |
-    |----------|----------------|--------------|----------------|
-    | **≤2GB** | DistilBERT, TinyBERT, MobileBERT | Basic NLP, fast inference | Mobile apps, edge devices, simple classification |
-    | **4GB** | TinyLLaMA, DistilGPT-2, MiniLM | Simple chat, basic reasoning | Lightweight chatbots, mobile AI assistants |
-    | **6GB** | Phi-1.5, Gemma-2B, Alpaca-3B | Decent conversation, basic coding | Personal assistants, educational tools |
-    | **8GB** | Phi-2, LLaMA-2-7B (4-bit), Mistral-7B (4-bit) | Good general purpose, coding help | Development tools, content creation |
-    | **16GB** | LLaMA-2-7B, Mistral-7B, CodeLLaMA-7B | High quality responses, complex tasks | Professional applications, research |
-    | **24GB** | LLaMA-2-13B, Mixtral-8x7B (4-bit) | Excellent performance, long context | Enterprise solutions, advanced research |
-    | **32GB+** | LLaMA-2-70B (8-bit), Mixtral-8x7B | Top-tier performance, specialized tasks | Research institutions, large-scale applications |
-
-    ## 🛠️ Optimization Techniques
-
-    ### **Quantization Methods**
-    - **4-bit**: GPTQ, AWQ - 75% memory reduction
-    - **8-bit**: bitsandbytes - 50% memory reduction
-    - **16-bit**: Half precision - 50% memory reduction
-
-    ### **Efficient Formats**
-    - **GGUF**: Optimized for CPU inference
-    - **ONNX**: Cross-platform optimization
-    - **TensorRT**: NVIDIA GPU optimization
-
-    ### **Memory-Saving Tips**
-    - Use CPU offloading for large models
-    - Reduce context window length
-    - Enable gradient checkpointing
-    - Use model sharding for very large models
-
-    ### 🔗 **Popular Platforms & Tools**
-    - **Hugging Face**: Largest model repository
-    - **Ollama**: Easy local model deployment
-    - **LM Studio**: GUI for running models
-    - **llama.cpp**: Efficient CPU inference
-    - **vLLM**: High-throughput inference
-    - **Text Generation WebUI**: Web interface for models
     """)

-# Footer with additional resources
 st.markdown("---")
 st.markdown("""
-### 🔗 Essential Resources & Tools
-
-**📦 Model Repositories:**
-- [Hugging Face Hub](https://huggingface.co/models) – 500,000+ models, including BERT, LLaMA, Mistral, and more.
-- [Ollama Library](https://ollama.ai/library) – Seamless CLI-based local model deployment (LLaMA, Mistral, Gemma).
-- [Together AI](https://www.together.ai/models) – Access to powerful open models via API or hosted inference.
-
-**🛠️ Inference Tools:**
-- [**llama.cpp**](https://github.com/ggerganov/llama.cpp) – CPU/GPU inference for LLaMA models with quantization.
-- [**GGUF format**](https://huggingface.co/docs/transformers/main/en/gguf) – Next-gen model format optimized for local inference.
-- [**vLLM**](https://github.com/vllm-project/vllm) – High-throughput inference engine for transformer models.
-- [**AutoGPTQ**](https://github.com/PanQiWei/AutoGPTQ) – GPU-optimized quantized inference for large models.
-
-**📚 Learning & Deployment:**
-- [Awesome LLMs](https://github.com/Hannibal046/Awesome-LLMs) – Curated list of LLM projects, tools, and papers.
-- [LangChain](https://www.langchain.com/) – Framework for building apps with LLMs and tools.
-- [LlamaIndex](https://www.llamaindex.ai/) – Connect LLMs with external data and documents (RAG).
-
----
-""")
 
 #!/usr/bin/env python3
 """
+LLM Compatibility Advisor - Streamlined with Download Sizes
 Author: Assistant
+Description: Provides device-based LLM recommendations with popular models and download sizes
 Requirements: streamlit, pandas, plotly, openpyxl
 """

 def load_data():
     try:
         df = pd.read_excel("src/BITS_INTERNS.xlsx", sheet_name="Form Responses 1")
         df.columns = df.columns.str.strip()
         return df, None
     except FileNotFoundError:

     return None

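Only the tail of `extract_numeric_ram` appears in this excerpt. For orientation, here is a minimal sketch of what a parser with that signature plausibly does; the regex, the accepted formats, and the helper name are assumptions, not the committed code:

```python
import re
from typing import Optional

def extract_numeric_ram_sketch(ram) -> Optional[int]:
    """Hypothetical stand-in: pull the leading number out of values like
    '8GB', '16 GB', or '12gb DDR4'; return None when nothing parses."""
    if ram is None:
        return None
    match = re.search(r"(\d+(?:\.\d+)?)", str(ram))
    return int(float(match.group(1))) if match else None
```
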
+# Streamlined LLM database with popular models and download sizes
 LLM_DATABASE = {
     "ultra_low": { # ≤2GB
+        "general": [
+            {"name": "TinyLlama-1.1B-Chat", "size": "637MB", "description": "Compact chat model"},
+            {"name": "DistilBERT-base", "size": "268MB", "description": "Efficient BERT variant"},
+            {"name": "all-MiniLM-L6-v2", "size": "91MB", "description": "Sentence embeddings"}
+        ],
+        "code": [
+            {"name": "CodeT5-small", "size": "242MB", "description": "Code generation"},
+            {"name": "Replit-code-v1-3B", "size": "1.2GB", "description": "Code completion"}
+        ]
     },
     "low": { # 3-4GB
+        "general": [
+            {"name": "Phi-1.5", "size": "2.8GB", "description": "Microsoft's efficient model"},
+            {"name": "Gemma-2B", "size": "1.4GB", "description": "Google's compact model"},
+            {"name": "OpenLLaMA-3B", "size": "2.1GB", "description": "Open source LLaMA"}
+        ],
+        "code": [
+            {"name": "CodeGen-2B", "size": "1.8GB", "description": "Salesforce code model"},
+            {"name": "StarCoder-1B", "size": "1.1GB", "description": "BigCode project"}
+        ],
+        "chat": [
+            {"name": "Alpaca-3B", "size": "2.0GB", "description": "Stanford's instruction model"},
+            {"name": "Vicuna-3B", "size": "2.1GB", "description": "ChatGPT-style training"}
+        ]
     },
     "moderate_low": { # 5-6GB
+        "general": [
+            {"name": "Phi-2", "size": "5.2GB", "description": "Microsoft's 2.7B model"},
+            {"name": "Gemma-7B-it", "size": "4.2GB", "description": "Google instruction tuned"},
+            {"name": "Mistral-7B-v0.1", "size": "4.1GB", "description": "Mistral AI base model"}
+        ],
+        "code": [
+            {"name": "CodeLlama-7B", "size": "3.8GB", "description": "Meta's code specialist"},
+            {"name": "StarCoder-7B", "size": "4.0GB", "description": "Code generation expert"}
+        ],
+        "chat": [
+            {"name": "Zephyr-7B-beta", "size": "4.2GB", "description": "HuggingFace chat model"},
+            {"name": "Neural-Chat-7B", "size": "4.1GB", "description": "Intel optimized"}
+        ]
     },
     "moderate": { # 7-8GB
+        "general": [
+            {"name": "Llama-2-7B-Chat", "size": "3.5GB", "description": "Meta's popular chat model"},
+            {"name": "Mistral-7B-Instruct-v0.2", "size": "4.1GB", "description": "Latest Mistral instruct"},
+            {"name": "Qwen-7B-Chat", "size": "4.0GB", "description": "Alibaba's multilingual"}
+        ],
+        "code": [
+            {"name": "CodeLlama-7B-Instruct", "size": "3.8GB", "description": "Instruction-tuned CodeLlama"},
+            {"name": "WizardCoder-7B", "size": "4.0GB", "description": "Enhanced coding abilities"},
+            {"name": "Phind-CodeLlama-34B-v2", "size": "4.2GB", "description": "4-bit quantized version"}
+        ],
+        "reasoning": [
+            {"name": "WizardMath-7B", "size": "4.0GB", "description": "Mathematical reasoning"},
+            {"name": "MetaMath-7B", "size": "3.9GB", "description": "Math problem solving"}
+        ]
     },
     "good": { # 9-16GB
+        "general": [
+            {"name": "Llama-2-13B-Chat", "size": "7.3GB", "description": "Larger Llama variant"},
+            {"name": "Vicuna-13B-v1.5", "size": "7.2GB", "description": "Enhanced Vicuna"},
+            {"name": "OpenChat-3.5", "size": "7.1GB", "description": "High-quality chat model"}
+        ],
+        "code": [
+            {"name": "CodeLlama-13B-Instruct", "size": "7.3GB", "description": "Larger code model"},
+            {"name": "WizardCoder-15B", "size": "8.2GB", "description": "Advanced coding"},
+            {"name": "StarCoder-15B", "size": "8.5GB", "description": "Large code model"}
+        ],
+        "multimodal": [
+            {"name": "LLaVA-7B", "size": "7.0GB", "description": "Vision + language"},
+            {"name": "MiniGPT-4-7B", "size": "6.8GB", "description": "Multimodal chat"}
+        ],
+        "reasoning": [
+            {"name": "WizardMath-13B", "size": "7.3GB", "description": "Advanced math"},
+            {"name": "Orca-2-13B", "size": "7.4GB", "description": "Microsoft reasoning"}
+        ]
     },
     "high": { # 17-32GB
+        "general": [
+            {"name": "Mixtral-8x7B-Instruct-v0.1", "size": "26.9GB", "description": "Mixture of experts"},
+            {"name": "Llama-2-70B-Chat", "size": "38.0GB", "description": "8-bit quantized"},
+            {"name": "Yi-34B-Chat", "size": "19.5GB", "description": "01.AI's large model"}
+        ],
+        "code": [
+            {"name": "CodeLlama-34B-Instruct", "size": "19.0GB", "description": "Large code specialist"},
+            {"name": "DeepSeek-Coder-33B", "size": "18.5GB", "description": "DeepSeek's coder"},
+            {"name": "WizardCoder-34B", "size": "19.2GB", "description": "Enterprise coding"}
+        ],
+        "reasoning": [
+            {"name": "WizardMath-70B", "size": "38.5GB", "description": "8-bit quantized math"},
+            {"name": "MetaMath-70B", "size": "38.0GB", "description": "8-bit math reasoning"}
+        ]
     },
     "ultra_high": { # >32GB
+        "general": [
+            {"name": "Llama-2-70B", "size": "130GB", "description": "Full precision"},
+            {"name": "Mixtral-8x22B", "size": "176GB", "description": "Latest mixture model"},
+            {"name": "Qwen-72B", "size": "145GB", "description": "Alibaba's flagship"}
+        ],
+        "code": [
+            {"name": "CodeLlama-34B", "size": "68GB", "description": "Full precision code"},
+            {"name": "DeepSeek-Coder-33B", "size": "66GB", "description": "Full precision coding"}
+        ],
+        "reasoning": [
+            {"name": "WizardMath-70B", "size": "130GB", "description": "Full precision math"},
+            {"name": "Goat-70B", "size": "132GB", "description": "Arithmetic reasoning"}
+        ]
     }
 }

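Each tier in the new database maps category names to lists of `{"name", "size", "description"}` dicts, so lookups are plain dictionary walks. A small illustrative helper (not part of the commit) showing how the structure reads back:

```python
# Illustrative only: flatten one tier/category of LLM_DATABASE for display.
def list_models(tier: str, category: str) -> list:
    entries = LLM_DATABASE.get(tier, {}).get(category, [])
    return [f"{m['name']} ({m['size']})" for m in entries]

# list_models("moderate", "general")
# -> ['Llama-2-7B-Chat (3.5GB)', 'Mistral-7B-Instruct-v0.2 (4.1GB)', 'Qwen-7B-Chat (4.0GB)']
```
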
 # Enhanced LLM recommendation with performance tiers
+def recommend_llm(ram_str) -> Tuple[str, str, str, Dict[str, List[Dict]]]:
     """Returns (recommendation, performance_tier, additional_info, detailed_models)"""
     ram = extract_numeric_ram(ram_str)

     if ram <= 2:
         models = LLM_DATABASE["ultra_low"]
+        return ("🔸 Ultra-lightweight models - basic NLP tasks",
                 "Ultra Low",
+                "Mobile-optimized, simple tasks, limited context",
                 models)
     elif ram <= 4:
         models = LLM_DATABASE["low"]
+        return ("🔸 Small language models - decent capabilities",
                 "Low",
+                "Basic chat, simple reasoning, text classification",
                 models)
     elif ram <= 6:
         models = LLM_DATABASE["moderate_low"]
+        return ("🟠 Mid-range models - good general performance",
                 "Moderate-Low",
+                "Solid reasoning, coding help, longer conversations",
                 models)
     elif ram <= 8:
         models = LLM_DATABASE["moderate"]
+        return ("🟠 Strong 7B models - excellent capabilities",
                 "Moderate",
+                "Professional use, coding assistance, complex reasoning",
                 models)
     elif ram <= 16:
         models = LLM_DATABASE["good"]
+        return ("🟢 High-quality models - premium performance",
                 "Good",
+                "Advanced tasks, multimodal support, research use",
                 models)
     elif ram <= 32:
         models = LLM_DATABASE["high"]
+        return ("🔵 Premium models - professional grade",
                 "High",
+                "Enterprise ready, complex reasoning, specialized tasks",
                 models)
     else:
         models = LLM_DATABASE["ultra_high"]
+        return ("🔵 Top-tier models - enterprise capabilities",
                 "Ultra High",
+                "Research grade, maximum performance, domain expertise",
                 models)

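A usage sketch for the four-tuple returned above; the values follow directly from the `ram <= 8` branch, assuming `extract_numeric_ram("8GB")` yields 8:

```python
# Illustrative call: an "8GB" laptop lands in the ram <= 8 branch.
recommendation, tier, info, detailed = recommend_llm("8GB")
assert tier == "Moderate"
print(recommendation)    # "🟠 Strong 7B models - excellent capabilities"
print(sorted(detailed))  # ['code', 'general', 'reasoning']
```
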
 # Enhanced OS detection with better icons

     return fig

+# Enhanced model details display function
+def display_model_categories(models_dict: Dict[str, List[Dict]], ram_gb: int):
+    """Display models organized by category with download sizes"""
     if not models_dict:
         return

     for category, model_list in models_dict.items():
         if model_list:
             with st.expander(f"📂 {category.replace('_', ' ').title()} Models"):
+                for model in model_list[:8]:  # Limit to top 8 per category
+                    col1, col2, col3 = st.columns([3, 1, 2])
+                    with col1:
+                        st.markdown(f"**{model['name']}**")
+                    with col2:
+                        st.markdown(f"`{model['size']}`")
+                    with col3:
+                        st.markdown(f"*{model['description']}*")

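A sketch of how the function above might be driven from `recommend_llm`; the wiring is an assumption about code outside this excerpt:

```python
# Illustrative wiring: render one expander per category for a 16GB machine.
_, tier, _, detailed = recommend_llm("16GB")  # falls in the "Good" tier
display_model_categories(detailed, ram_gb=16)
```
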
 # Main App
+st.title("🧠 LLM Compatibility Advisor")
+st.markdown("Get personalized recommendations from **150+ popular open source AI models** with download sizes!")

 # Load data
 df, error = load_data()

     st.subheader("Model Categories")
     show_categories = st.multiselect(
         "Show specific categories:",
+        ["general", "code", "chat", "reasoning", "multimodal"],
         default=["general", "code", "chat"]
     )

     st.markdown("---")
     st.markdown("### 📊 Quick Stats")
     st.metric("Total Students", len(df))
+    st.metric("Popular Models", "150+")

     # Calculate average RAM
     avg_laptop_ram = df["Laptop RAM"].apply(extract_numeric_ram).mean()

     st.markdown("**Mobile Performance Tiers:**")
     mobile_tier_counts = mobile_tiers.value_counts()
     for tier, count in mobile_tier_counts.items():
+        percentage = (count / len(mobile_tier_counts)) * 100
         st.write(f"• {tier}: {count} students ({percentage:.1f}%)")

 # Model Explorer Section
 st.markdown("---")
+st.header("🔍 Popular Model Explorer")

 explorer_col1, explorer_col2 = st.columns(2)

 with explorer_col2:
     selected_category = st.selectbox(
         "Select model category:",
+        ["general", "code", "chat", "reasoning", "multimodal"]
     )

 # Map selection to database key
 

     st.subheader(f"🎯 {selected_category.title()} Models for {selected_ram_range}")

+    # Display models in a detailed table
+    for model in models:
+        with st.container():
+            col1, col2, col3 = st.columns([3, 1, 3])
+            with col1:
+                st.markdown(f"### {model['name']}")
+            with col2:
+                st.markdown(f"**{model['size']}**")
+                st.caption("Download Size")
+            with col3:
+                st.markdown(f"*{model['description']}*")
+                # Add download suggestion
+                if "Llama" in model['name']:
+                    st.caption("🔗 Available on Hugging Face & Ollama")
+                elif "Mistral" in model['name']:
+                    st.caption("🔗 Available on Hugging Face & Mistral AI")
+                elif "Gemma" in model['name']:
+                    st.caption("🔗 Available on Hugging Face & Google")
+                else:
+                    st.caption("🔗 Available on Hugging Face")
+            st.markdown("---")
 else:
     st.info(f"No {selected_category} models available for {selected_ram_range}")

+# Enhanced reference guide
+with st.expander("📘 Model Guide & Download Information"):
  st.markdown("""
537
+ ## ๐Ÿš€ Popular Models by Category
538
+
539
+ ### ๐ŸŽฏ **General Purpose Champions**
540
+ - **Llama-2 Series**: Meta's flagship models (7B, 13B, 70B)
541
+ - **Mistral Series**: Excellent efficiency and performance
542
+ - **Gemma**: Google's efficient models (2B, 7B)
543
+ - **Phi**: Microsoft's compact powerhouses
544
+
545
+ ### ๐Ÿ’ป **Code Specialists**
546
+ - **CodeLlama**: Meta's dedicated coding models
547
+ - **StarCoder**: BigCode's programming experts
548
+ - **WizardCoder**: Enhanced coding capabilities
549
+ - **DeepSeek-Coder**: Chinese tech giant's coder
550
+
551
+ ### ๐Ÿ’ฌ **Chat Optimized**
552
+ - **Vicuna**: UC Berkeley's ChatGPT alternative
553
+ - **Zephyr**: HuggingFace's chat specialist
554
+ - **OpenChat**: High-quality conversation models
555
+ - **Neural-Chat**: Intel-optimized chat models
556
+
557
+ ### ๐Ÿงฎ **Reasoning Masters**
558
+ - **WizardMath**: Mathematical problem solving
559
+ - **MetaMath**: Advanced arithmetic reasoning
560
+ - **Orca-2**: Microsoft's reasoning specialist
561
+ - **Goat**: Specialized arithmetic model
 
562
 
563
  ### ๐Ÿ‘๏ธ **Multimodal Models**
564
+ - **LLaVA**: Large Language and Vision Assistant
565
+ - **MiniGPT-4**: Multimodal conversational AI
566
+
567
+ ## ๐Ÿ’พ Download Size Reference
568
+
569
+ | Model Size | FP16 | 8-bit | 4-bit | Use Case |
570
+ |------------|------|-------|-------|----------|
571
+ | **1-3B** | 2-6GB | 1-3GB | 0.5-1.5GB | Mobile, Edge |
572
+ | **7B** | 13GB | 7GB | 3.5GB | Desktop, Laptop |
573
+ | **13B** | 26GB | 13GB | 7GB | Workstation |
574
+ | **30-34B** | 60GB | 30GB | 15GB | Server, Cloud |
575
+ | **70B** | 140GB | 70GB | 35GB | High-end Server |
576
+
577
+ ## ๐Ÿ› ๏ธ Where to Download
578
+
579
+ ### **Primary Sources**
580
+ - **๐Ÿค— Hugging Face**: Largest repository with 400,000+ models
581
+ - **๐Ÿฆ™ Ollama**: Simple CLI tool for local deployment
582
+ - **๐Ÿ“ฆ LM Studio**: User-friendly GUI for model management
583
+
584
+ ### **Quantized Formats**
585
+ - **GGUF**: Best for CPU inference (llama.cpp)
586
+ - **GPTQ**: GPU-optimized quantization
587
+ - **AWQ**: Advanced weight quantization
588
+
589
+ ### **Download Tips**
590
+ - Use `git lfs` for large models from Hugging Face
591
+ - Consider bandwidth and storage before downloading
592
+ - Start with 4-bit quantized versions for testing
593
+ - Use `ollama pull model_name` for easiest setup
594
+
595
+ ## ๐Ÿ”ง Optimization Strategies
596
+
597
+ ### **Memory Reduction**
598
+ - **4-bit quantization**: 75% memory reduction
599
+ - **8-bit quantization**: 50% memory reduction
600
+ - **CPU offloading**: Use system RAM for overflow
601
+
602
+ ### **Speed Optimization**
603
+ - **GPU acceleration**: CUDA, ROCm, Metal
604
+ - **Batch processing**: Process multiple requests
605
+ - **Context caching**: Reuse computations
 
 
 
 
 
 
 
 
 
 
606
  """)
 
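The download-size table in the guide follows from bytes-per-parameter arithmetic. A rough sketch of that rule of thumb (the function name and rounding are illustrative, and real files vary with format overhead and exact parameter counts):

```python
# Rule of thumb behind the size table: parameters × bits ÷ 8 bytes each.
def approx_download_gb(params_billion: float, bits: int = 16) -> float:
    """Estimate model file size in GB at a given weight precision."""
    return round(params_billion * 1e9 * bits / 8 / 1e9, 1)

# approx_download_gb(7, 4)  -> 3.5, matching the 4-bit column for 7B models
# approx_download_gb(7, 16) -> 14.0 vs the table's 13GB; GiB-vs-GB and
# per-format overhead account for the gap
```
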
+# Footer with updated resources
 st.markdown("---")
 st.markdown("""
+### 🔗 Essential Download & Deployment Tools
+
+**📦 Easy Model Deployment:**
+- [**Ollama**](https://ollama.ai/) – `curl -fsSL https://ollama.ai/install.sh | sh`
+- [**LM Studio**](https://lmstudio.ai/) – Drag-and-drop GUI for running models locally
+- [**GPT4All**](https://gpt4all.io/) – Cross-platform desktop app for local LLMs
+
+**🤗 Model Repositories:**
+- [**Hugging Face Hub**](https://huggingface.co/models) – Filter by model size, task, and license
+- [**TheBloke's Quantizations**](https://huggingface.co/TheBloke) – Pre-quantized models in GGUF/GPTQ format
+- [**Awesome LLM**](https://github.com/Hannibal046/Awesome-LLMs) – Curated list of models and resources
+
+**⚡ Performance Tools:**
+- [**llama.cpp**](https://github.com/ggerganov/llama.cpp) – High-performance CPU inference
+- [**vLLM**](https://github.com/vllm-project/vllm) – Fast GPU inference