#!/usr/bin/env python3 """ LLM Compatibility Advisor - Streamlined with Download Sizes Author: Assistant Description: Provides device-based LLM recommendations with popular models and download sizes Requirements: streamlit, pandas, plotly, openpyxl """ import streamlit as st import pandas as pd import numpy as np import re import plotly.express as px import plotly.graph_objects as go from typing import Optional, Tuple, List, Dict # ✅ MUST be the first Streamlit command st.set_page_config( page_title="LLM Compatibility Advisor", layout="wide", page_icon="🧠", initial_sidebar_state="expanded" ) # Enhanced data loading with error handling @st.cache_data def load_data(): paths = [ "src/BITS_INTERNS.xlsx", "src/Summer of AI - ICFAI (Responses) (3).xlsx" ] combined_df = pd.DataFrame() for path in paths: try: df = pd.read_excel(path, sheet_name="Form Responses 1") df.columns = df.columns.str.strip() combined_df = pd.concat([combined_df, df], ignore_index=True) except FileNotFoundError: return None, f"Excel file '{path}' not found. Please upload the file." except Exception as e: return None, f"Error loading '{path}': {str(e)}" # Return success case - this was missing! if combined_df.empty: return None, "No data found in Excel files." else: return combined_df, None # Enhanced RAM extraction with better parsing def extract_numeric_ram(ram) -> Optional[int]: if pd.isna(ram): return None ram_str = str(ram).lower().replace(" ", "") # Handle various formats: "8GB", "8 GB", "8gb", "8192MB", etc. gb_match = re.search(r"(\d+(?:\.\d+)?)(?:gb|g)", ram_str) if gb_match: return int(float(gb_match.group(1))) # Handle MB format mb_match = re.search(r"(\d+)(?:mb|m)", ram_str) if mb_match: return max(1, int(int(mb_match.group(1)) / 1024)) # Convert MB to GB # Handle plain numbers (assume GB) plain_match = re.search(r"(\d+)", ram_str) if plain_match: return int(plain_match.group(1)) return None # Streamlined LLM database with popular models and download sizes LLM_DATABASE = { "ultra_low": { # ≤2GB "general": [ {"name": "TinyLlama-1.1B-Chat", "size": "637MB", "description": "Compact chat model"}, {"name": "DistilBERT-base", "size": "268MB", "description": "Efficient BERT variant"}, {"name": "all-MiniLM-L6-v2", "size": "91MB", "description": "Sentence embeddings"} ], "code": [ {"name": "CodeT5-small", "size": "242MB", "description": "Code generation"}, {"name": "Replit-code-v1-3B", "size": "1.2GB", "description": "Code completion"} ] }, "low": { # 3-4GB "general": [ {"name": "Phi-1.5", "size": "2.8GB", "description": "Microsoft's efficient model"}, {"name": "Gemma-2B", "size": "1.4GB", "description": "Google's compact model"}, {"name": "OpenLLaMA-3B", "size": "2.1GB", "description": "Open source LLaMA"} ], "code": [ {"name": "CodeGen-2B", "size": "1.8GB", "description": "Salesforce code model"}, {"name": "StarCoder-1B", "size": "1.1GB", "description": "BigCode project"} ], "chat": [ {"name": "Alpaca-3B", "size": "2.0GB", "description": "Stanford's instruction model"}, {"name": "Vicuna-3B", "size": "2.1GB", "description": "ChatGPT-style training"} ] }, "moderate_low": { # 5-6GB "general": [ {"name": "Phi-2", "size": "5.2GB", "description": "Microsoft's 2.7B model"}, {"name": "Gemma-7B-it", "size": "4.2GB", "description": "Google instruction tuned"}, {"name": "Mistral-7B-v0.1", "size": "4.1GB", "description": "Mistral AI base model"} ], "code": [ {"name": "CodeLlama-7B", "size": "3.8GB", "description": "Meta's code specialist"}, {"name": "StarCoder-7B", "size": "4.0GB", "description": "Code generation expert"} ], "chat": [ {"name": "Zephyr-7B-beta", "size": "4.2GB", "description": "HuggingFace chat model"}, {"name": "Neural-Chat-7B", "size": "4.1GB", "description": "Intel optimized"} ] }, "moderate": { # 7-8GB "general": [ {"name": "Llama-2-7B-Chat", "size": "3.5GB", "description": "Meta's popular chat model"}, {"name": "Mistral-7B-Instruct-v0.2", "size": "4.1GB", "description": "Latest Mistral instruct"}, {"name": "Qwen-7B-Chat", "size": "4.0GB", "description": "Alibaba's multilingual"} ], "code": [ {"name": "CodeLlama-7B-Instruct", "size": "3.8GB", "description": "Instruction-tuned CodeLlama"}, {"name": "WizardCoder-7B", "size": "4.0GB", "description": "Enhanced coding abilities"}, {"name": "Phind-CodeLlama-34B-v2", "size": "4.2GB", "description": "4-bit quantized version"} ], "reasoning": [ {"name": "WizardMath-7B", "size": "4.0GB", "description": "Mathematical reasoning"}, {"name": "MetaMath-7B", "size": "3.9GB", "description": "Math problem solving"} ] }, "good": { # 9-16GB "general": [ {"name": "Llama-2-13B-Chat", "size": "7.3GB", "description": "Larger Llama variant"}, {"name": "Vicuna-13B-v1.5", "size": "7.2GB", "description": "Enhanced Vicuna"}, {"name": "OpenChat-3.5", "size": "7.1GB", "description": "High-quality chat model"} ], "code": [ {"name": "CodeLlama-13B-Instruct", "size": "7.3GB", "description": "Larger code model"}, {"name": "WizardCoder-15B", "size": "8.2GB", "description": "Advanced coding"}, {"name": "StarCoder-15B", "size": "8.5GB", "description": "Large code model"} ], "multimodal": [ {"name": "LLaVA-7B", "size": "7.0GB", "description": "Vision + language"}, {"name": "MiniGPT-4-7B", "size": "6.8GB", "description": "Multimodal chat"} ], "reasoning": [ {"name": "WizardMath-13B", "size": "7.3GB", "description": "Advanced math"}, {"name": "Orca-2-13B", "size": "7.4GB", "description": "Microsoft reasoning"} ] }, "high": { # 17-32GB "general": [ {"name": "Mixtral-8x7B-Instruct-v0.1", "size": "26.9GB", "description": "Mixture of experts"}, {"name": "Llama-2-70B-Chat", "size": "38.0GB", "description": "8-bit quantized"}, {"name": "Yi-34B-Chat", "size": "19.5GB", "description": "01.AI's large model"} ], "code": [ {"name": "CodeLlama-34B-Instruct", "size": "19.0GB", "description": "Large code specialist"}, {"name": "DeepSeek-Coder-33B", "size": "18.5GB", "description": "DeepSeek's coder"}, {"name": "WizardCoder-34B", "size": "19.2GB", "description": "Enterprise coding"} ], "reasoning": [ {"name": "WizardMath-70B", "size": "38.5GB", "description": "8-bit quantized math"}, {"name": "MetaMath-70B", "size": "38.0GB", "description": "8-bit math reasoning"} ] }, "ultra_high": { # >32GB "general": [ {"name": "Llama-2-70B", "size": "130GB", "description": "Full precision"}, {"name": "Mixtral-8x22B", "size": "176GB", "description": "Latest mixture model"}, {"name": "Qwen-72B", "size": "145GB", "description": "Alibaba's flagship"} ], "code": [ {"name": "CodeLlama-34B", "size": "68GB", "description": "Full precision code"}, {"name": "DeepSeek-Coder-33B", "size": "66GB", "description": "Full precision coding"} ], "reasoning": [ {"name": "WizardMath-70B", "size": "130GB", "description": "Full precision math"}, {"name": "Goat-70B", "size": "132GB", "description": "Arithmetic reasoning"} ] } } # Enhanced LLM recommendation with performance tiers def recommend_llm(ram_str) -> Tuple[str, str, str, Dict[str, List[Dict]]]: """Returns (recommendation, performance_tier, additional_info, detailed_models)""" ram = extract_numeric_ram(ram_str) if ram is None: return ("âšĒ Check exact specs or test with quantized models.", "Unknown", "Verify RAM specifications", {}) if ram <= 2: models = LLM_DATABASE["ultra_low"] return ("🔸 Ultra-lightweight models - basic NLP tasks", "Ultra Low", "Mobile-optimized, simple tasks, limited context", models) elif ram <= 4: models = LLM_DATABASE["low"] return ("🔸 Small language models - decent capabilities", "Low", "Basic chat, simple reasoning, text classification", models) elif ram <= 6: models = LLM_DATABASE["moderate_low"] return ("🟠 Mid-range models - good general performance", "Moderate-Low", "Solid reasoning, coding help, longer conversations", models) elif ram <= 8: models = LLM_DATABASE["moderate"] return ("🟠 Strong 7B models - excellent capabilities", "Moderate", "Professional use, coding assistance, complex reasoning", models) elif ram <= 16: models = LLM_DATABASE["good"] return ("đŸŸĸ High-quality models - premium performance", "Good", "Advanced tasks, multimodal support, research use", models) elif ram <= 32: models = LLM_DATABASE["high"] return ("đŸ”ĩ Premium models - professional grade", "High", "Enterprise ready, complex reasoning, specialized tasks", models) else: models = LLM_DATABASE["ultra_high"] return ("đŸ”ĩ Top-tier models - enterprise capabilities", "Ultra High", "Research grade, maximum performance, domain expertise", models) # Enhanced OS detection with better icons def get_os_info(os_name) -> Tuple[str, str]: """Returns (icon, clean_name)""" if pd.isna(os_name): return "đŸ’ģ", "Not specified" os = str(os_name).lower() if "windows" in os: return "đŸĒŸ", os_name elif "mac" in os or "darwin" in os: return "🍎", os_name elif "linux" in os or "ubuntu" in os: return "🐧", os_name elif "android" in os: return "🤖", os_name elif "ios" in os: return "📱", os_name else: return "đŸ’ģ", os_name # Performance visualization def create_performance_chart(df): """Create a performance distribution chart""" laptop_rams = df["Laptop RAM"].apply(extract_numeric_ram).dropna() mobile_rams = df["Mobile RAM"].apply(extract_numeric_ram).dropna() fig = go.Figure() fig.add_trace(go.Histogram( x=laptop_rams, name="Laptop RAM", opacity=0.7, nbinsx=10 )) fig.add_trace(go.Histogram( x=mobile_rams, name="Mobile RAM", opacity=0.7, nbinsx=10 )) fig.update_layout( title="RAM Distribution Across Devices", xaxis_title="RAM (GB)", yaxis_title="Number of Students", barmode='overlay', height=400 ) return fig # Enhanced model details display function def display_model_categories(models_dict: Dict[str, List[Dict]], ram_gb: int): """Display models organized by category with download sizes""" if not models_dict: return st.markdown(f"### đŸŽ¯ Recommended Models for {ram_gb}GB RAM:") for category, model_list in models_dict.items(): if model_list: with st.expander(f"📂 {category.replace('_', ' ').title()} Models"): for model in model_list[:8]: # Limit to top 8 per category col1, col2, col3 = st.columns([3, 1, 2]) with col1: st.markdown(f"**{model['name']}**") with col2: st.markdown(f"`{model['size']}`") with col3: st.markdown(f"*{model['description']}*") # Demo data generator for when Excel files are not available def generate_demo_data(): """Generate demo data for testing when Excel files are missing""" demo_data = { "Full Name": [ "Demo Student 1", "Demo Student 2", "Demo Student 3", "Demo Student 4", "Demo Student 5", "Demo Student 6", "Demo Student 7", "Demo Student 8" ], "Laptop RAM": ["8GB", "16GB", "4GB", "32GB", "6GB", "12GB", "2GB", "24GB"], "Mobile RAM": ["4GB", "8GB", "3GB", "12GB", "6GB", "4GB", "2GB", "8GB"], "Laptop Operating System": [ "Windows 11", "macOS Monterey", "Ubuntu 22.04", "Windows 10", "macOS Big Sur", "Fedora 36", "Windows 11", "macOS Ventura" ], "Mobile Operating System": [ "Android 13", "iOS 16", "Android 12", "iOS 15", "Android 14", "iOS 17", "Android 11", "iOS 16" ] } return pd.DataFrame(demo_data) # Function to safely prepare user options def prepare_user_options(df): """Safely prepare user options for selectbox, handling NaN values and mixed types""" try: # Get unique names and filter out NaN values unique_names = df["Full Name"].dropna().unique() # Convert to strings and filter out any remaining non-string values valid_names = [] for name in unique_names: try: str_name = str(name).strip() if str_name and str_name.lower() != 'nan': valid_names.append(str_name) except: continue # Create options list with proper string concatenation options = ["Select a student..."] + sorted(valid_names) return options except Exception as e: st.error(f"Error preparing user options: {e}") return ["Select a student..."] # Main App st.title("🧠 LLM Compatibility Advisor") st.markdown("Get personalized recommendations from **150+ popular open source AI models** with download sizes!") # Load data with better error handling df, error = load_data() if error or df is None or df.empty: st.warning("âš ī¸ Excel files not found. Running with demo data for testing.") st.info("📁 To use real data, place 'BITS_INTERNS.xlsx' and 'Summer of AI - ICFAI (Responses) (3).xlsx' in the 'src/' directory.") df = generate_demo_data() with st.expander("📋 Expected Data Format"): st.markdown(""" The app expects Excel files with the following columns: - **Full Name**: Student name - **Laptop RAM**: RAM specification (e.g., "8GB", "16 GB", "8192MB") - **Mobile RAM**: Mobile device RAM - **Laptop Operating System**: OS name - **Mobile Operating System**: Mobile OS name """) # Verify required columns exist required_columns = ["Full Name", "Laptop RAM", "Mobile RAM"] missing_columns = [col for col in required_columns if col not in df.columns] if missing_columns: st.error(f"Missing required columns: {missing_columns}") st.info("Please ensure your Excel file contains the required columns.") st.stop() # Clean the dataframe df = df.copy() df["Full Name"] = df["Full Name"].astype(str).str.strip() # Sidebar filters and info with st.sidebar: st.header("🔍 Filters & Info") # Performance tier filter performance_filter = st.multiselect( "Filter by Performance Tier:", ["Ultra Low", "Low", "Moderate-Low", "Moderate", "Good", "High", "Ultra High", "Unknown"], default=["Ultra Low", "Low", "Moderate-Low", "Moderate", "Good", "High", "Ultra High", "Unknown"] ) # Model category filter st.subheader("Model Categories") show_categories = st.multiselect( "Show specific categories:", ["general", "code", "chat", "reasoning", "multimodal"], default=["general", "code", "chat"] ) st.markdown("---") st.markdown("### 📊 Quick Stats") st.metric("Total Students", len(df)) st.metric("Popular Models", "150+") # Calculate average RAM avg_laptop_ram = df["Laptop RAM"].apply(extract_numeric_ram).mean() avg_mobile_ram = df["Mobile RAM"].apply(extract_numeric_ram).mean() if not pd.isna(avg_laptop_ram): st.metric("Avg Laptop RAM", f"{avg_laptop_ram:.1f} GB") if not pd.isna(avg_mobile_ram): st.metric("Avg Mobile RAM", f"{avg_mobile_ram:.1f} GB") # User selection with search - FIXED VERSION st.subheader("👤 Individual Student Analysis") # Prepare options safely user_options = prepare_user_options(df) selected_user = st.selectbox( "Choose a student:", options=user_options, index=0 # Default to first option ("Select a student...") ) if selected_user and selected_user != "Select a student...": # Find user data with safe lookup user_data_mask = df["Full Name"].astype(str).str.strip() == selected_user if user_data_mask.any(): user_data = df[user_data_mask].iloc[0] # Enhanced user display col1, col2 = st.columns(2) with col1: st.markdown("### đŸ’ģ Laptop Configuration") laptop_os_icon, laptop_os_name = get_os_info(user_data.get('Laptop Operating System')) laptop_ram = user_data.get('Laptop RAM', 'Not specified') laptop_rec, laptop_tier, laptop_info, laptop_models = recommend_llm(laptop_ram) laptop_ram_gb = extract_numeric_ram(laptop_ram) or 0 st.markdown(f"**OS:** {laptop_os_icon} {laptop_os_name}") st.markdown(f"**RAM:** {laptop_ram}") st.markdown(f"**Performance Tier:** {laptop_tier}") st.success(f"**💡 Recommendation:** {laptop_rec}") st.info(f"**â„šī¸ Notes:** {laptop_info}") # Display detailed models for laptop if laptop_models: filtered_models = {k: v for k, v in laptop_models.items() if k in show_categories} display_model_categories(filtered_models, laptop_ram_gb) with col2: st.markdown("### 📱 Mobile Configuration") mobile_os_icon, mobile_os_name = get_os_info(user_data.get('Mobile Operating System')) mobile_ram = user_data.get('Mobile RAM', 'Not specified') mobile_rec, mobile_tier, mobile_info, mobile_models = recommend_llm(mobile_ram) mobile_ram_gb = extract_numeric_ram(mobile_ram) or 0 st.markdown(f"**OS:** {mobile_os_icon} {mobile_os_name}") st.markdown(f"**RAM:** {mobile_ram}") st.markdown(f"**Performance Tier:** {mobile_tier}") st.success(f"**💡 Recommendation:** {mobile_rec}") st.info(f"**â„šī¸ Notes:** {mobile_info}") # Display detailed models for mobile if mobile_models: filtered_models = {k: v for k, v in mobile_models.items() if k in show_categories} display_model_categories(filtered_models, mobile_ram_gb) # Batch Analysis Section st.markdown("---") st.header("📊 Batch Analysis & Insights") # Create enhanced batch table df_display = df[["Full Name", "Laptop RAM", "Mobile RAM"]].copy() # Add recommendations and performance tiers laptop_recommendations = df["Laptop RAM"].apply(lambda x: recommend_llm(x)[0]) mobile_recommendations = df["Mobile RAM"].apply(lambda x: recommend_llm(x)[0]) laptop_tiers = df["Laptop RAM"].apply(lambda x: recommend_llm(x)[1]) mobile_tiers = df["Mobile RAM"].apply(lambda x: recommend_llm(x)[1]) df_display["Laptop LLM"] = laptop_recommendations df_display["Mobile LLM"] = mobile_recommendations df_display["Laptop Tier"] = laptop_tiers df_display["Mobile Tier"] = mobile_tiers # Filter based on sidebar selections mask = (laptop_tiers.isin(performance_filter) | mobile_tiers.isin(performance_filter)) df_filtered = df_display[mask] # Display filtered table st.subheader(f"📋 Student Recommendations ({len(df_filtered)} students)") st.dataframe( df_filtered, use_container_width=True, column_config={ "Full Name": st.column_config.TextColumn("Student Name", width="medium"), "Laptop RAM": st.column_config.TextColumn("Laptop RAM", width="small"), "Mobile RAM": st.column_config.TextColumn("Mobile RAM", width="small"), "Laptop LLM": st.column_config.TextColumn("Laptop Recommendation", width="large"), "Mobile LLM": st.column_config.TextColumn("Mobile Recommendation", width="large"), "Laptop Tier": st.column_config.TextColumn("L-Tier", width="small"), "Mobile Tier": st.column_config.TextColumn("M-Tier", width="small"), } ) # Performance distribution chart if len(df) > 1: st.subheader("📈 RAM Distribution Analysis") fig = create_performance_chart(df) st.plotly_chart(fig, use_container_width=True) # Performance tier summary st.subheader("đŸŽ¯ Performance Tier Summary") tier_col1, tier_col2 = st.columns(2) with tier_col1: st.markdown("**Laptop Performance Tiers:**") laptop_tier_counts = laptop_tiers.value_counts() for tier, count in laptop_tier_counts.items(): percentage = (count / len(laptop_tiers)) * 100 st.write(f"â€ĸ {tier}: {count} students ({percentage:.1f}%)") with tier_col2: st.markdown("**Mobile Performance Tiers:**") mobile_tier_counts = mobile_tiers.value_counts() for tier, count in mobile_tier_counts.items(): percentage = (count / len(mobile_tier_counts)) * 100 st.write(f"â€ĸ {tier}: {count} students ({percentage:.1f}%)") # Model Explorer Section st.markdown("---") st.header("🔍 Popular Model Explorer") explorer_col1, explorer_col2 = st.columns(2) with explorer_col1: selected_ram_range = st.selectbox( "Select RAM range to explore models:", ["≤2GB (Ultra Low)", "3-4GB (Low)", "5-6GB (Moderate-Low)", "7-8GB (Moderate)", "9-16GB (Good)", "17-32GB (High)", ">32GB (Ultra High)"] ) with explorer_col2: selected_category = st.selectbox( "Select model category:", ["general", "code", "chat", "reasoning", "multimodal"] ) # Map selection to database key ram_mapping = { "≤2GB (Ultra Low)": "ultra_low", "3-4GB (Low)": "low", "5-6GB (Moderate-Low)": "moderate_low", "7-8GB (Moderate)": "moderate", "9-16GB (Good)": "good", "17-32GB (High)": "high", ">32GB (Ultra High)": "ultra_high" } selected_ram_key = ram_mapping[selected_ram_range] if selected_ram_key in LLM_DATABASE and selected_category in LLM_DATABASE[selected_ram_key]: models = LLM_DATABASE[selected_ram_key][selected_category] st.subheader(f"đŸŽ¯ {selected_category.title()} Models for {selected_ram_range}") # Display models in a detailed table for model in models: with st.container(): col1, col2, col3 = st.columns([3, 1, 3]) with col1: st.markdown(f"### {model['name']}") with col2: st.markdown(f"**{model['size']}**") st.caption("Download Size") with col3: st.markdown(f"*{model['description']}*") # Add download suggestion if "Llama" in model['name']: st.caption("🔗 Available on Hugging Face & Ollama") elif "Mistral" in model['name']: st.caption("🔗 Available on Hugging Face & Mistral AI") elif "Gemma" in model['name']: st.caption("🔗 Available on Hugging Face & Google") else: st.caption("🔗 Available on Hugging Face") st.markdown("---") else: st.info(f"No {selected_category} models available for {selected_ram_range}") # Enhanced reference guide with st.expander("📘 Model Guide & Download Information"): st.markdown(""" ## 🚀 Popular Models by Category ### đŸŽ¯ **General Purpose Champions** - **Llama-2 Series**: Meta's flagship models (7B, 13B, 70B) - **Mistral Series**: Excellent efficiency and performance - **Gemma**: Google's efficient models (2B, 7B) - **Phi**: Microsoft's compact powerhouses ### đŸ’ģ **Code Specialists** - **CodeLlama**: Meta's dedicated coding models - **StarCoder**: BigCode's programming experts - **WizardCoder**: Enhanced coding capabilities - **DeepSeek-Coder**: Chinese tech giant's coder ### đŸ’Ŧ **Chat Optimized** - **Vicuna**: UC Berkeley's ChatGPT alternative - **Zephyr**: HuggingFace's chat specialist - **OpenChat**: High-quality conversation models - **Neural-Chat**: Intel-optimized chat models ### 🧮 **Reasoning Masters** - **WizardMath**: Mathematical problem solving - **MetaMath**: Advanced arithmetic reasoning - **Orca-2**: Microsoft's reasoning specialist - **Goat**: Specialized arithmetic model ### đŸ‘ī¸ **Multimodal Models** - **LLaVA**: Large Language and Vision Assistant - **MiniGPT-4**: Multimodal conversational AI ## 💾 Download Size Reference | Model Size | FP16 | 8-bit | 4-bit | Use Case | |------------|------|-------|-------|----------| | **1-3B** | 2-6GB | 1-3GB | 0.5-1.5GB | Mobile, Edge | | **7B** | 13GB | 7GB | 3.5GB | Desktop, Laptop | | **13B** | 26GB | 13GB | 7GB | Workstation | | **30-34B** | 60GB | 30GB | 15GB | Server, Cloud | | **70B** | 140GB | 70GB | 35GB | High-end Server | ## đŸ› ī¸ Where to Download ### **Primary Sources** - **🤗 Hugging Face**: Largest repository with 400,000+ models - **đŸĻ™ Ollama**: Simple CLI tool for local deployment - **đŸ“Ļ LM Studio**: User-friendly GUI for model management ### **Quantized Formats** - **GGUF**: Best for CPU inference (llama.cpp) - **GPTQ**: GPU-optimized quantization - **AWQ**: Advanced weight quantization ### **Download Tips** - Use `git lfs` for large models from Hugging Face - Consider bandwidth and storage before downloading - Start with 4-bit quantized versions for testing - Use `ollama pull model_name` for easiest setup ## 🔧 Optimization Strategies ### **Memory Reduction** - **4-bit quantization**: 75% memory reduction - **8-bit quantization**: 50% memory reduction - **CPU offloading**: Use system RAM for overflow ### **Speed Optimization** - **GPU acceleration**: CUDA, ROCm, Metal - **Batch processing**: Process multiple requests - **Context caching**: Reuse computations """) # Footer with updated resources st.markdown("---") st.markdown(""" ### 🔗 Essential Download & Deployment Tools **đŸ“Ļ Easy Model Deployment:** - [**Ollama**](https://ollama.ai/) – `curl -fsSL https://ollama.ai/install.sh | sh` - [**LM Studio**](https://lmstudio.ai/) – Drag-and-drop GUI for running models locally - [**GPT4All**](https://gpt4all.io/) – Cross-platform desktop app for local LLMs **🤗 Model Repositories:** - [**Hugging Face Hub**](https://huggingface.co/models) – Filter by model size, task, and license - [**TheBloke's Quantizations**](https://huggingface.co/TheBloke) – Pre-quantized models in GGUF/GPTQ format - [**Awesome LLM**](https://github.com/Hannibal046/Awesome-LLMs) – Curated list of models and resources --- """)