import streamlit as st
import pandas as pd
import numpy as np
import json
import time
from datetime import datetime
import requests
from typing import Dict, List, Any, Optional, Tuple
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
from io import StringIO
import uuid
from dotenv import load_dotenv
import sqlite3
import tempfile

# Load environment variables
load_dotenv()
# ===========================
# Configuration and Constants
# ===========================
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
DEFAULT_MODEL = "llama-3.3-70b-versatile"

AVAILABLE_MODELS = {
    "llama-3.3-70b-versatile": "Llama 3.3 70B (Most Advanced)",
    "llama3-70b-8192": "Llama 3 70B (Reliable)",
    "mixtral-8x7b-32768": "Mixtral 8x7B (Fast & Efficient)",
    "gemma2-9b-it": "Gemma 2 9B (Lightweight)",
    "qwen-qwq-32b": "Qwen QwQ 32B (Reasoning)",
    "deepseek-r1-distill-llama-70b": "DeepSeek R1 70B (Specialized)"
}
# Page configuration
st.set_page_config(
    page_title="⚡ Neural Data Analyst Premium",
    page_icon="⚡",
    layout="wide",
    initial_sidebar_state="expanded"
)
# ===========================
# Custom CSS
# ===========================
def inject_custom_css():
    """Inject custom CSS for styling"""
    st.markdown("""
    <style>
    .main > div {
        padding-top: 2rem;
    }
    .stApp {
        background: linear-gradient(135deg, #000000 0%, #1a1a1a 100%);
    }
    .main-header {
        background: linear-gradient(45deg, #ffd700, #ffff00);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        background-clip: text;
        text-align: center;
        font-size: 3rem;
        font-weight: 900;
        margin-bottom: 1rem;
        text-shadow: 0 0 30px rgba(255, 215, 0, 0.5);
    }
    .subtitle {
        text-align: center;
        color: #cccccc;
        font-size: 1.2rem;
        margin-bottom: 2rem;
    }
    .metric-card {
        background: rgba(15, 15, 15, 0.95);
        border: 1px solid rgba(255, 215, 0, 0.3);
        border-radius: 15px;
        padding: 20px;
        margin: 10px 0;
        backdrop-filter: blur(20px);
    }
    .success-msg {
        background: rgba(0, 255, 0, 0.1);
        color: #00ff00;
        padding: 10px;
        border-radius: 8px;
        border: 1px solid #00ff00;
    }
    .error-msg {
        background: rgba(255, 68, 68, 0.1);
        color: #ff4444;
        padding: 10px;
        border-radius: 8px;
        border: 1px solid #ff4444;
    }
    .neural-button {
        background: linear-gradient(45deg, #ffd700, #ffff00);
        color: #000000;
        border: none;
        border-radius: 10px;
        padding: 12px 24px;
        font-weight: bold;
        text-transform: uppercase;
        letter-spacing: 1px;
        cursor: pointer;
        transition: all 0.3s ease;
    }
    .neural-button:hover {
        transform: translateY(-2px);
        box-shadow: 0 10px 30px rgba(255, 215, 0, 0.4);
    }
    </style>
    """, unsafe_allow_html=True)
# ===========================
# Session State Management
# ===========================
def initialize_session_state():
    """Initialize all session state variables"""
    # API configuration
    api_key = get_api_key()

    defaults = {
        'api_key': api_key or "",
        'api_connected': bool(api_key),
        'selected_model': DEFAULT_MODEL,
        'uploaded_data': None,
        'data_schema': "",
        'analysis_history': [],
        'session_id': str(uuid.uuid4()),
        'example_query': "",
        'recent_queries': [],
        'show_eda_results': False,
        'show_ai_insights': False,
        'show_advanced_analytics': False,
        'eda_results': None,
        'ai_insights_text': None,
        'show_model_selection': False,
        'current_query': ""
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value

    # Test API connection if key exists but not connected
    if api_key and not st.session_state.api_connected:
        test_api_connection_silent(api_key, st.session_state.selected_model)
def get_api_key() -> Optional[str]:
    """Get API key from various sources"""
    # Try Streamlit secrets first (with proper error handling); only return
    # if the key is actually present, so the env fallbacks below still run
    try:
        if hasattr(st, 'secrets'):
            key = st.secrets.get('GROQ_API_KEY')
            if key:
                return key
    except Exception:
        # No secrets file exists, which is fine
        pass

    # Try environment variable
    if 'GROQ_API_KEY' in os.environ:
        return os.environ['GROQ_API_KEY']

    # Try loading from .env file
    load_dotenv(override=True)
    return os.environ.get('GROQ_API_KEY')
# ===========================
# API Functions
# ===========================
def test_api_connection_silent(api_key: str, model: str) -> bool:
    """Test API connection silently"""
    try:
        response = requests.post(
            GROQ_API_URL,
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json"
            },
            json={
                "model": model,
                "messages": [{"role": "user", "content": "Say 'OK' in one word."}],
                "temperature": 0.1,
                "max_tokens": 10
            },
            timeout=10
        )
        success = response.status_code == 200
        if success:
            st.session_state.api_connected = True
        return success
    except Exception:
        return False

def make_api_call(model: str, prompt: str, timeout: int = 30) -> str:
    """Make API call to Groq"""
    try:
        response = requests.post(
            GROQ_API_URL,
            headers={
                "Authorization": f"Bearer {st.session_state.api_key}",
                "Content-Type": "application/json"
            },
            json={
                "model": model,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0.1,
                "max_tokens": 1000
            },
            timeout=timeout
        )
        if response.status_code == 200:
            return response.json()['choices'][0]['message']['content'].strip()
        else:
            raise Exception(f"API error: {response.status_code}")
    except Exception as e:
        raise Exception(f"API call failed: {str(e)}")
# ===========================
# Data Processing Functions
# ===========================
def load_csv_file(uploaded_file) -> pd.DataFrame:
    """Load a CSV file into a DataFrame"""
    try:
        return pd.read_csv(uploaded_file)
    except Exception as e:
        raise Exception(f"Error reading CSV: {str(e)}")

def load_json_file(uploaded_file) -> pd.DataFrame:
    """Load a JSON file into a DataFrame"""
    try:
        return pd.read_json(uploaded_file)
    except Exception as e:
        raise Exception(f"Error reading JSON: {str(e)}")
def create_sample_data() -> pd.DataFrame:
    """Create sample sales data for demonstration"""
    np.random.seed(42)
    n_rows = 1000
    data = {
        'customer_id': range(1, n_rows + 1),
        'customer_name': [f"Customer_{i}" for i in range(1, n_rows + 1)],
        'product': np.random.choice(['Widget A', 'Widget B', 'Widget C', 'Gadget X', 'Gadget Y'], n_rows),
        'sales_amount': np.random.normal(2000, 500, n_rows).round(2),
        'order_date': pd.date_range('2023-01-01', periods=n_rows, freq='D'),
        'region': np.random.choice(['North', 'South', 'East', 'West'], n_rows),
        'sales_rep': np.random.choice(['John Smith', 'Jane Doe', 'Bob Johnson', 'Alice Brown'], n_rows),
        'customer_age': np.random.randint(25, 70, n_rows),
        'customer_segment': np.random.choice(['Premium', 'Standard', 'Basic'], n_rows),
        'discount_percent': np.random.uniform(0, 20, n_rows).round(1)
    }
    return pd.DataFrame(data)
def generate_database_schema(df: pd.DataFrame) -> Dict[str, str]:
    """Generate database schema from DataFrame"""
    table_name = "uploaded_data"
    column_definitions = []
    for col in df.columns:
        # Clean column name
        clean_col = col.replace(' ', '_').replace('-', '_').replace('.', '_')
        clean_col = ''.join(c for c in clean_col if c.isalnum() or c == '_')

        # Determine SQL data type
        dtype = df[col].dtype
        if pd.api.types.is_integer_dtype(dtype):
            sql_type = "INTEGER"
        elif pd.api.types.is_float_dtype(dtype):
            sql_type = "DECIMAL(10,2)"
        elif pd.api.types.is_datetime64_any_dtype(dtype):
            sql_type = "DATETIME"
        elif pd.api.types.is_bool_dtype(dtype):
            sql_type = "BOOLEAN"
        else:
            max_length = df[col].astype(str).str.len().max() if not df[col].empty else 50
            sql_type = "VARCHAR(50)" if max_length <= 50 else "TEXT"
        column_definitions.append(f"    {clean_col} {sql_type}")

    sql_schema = f"CREATE TABLE {table_name} (\n" + ",\n".join(column_definitions) + "\n);"
    simple_schema = f"{table_name}(" + ", ".join([
        col.replace(' ', '_').replace('-', '_').replace('.', '_')
        for col in df.columns
    ]) + ")"
    return {
        "sql_schema": sql_schema,
        "simple_schema": simple_schema
    }
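
# Illustrative example (not executed): for a DataFrame with columns
# "order id" (int64) and "unit price" (float64), generate_database_schema
# would return roughly:
#   sql_schema    -> CREATE TABLE uploaded_data (
#                        order_id INTEGER,
#                        unit_price DECIMAL(10,2)
#                    );
#   simple_schema -> "uploaded_data(order_id, unit_price)"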
# ===========================
# UI Components
# ===========================
def render_header():
    """Render the main header"""
    st.markdown('<h1 class="main-header">⚡ NEURAL DATA ANALYST</h1>', unsafe_allow_html=True)
    st.markdown('<p class="subtitle">Premium AI-Powered Business Intelligence Suite</p>', unsafe_allow_html=True)

def render_sidebar():
    """Render sidebar with API configuration and controls"""
    with st.sidebar:
        st.markdown("## 🔑 Neural Configuration")

        # Debug info (collapsed by default)
        with st.expander("🔧 Debug Info", expanded=False):
            render_debug_info()

        # API configuration
        if st.session_state.api_key:
            st.success("✅ API Key loaded from environment")

            # Model selection
            model = st.selectbox(
                "AI Model",
                list(AVAILABLE_MODELS.keys()),
                format_func=lambda x: AVAILABLE_MODELS[x],
                index=0,
                key="model_selector"
            )
            st.session_state.selected_model = model

            # Connection status
            if st.session_state.api_connected:
                st.markdown('<div class="success-msg">⚡ Neural Link: Active</div>', unsafe_allow_html=True)
            else:
                st.markdown('<div class="error-msg">⚡ Neural Link: Connecting...</div>', unsafe_allow_html=True)
        else:
            render_api_setup_instructions()

        # History section
        st.markdown("---")
        st.markdown("## 📊 Analysis History")
        if st.button("🗂️ View History", key="view_history"):
            show_history()
        if st.button("🗑️ Clear History", key="clear_history"):
            st.session_state.analysis_history = []
            st.success("History cleared!")
def render_debug_info():
    """Render debug information panel"""
    st.write(f"API Key in session: {'Yes' if st.session_state.api_key else 'No'}")
    if st.session_state.api_key:
        st.write(f"API Key (masked): {st.session_state.api_key[:10]}...{st.session_state.api_key[-5:]}")
    st.write(f"API Connected: {st.session_state.api_connected}")
    st.write(f"Environment GROQ_API_KEY: {'Set' if os.environ.get('GROQ_API_KEY') else 'Not set'}")
    if st.button("🔄 Reload API Key", key="reload_api"):
        reload_api_key()
    if st.button("🧪 Test API Connection", key="test_api"):
        test_api_connection()
def render_api_setup_instructions():
    """Render API setup instructions"""
    st.error("❌ No API key configured")
    st.markdown("""
    **Setup Required:**

    **For local development:**
    Create a `.env` file:
    ```
    GROQ_API_KEY=your_api_key_here
    ```

    **For Streamlit Cloud:**
    Add to app secrets:
    ```toml
    GROQ_API_KEY = "your_api_key_here"
    ```

    **Get an API key:** [Groq Console](https://console.groq.com/keys)
    """)
def reload_api_key():
    """Reload API key from environment"""
    api_key = get_api_key()
    if api_key:
        st.session_state.api_key = api_key
        if test_api_connection_silent(api_key, st.session_state.selected_model):
            st.session_state.api_connected = True
            st.success("✅ API key reloaded and tested successfully!")
        else:
            st.error("❌ API key loaded but connection test failed")
        st.rerun()
    else:
        st.error("No API key found in .env file")

def test_api_connection():
    """Test API connection with user feedback"""
    if st.session_state.api_key:
        with st.spinner("Testing API connection..."):
            success = test_api_connection_silent(st.session_state.api_key, st.session_state.selected_model)
        if success:
            st.session_state.api_connected = True
            st.success("✅ API connection successful!")
        else:
            st.error("❌ API connection failed")
    else:
        st.error("No API key to test")
# ===========================
# Data Upload and Display
# ===========================
def render_data_upload():
    """Render data upload section"""
    st.markdown("## 📊 Data Upload & Analysis")
    uploaded_file = st.file_uploader(
        "Choose a CSV or JSON file",
        type=['csv', 'json'],
        help="Upload your data file for comprehensive analysis"
    )
    if uploaded_file is not None:
        process_uploaded_file(uploaded_file)
    else:
        render_sample_data_option()

def process_uploaded_file(uploaded_file):
    """Process uploaded file and display results"""
    try:
        # Load data based on file type
        if uploaded_file.name.endswith('.csv'):
            df = load_csv_file(uploaded_file)
        elif uploaded_file.name.endswith('.json'):
            df = load_json_file(uploaded_file)
        else:
            st.error("Unsupported file type")
            return

        # Store in session state
        st.session_state.uploaded_data = df

        # Generate schema
        schema_info = generate_database_schema(df)
        st.session_state.data_schema = schema_info["simple_schema"]

        # Display success message and metrics
        st.success(f"✅ {uploaded_file.name} loaded successfully!")
        display_data_metrics(df, uploaded_file.size)

        # Display schema
        display_database_schema(schema_info, df)

        # Create visualizations
        create_data_visualizations(df)

        # Display action buttons
        display_analysis_actions(df)

        # Data preview
        with st.expander("📋 Data Preview", expanded=False):
            st.dataframe(df.head(100), use_container_width=True)
    except Exception as e:
        st.error(f"Error loading file: {str(e)}")
def render_sample_data_option():
    """Render option to load sample data"""
    st.info("📁 Upload a CSV or JSON file to get started")
    if st.button("🚀 Load Sample Data", help="Load sample sales data for testing"):
        sample_data = create_sample_data()
        st.session_state.uploaded_data = sample_data
        schema_info = generate_database_schema(sample_data)
        st.session_state.data_schema = schema_info["simple_schema"]
        st.success("✅ Sample data loaded!")
        st.rerun()

def display_data_metrics(df: pd.DataFrame, file_size: int):
    """Display key metrics about the loaded data"""
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("📊 Rows", f"{len(df):,}")
    with col2:
        st.metric("📋 Columns", len(df.columns))
    with col3:
        st.metric("💾 Size", f"{file_size / 1024:.1f} KB")
    with col4:
        st.metric("❓ Missing", f"{df.isnull().sum().sum():,}")
def display_database_schema(schema_info: Dict[str, str], df: pd.DataFrame):
    """Display database schema information"""
    with st.expander("🗄️ Database Schema", expanded=True):
        st.markdown("**Generated Schema for AI Queries:**")
        st.code(st.session_state.data_schema, language="sql")
        st.markdown("**Full SQL Schema:**")
        st.code(schema_info["sql_schema"], language="sql")

        # Column details
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("**📊 Numeric Columns:**")
            numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
            if numeric_cols:
                for col in numeric_cols:
                    st.write(f"• {col} ({df[col].dtype})")
            else:
                st.write("None found")
        with col2:
            st.markdown("**📝 Text Columns:**")
            text_cols = df.select_dtypes(include=['object']).columns.tolist()
            if text_cols:
                for col in text_cols:
                    st.write(f"• {col} (text)")
            else:
                st.write("None found")
def display_analysis_actions(df: pd.DataFrame):
    """Display analysis action buttons"""
    st.markdown("### 🚀 Analysis Actions")
    col1, col2, col3 = st.columns(3)
    with col1:
        if st.button("🔬 Complete EDA", key="eda_button", help="Comprehensive Exploratory Data Analysis"):
            with st.spinner("Performing comprehensive EDA analysis..."):
                perform_eda(df)
            st.session_state.show_eda_results = True
            st.session_state.show_ai_insights = False
            st.session_state.show_advanced_analytics = False
    with col2:
        if st.button("🤖 AI Insights", key="ai_insights", help="Generate AI-powered insights"):
            if check_api_availability():
                with st.spinner("🤖 Generating AI insights..."):
                    generate_ai_insights(df)
                st.session_state.show_ai_insights = True
                st.session_state.show_eda_results = False
                st.session_state.show_advanced_analytics = False
    with col3:
        if st.button("📈 Advanced Analytics", key="advanced_analytics", help="Advanced statistical analysis"):
            st.session_state.show_advanced_analytics = True
            st.session_state.show_eda_results = False
            st.session_state.show_ai_insights = False

    # Display results based on selection
    display_analysis_results(df)

def display_analysis_results(df: pd.DataFrame):
    """Display analysis results based on user selection"""
    if st.session_state.show_eda_results and st.session_state.eda_results:
        display_eda_results(st.session_state.eda_results)
    elif st.session_state.show_ai_insights and st.session_state.ai_insights_text:
        display_ai_insights(st.session_state.ai_insights_text)
    elif st.session_state.show_advanced_analytics:
        display_advanced_analytics(df)
# ===========================
# Visualization Functions
# ===========================
def create_data_visualizations(df: pd.DataFrame):
    """Create multiple visualizations for the uploaded data"""
    st.markdown("### 📊 Data Visualizations")
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()

    tabs = st.tabs(["📊 Overview", "📈 Distributions", "🔗 Relationships", "📋 Summary"])
    with tabs[0]:
        create_overview_visualizations(df, numeric_cols, categorical_cols)
    with tabs[1]:
        create_distribution_visualizations(df, numeric_cols)
    with tabs[2]:
        create_relationship_visualizations(df, numeric_cols, categorical_cols)
    with tabs[3]:
        create_summary_statistics(df, numeric_cols, categorical_cols)

def create_overview_visualizations(df: pd.DataFrame, numeric_cols: List[str], categorical_cols: List[str]):
    """Create overview visualizations"""
    col1, col2 = st.columns(2)
    with col1:
        if len(numeric_cols) >= 2:
            # Correlation heatmap
            corr_matrix = df[numeric_cols].corr()
            fig = px.imshow(corr_matrix,
                            text_auto=True,
                            aspect="auto",
                            title="Correlation Heatmap",
                            color_continuous_scale="RdBu_r")
            fig.update_layout(height=400)
            st.plotly_chart(fig, use_container_width=True)
    with col2:
        if len(categorical_cols) > 0:
            # Categorical distribution
            col = categorical_cols[0]
            value_counts = df[col].value_counts().head(10)
            fig = px.pie(values=value_counts.values,
                         names=value_counts.index,
                         title=f"Distribution of {col}")
            fig.update_layout(height=400)
            st.plotly_chart(fig, use_container_width=True)
def create_distribution_visualizations(df: pd.DataFrame, numeric_cols: List[str]):
    """Create distribution visualizations"""
    if len(numeric_cols) > 0:
        col1, col2 = st.columns(2)
        with col1:
            selected_col = st.selectbox("Select numeric column for histogram", numeric_cols)
            fig = px.histogram(df, x=selected_col, title=f"Distribution of {selected_col}")
            st.plotly_chart(fig, use_container_width=True)
        with col2:
            fig = px.box(df, y=selected_col, title=f"Box Plot of {selected_col}")
            st.plotly_chart(fig, use_container_width=True)

def create_relationship_visualizations(df: pd.DataFrame, numeric_cols: List[str], categorical_cols: List[str]):
    """Create relationship visualizations"""
    if len(numeric_cols) >= 2:
        col1, col2 = st.columns(2)
        with col1:
            x_col = st.selectbox("X-axis", numeric_cols, key="x_scatter")
        with col2:
            y_col = st.selectbox("Y-axis", numeric_cols, key="y_scatter",
                                 index=1 if len(numeric_cols) > 1 else 0)
        color_col = None
        if categorical_cols:
            color_col = st.selectbox("Color by (optional)", ["None"] + categorical_cols)
            color_col = color_col if color_col != "None" else None
        fig = px.scatter(df, x=x_col, y=y_col, color=color_col,
                         title=f"{x_col} vs {y_col}")
        st.plotly_chart(fig, use_container_width=True)
def create_summary_statistics(df: pd.DataFrame, numeric_cols: List[str], categorical_cols: List[str]):
    """Create summary statistics"""
    st.markdown("#### 📋 Data Summary")
    summary_data = {
        "Metric": ["Total Rows", "Total Columns", "Numeric Columns", "Categorical Columns", "Missing Values", "Memory Usage"],
        "Value": [
            f"{len(df):,}",
            f"{len(df.columns)}",
            f"{len(numeric_cols)}",
            f"{len(categorical_cols)}",
            f"{df.isnull().sum().sum():,}",
            f"{df.memory_usage(deep=True).sum() / 1024**2:.2f} MB"
        ]
    }
    summary_df = pd.DataFrame(summary_data)
    st.dataframe(summary_df, use_container_width=True, hide_index=True)
# ===========================
# Query Interface
# ===========================
def render_query_interface():
    """Render natural language query interface"""
    st.markdown("## 🔍 AI Query Interface")

    # Show current schema
    if st.session_state.data_schema:
        with st.expander("🗄️ Current Data Schema", expanded=False):
            st.code(st.session_state.data_schema, language="sql")

    # Query input
    query_input = st.text_area(
        "Natural Language Query",
        value=st.session_state.example_query,
        placeholder="Example: Show me the top 10 customers by total sales amount",
        height=100,
        help="Describe what you want to analyze in plain English"
    )

    # Clear example query after use
    if st.session_state.example_query and query_input == st.session_state.example_query:
        st.session_state.example_query = ""

    # Analysis buttons
    api_available = check_api_availability()
    col1, col2 = st.columns(2)
    with col1:
        if st.button("🧠 Analyze Query",
                     disabled=not api_available or not query_input.strip(),
                     help="Generate SQL and insights for your query"):
            if api_available and query_input.strip():
                analyze_single_query(query_input.strip())
    with col2:
        if st.button("⚔️ Model Battle",
                     disabled=not api_available or not query_input.strip(),
                     help="Compare multiple AI models on your query"):
            if api_available and query_input.strip():
                st.session_state.current_query = query_input.strip()
                st.session_state.show_model_selection = True
                st.rerun()

    # Status messages
    display_api_status()

    # Recent queries
    display_recent_queries()

    # Model selection interface
    if st.session_state.show_model_selection:
        render_model_selection_interface()

def check_api_availability() -> bool:
    """Check if API is available"""
    return bool(st.session_state.api_key and len(st.session_state.api_key) > 10)

def display_api_status():
    """Display API connection status"""
    if not check_api_availability():
        st.warning("⚠️ **AI Features Disabled**: API key not detected. Use the '🔄 Reload API Key' button in the sidebar.")
    else:
        st.success("✅ **AI Features Active**: Ready for natural language queries and model battles!")

def display_recent_queries():
    """Display recent queries"""
    if st.session_state.recent_queries:
        with st.expander("🕑 Recent Queries", expanded=False):
            for i, recent_query in enumerate(st.session_state.recent_queries[-5:]):
                if st.button(f"🔁 {recent_query[:60]}...", key=f"recent_{i}"):
                    st.session_state.example_query = recent_query
                    st.rerun()
# ===========================
# Analysis Functions
# ===========================
def analyze_single_query(query: str):
    """Analyze query with a single model"""
    # Add to recent queries
    if query not in st.session_state.recent_queries:
        st.session_state.recent_queries.append(query)
        st.session_state.recent_queries = st.session_state.recent_queries[-10:]

    with st.spinner(f"🧠 Analyzing with {st.session_state.selected_model}..."):
        try:
            # Generate SQL and insights
            sql_result = generate_sql(query)
            insights_result = generate_insights(query)

            # Save to history
            save_analysis_to_history({
                "type": "Single Query Analysis",
                "query": query,
                "schema": st.session_state.data_schema,
                "sql_result": sql_result,
                "insights": insights_result,
                "model": st.session_state.selected_model
            })

            # Display results
            display_query_results(sql_result, insights_result)
        except Exception as e:
            st.error(f"Analysis failed: {str(e)}")
def generate_sql(query: str) -> str:
    """Generate SQL from natural language query"""
    prompt = f"""Convert this natural language query to SQL:
Database Schema: {st.session_state.data_schema}
Natural Language Query: {query}
Instructions:
- Use the exact column names from the schema
- Generate clean, optimized SQL
- Include appropriate WHERE, GROUP BY, ORDER BY clauses
- Use proper SQL syntax
- Return only the SQL query without explanations
SQL Query:"""
    return make_api_call(st.session_state.selected_model, prompt)

def generate_insights(query: str) -> str:
    """Generate business insights from query"""
    prompt = f"""Provide detailed business insights for this data analysis query:
Database Schema: {st.session_state.data_schema}
Query: {query}
Generate 4-5 key business insights in this format:
**Insight Title 1**: Detailed explanation of what this analysis reveals about the business
**Insight Title 2**: Another important finding or recommendation
(continue for 4-5 insights)
Focus on:
- Business implications
- Actionable recommendations
- Data patterns and trends
- Strategic insights
- Potential opportunities or risks
Business Insights:"""
    return make_api_call(st.session_state.selected_model, prompt)
def display_query_results(sql_result: str, insights_result: str):
    """Display query analysis results"""
    st.markdown("## 🎯 Analysis Results")
    tabs = st.tabs(["📝 SQL Query", "💡 AI Insights", "🚀 Execute Query"])
    with tabs[0]:
        st.markdown("### 📝 Generated SQL Query")
        st.code(sql_result, language='sql')
        if st.button("📋 Copy SQL", key="copy_sql"):
            st.info("Use the copy icon on the code block above, or select the SQL and press Ctrl+C.")
    with tabs[1]:
        st.markdown("### 💡 AI-Powered Business Insights")
        insights = parse_insights(insights_result)
        for insight in insights:
            st.markdown(f"""
            <div class="metric-card">
                <h4 style="color: #ffd700;">💡 {insight['title']}</h4>
                <p>{insight['text']}</p>
            </div>
            """, unsafe_allow_html=True)
    with tabs[2]:
        st.markdown("### 🚀 Execute Query on Your Data")
        if st.session_state.uploaded_data is not None:
            if st.button("▶️ Run SQL on Uploaded Data", key="execute_sql"):
                execute_sql_on_data(sql_result, st.session_state.uploaded_data)
        else:
            st.info("Upload data first to execute SQL queries")
def execute_sql_on_data(sql_query: str, df: pd.DataFrame):
    """Execute SQL query on the uploaded DataFrame"""
    try:
        # Load the DataFrame into an in-memory SQLite database
        # (no temporary .db file to create or clean up)
        conn = sqlite3.connect(':memory:')
        df.to_sql('uploaded_data', conn, if_exists='replace', index=False)

        # Clean and execute SQL
        clean_sql = sql_query.strip()
        if clean_sql.lower().startswith('sql:'):
            clean_sql = clean_sql[4:].strip()

        # Execute query
        result_df = pd.read_sql_query(clean_sql, conn)
        conn.close()

        # Display results
        st.success("✅ Query executed successfully!")
        col1, col2 = st.columns(2)
        with col1:
            st.metric("Rows Returned", len(result_df))
        with col2:
            st.metric("Columns", len(result_df.columns))
        st.markdown("#### 📊 Query Results")
        st.dataframe(result_df, use_container_width=True)

        # Auto visualization
        if len(result_df) > 0:
            auto_visualize_results(result_df)
    except Exception as e:
        st.error(f"Error executing SQL: {str(e)}")
        st.info("💡 Tip: The AI-generated SQL might need adjustment for your specific data structure")
def auto_visualize_results(result_df: pd.DataFrame):
    """Automatically create visualizations for query results"""
    st.markdown("#### 📈 Auto-Generated Visualization")
    numeric_cols = result_df.select_dtypes(include=[np.number]).columns.tolist()
    if len(numeric_cols) == 1 and len(result_df) <= 50:
        text_cols = result_df.select_dtypes(include=['object']).columns.tolist()
        if text_cols:
            fig = px.bar(result_df,
                         x=text_cols[0],
                         y=numeric_cols[0],
                         title=f"{numeric_cols[0]} by {text_cols[0]}")
            st.plotly_chart(fig, use_container_width=True)
    elif len(numeric_cols) >= 1 and len(result_df) > 10:
        fig = px.line(result_df,
                      y=numeric_cols[0],
                      title=f"Trend: {numeric_cols[0]}")
        st.plotly_chart(fig, use_container_width=True)
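
# Note: the auto-visualization heuristic is deliberately simple: a bar chart
# for small results with one numeric and one text column, a line chart for
# longer numeric series; everything else is left to the manual chart tabs.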
# ===========================
# Model Comparison Functions
# ===========================
def render_model_selection_interface():
    """Render model selection interface for battle"""
    st.markdown("---")
    st.markdown("## ⚔️ Model Battle Setup")
    st.markdown(f"**Query:** {st.session_state.current_query}")
    st.markdown("### 🎯 Select Models for Battle")

    col1, col2 = st.columns(2)
    selected_models = []
    with col1:
        st.markdown("**🏆 High-Performance Models:**")
        if st.checkbox("Llama 3.3 70B (Most Advanced)", key="battle_llama33", value=True):
            selected_models.append("llama-3.3-70b-versatile")
        if st.checkbox("Llama 3 70B (Reliable)", key="battle_llama3", value=True):
            selected_models.append("llama3-70b-8192")
        if st.checkbox("DeepSeek R1 70B (Specialized)", key="battle_deepseek", value=False):
            selected_models.append("deepseek-r1-distill-llama-70b")
    with col2:
        st.markdown("**⚡ Fast & Efficient Models:**")
        if st.checkbox("Mixtral 8x7B (Fast & Efficient)", key="battle_mixtral", value=True):
            selected_models.append("mixtral-8x7b-32768")
        if st.checkbox("Gemma 2 9B (Lightweight)", key="battle_gemma", value=False):
            selected_models.append("gemma2-9b-it")
        if st.checkbox("Qwen QwQ 32B (Reasoning)", key="battle_qwen", value=False):
            selected_models.append("qwen-qwq-32b")

    if selected_models:
        st.success(f"✅ **Selected Models:** {len(selected_models)} models ready for battle")
        col1, col2, col3 = st.columns(3)
        with col1:
            test_rounds = st.selectbox("Test Rounds", [1, 2, 3], index=0)
        with col2:
            timeout_seconds = st.selectbox("Timeout (seconds)", [10, 20, 30], index=1)
        with col3:
            if st.button("❌ Cancel", key="cancel_battle"):
                st.session_state.show_model_selection = False
                st.rerun()
        if st.button("🚀 Start Model Battle", key="start_battle", type="primary"):
            st.session_state.show_model_selection = False
            run_model_comparison(selected_models, test_rounds, timeout_seconds)
    else:
        st.warning("⚠️ Please select at least one model for the battle")
        if st.button("❌ Cancel", key="cancel_no_models"):
            st.session_state.show_model_selection = False
            st.rerun()
def run_model_comparison(selected_models: List[str], rounds: int, timeout: int):
    """Run model comparison with selected models"""
    st.markdown("## ⚔️ Model Battle Arena")
    st.markdown(f"*Testing {len(selected_models)} models with {rounds} round(s) each...*")

    total_tests = len(selected_models) * rounds
    progress_bar = st.progress(0)
    status_text = st.empty()
    results = []
    test_count = 0

    for model in selected_models:
        model_results = []
        for round_num in range(rounds):
            test_count += 1
            status_text.text(f"Testing {model} (Round {round_num + 1}/{rounds})...")
            progress_bar.progress(test_count / total_tests)
            try:
                start_time = time.time()
                response = generate_comparison_response(model, timeout)
                response_time = time.time() - start_time
                score = score_model_response(response, response_time)
                model_results.append({
                    'response': response,
                    'response_time': response_time * 1000,  # stored in ms
                    'score': score,
                    'success': True,
                    'round': round_num + 1
                })
            except Exception as e:
                model_results.append({
                    'response': f"Error: {str(e)}",
                    'response_time': 0,
                    'score': 0,
                    'success': False,
                    'round': round_num + 1
                })
            time.sleep(0.5)  # Rate limiting

        # Calculate averages
        successful_results = [r for r in model_results if r['success']]
        if successful_results:
            avg_score = sum(r['score'] for r in successful_results) / len(successful_results)
            avg_time = sum(r['response_time'] for r in successful_results) / len(successful_results)
            best_response = max(successful_results, key=lambda x: x['score'])['response']
        else:
            avg_score = 0
            avg_time = 0
            best_response = "All attempts failed"

        results.append({
            'model': model,
            'avg_score': avg_score,
            'avg_response_time': avg_time,
            'success_rate': len(successful_results) / len(model_results) * 100,
            'best_response': best_response,
            'all_results': model_results,
            'success': len(successful_results) > 0
        })

    progress_bar.empty()
    status_text.empty()

    # Save to history
    save_analysis_to_history({
        "type": "Model Comparison",
        "query": st.session_state.current_query,
        "results": results
    })
    display_comparison_results(results)
def generate_comparison_response(model: str, timeout: int) -> str:
    """Generate response for model comparison"""
    prompt = f"""Analyze this query and provide SQL + business insight:
Schema: {st.session_state.data_schema}
Query: {st.session_state.current_query}
Respond in this exact format:
SQL: [your SQL query here]
INSIGHT: [your business insight here]
Keep response concise and focused."""
    return make_api_call(model, prompt, timeout)

def score_model_response(response: str, response_time: float) -> int:
    """Score model response based on quality and speed"""
    response_lower = response.lower()

    # Quality scoring
    has_sql = any(keyword in response_lower for keyword in ['select', 'from', 'where', 'group by', 'order by'])
    sql_score = 40 if has_sql else 0
    has_insight = any(keyword in response_lower for keyword in ['insight', 'analysis', 'recommendation', 'business'])
    insight_score = 30 if has_insight else 0
    length_score = min(len(response) / 20, 20)
    speed_score = max(0, 10 - (response_time * 2)) if response_time > 0 else 0  # response_time in seconds

    total_score = sql_score + insight_score + length_score + speed_score
    return max(0, min(100, round(total_score)))
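
# Worked example (hypothetical inputs): a 400-character response containing
# SQL keywords and the word "insight", returned in 2.0 s, scores
# 40 (SQL) + 30 (insight) + 20 (length, capped) + 6 (speed: 10 - 2*2) = 96.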
def display_comparison_results(results: List[Dict]):
    """Display model comparison results"""
    sorted_results = sorted([r for r in results if r['success']], key=lambda x: x['avg_score'], reverse=True)
    if not sorted_results:
        st.error("No successful results to display")
        return

    # Winner announcement
    winner = sorted_results[0]
    fastest = min(sorted_results, key=lambda x: x['avg_response_time'])
    most_reliable = max(sorted_results, key=lambda x: x['success_rate'])

    st.markdown("### 🏆 Battle Results")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.markdown(f"""
        <div style="background: linear-gradient(45deg, #FFD700, #FFA500); padding: 20px; border-radius: 10px; text-align: center;">
            <h3 style="color: #000; margin: 0;">🏆 HIGHEST SCORE</h3>
            <h4 style="color: #000; margin: 5px 0;">{winner['model'].replace('-', ' ').title()}</h4>
            <p style="color: #000; margin: 0;">Avg Score: {winner['avg_score']:.1f}/100</p>
        </div>
        """, unsafe_allow_html=True)
    with col2:
        st.markdown(f"""
        <div style="background: linear-gradient(45deg, #40E0D0, #48D1CC); padding: 20px; border-radius: 10px; text-align: center;">
            <h3 style="color: #000; margin: 0;">⚡ FASTEST</h3>
            <h4 style="color: #000; margin: 5px 0;">{fastest['model'].replace('-', ' ').title()}</h4>
            <p style="color: #000; margin: 0;">Avg: {fastest['avg_response_time']:.0f}ms</p>
        </div>
        """, unsafe_allow_html=True)
    with col3:
        st.markdown(f"""
        <div style="background: linear-gradient(45deg, #98FB98, #90EE90); padding: 20px; border-radius: 10px; text-align: center;">
            <h3 style="color: #000; margin: 0;">🎯 MOST RELIABLE</h3>
            <h4 style="color: #000; margin: 5px 0;">{most_reliable['model'].replace('-', ' ').title()}</h4>
            <p style="color: #000; margin: 0;">{most_reliable['success_rate']:.0f}% Success</p>
        </div>
        """, unsafe_allow_html=True)

    # Performance chart
    display_performance_chart(sorted_results)
    # Detailed results
    display_detailed_results(sorted_results)
def display_performance_chart(results: List[Dict]):
    """Display performance comparison chart"""
    st.markdown("### 📊 Performance Comparison")
    models = [r['model'].replace('-', ' ').replace('versatile', '').replace('8192', '').title() for r in results]
    scores = [r['avg_score'] for r in results]
    times = [r['avg_response_time'] for r in results]

    fig = go.Figure()
    fig.add_trace(go.Bar(
        name='Average Score',
        x=models,
        y=scores,
        yaxis='y',
        marker_color='#FFD700',
        text=[f"{s:.1f}" for s in scores],
        textposition='auto'
    ))
    fig.add_trace(go.Scatter(
        name='Response Time (ms)',
        x=models,
        y=times,
        yaxis='y2',
        mode='lines+markers',
        line=dict(color='#FF6B6B', width=3),
        marker=dict(size=10)
    ))
    fig.update_layout(
        title=f'Model Performance: "{st.session_state.current_query[:50]}..."',
        xaxis=dict(title='Models'),
        yaxis=dict(title='Score (0-100)', side='left'),
        yaxis2=dict(title='Response Time (ms)', side='right', overlaying='y'),
        height=400,
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        font=dict(color='white')
    )
    st.plotly_chart(fig, use_container_width=True)
def display_detailed_results(results: List[Dict]):
    """Display detailed results for each model"""
    st.markdown("### 📋 Detailed Results")
    for i, result in enumerate(results):
        medal = '🥇' if i == 0 else '🥈' if i == 1 else '🥉' if i == 2 else '🏅'
        with st.expander(f"{medal} {result['model']} - Avg Score: {result['avg_score']:.1f}/100"):
            col1, col2, col3, col4 = st.columns(4)
            with col1:
                st.metric("Avg Score", f"{result['avg_score']:.1f}/100")
            with col2:
                st.metric("Avg Speed", f"{result['avg_response_time']:.0f}ms")
            with col3:
                st.metric("Success Rate", f"{result['success_rate']:.0f}%")
            with col4:
                st.metric("Total Tests", len(result['all_results']))
            st.markdown("**Best Response:**")
            display_model_response(result['best_response'], i)

def display_model_response(response: str, index: int):
    """Display formatted model response"""
    if "SQL:" in response and "INSIGHT:" in response:
        parts = response.split("INSIGHT:")
        sql_part = parts[0].replace("SQL:", "").strip()
        insight_part = parts[1].strip()
        st.markdown("**SQL:**")
        st.code(sql_part, language='sql')
        st.markdown("**Insight:**")
        st.markdown(insight_part)
    else:
        st.text_area("Model response", response, height=150,
                     key=f"response_{index}", label_visibility="collapsed")
# ===========================
# EDA Functions
# ===========================
def perform_eda(df: pd.DataFrame):
    """Perform comprehensive EDA analysis"""
    eda_results = {
        'overview': generate_overview_stats(df),
        'distributions': generate_distribution_charts(df),
        'correlations': generate_correlation_analysis(df),
        'insights': generate_eda_insights(df),
        'data_quality': analyze_data_quality(df)
    }
    st.session_state.eda_results = eda_results

    # Save to history
    save_analysis_to_history({
        "type": "EDA",
        "data_shape": df.shape,
        "results": "EDA analysis completed"
    })

def generate_overview_stats(df: pd.DataFrame) -> Dict:
    """Generate overview statistics"""
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    categorical_cols = df.select_dtypes(include=['object', 'category']).columns
    datetime_cols = df.select_dtypes(include=['datetime64']).columns

    overview = {
        'total_rows': len(df),
        'total_columns': len(df.columns),
        'numeric_columns': len(numeric_cols),
        'categorical_columns': len(categorical_cols),
        'datetime_columns': len(datetime_cols),
        'missing_values_total': df.isnull().sum().sum(),
        'duplicate_rows': df.duplicated().sum(),
        'memory_usage': f"{df.memory_usage(deep=True).sum() / 1024**2:.2f} MB"
    }
    if len(numeric_cols) > 0:
        overview['summary_stats'] = df[numeric_cols].describe()
    return overview
def generate_distribution_charts(df: pd.DataFrame) -> Dict:
    """Generate distribution charts"""
    charts = {}
    numeric_cols = df.select_dtypes(include=[np.number]).columns[:6]  # Limit to 6
    if len(numeric_cols) > 0:
        # Create subplots for distributions
        fig = make_subplots(
            rows=(len(numeric_cols) + 1) // 2,
            cols=2,
            subplot_titles=list(numeric_cols)
        )
        for i, col in enumerate(numeric_cols):
            row = (i // 2) + 1
            col_pos = (i % 2) + 1
            fig.add_trace(
                go.Histogram(x=df[col], name=col, showlegend=False),
                row=row, col=col_pos
            )
        fig.update_layout(
            title="Numeric Distributions",
            height=300 * ((len(numeric_cols) + 1) // 2)
        )
        charts['distributions'] = fig
    return charts
def generate_correlation_analysis(df: pd.DataFrame) -> Dict:
    """Generate correlation analysis"""
    correlations = {}
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    if len(numeric_cols) >= 2:
        corr_matrix = df[numeric_cols].corr()

        # Heatmap
        fig = px.imshow(
            corr_matrix,
            text_auto=True,
            aspect="auto",
            title="Correlation Matrix",
            color_continuous_scale="RdBu_r"
        )
        correlations['heatmap'] = fig

        # Top correlations
        corr_pairs = []
        for i in range(len(corr_matrix.columns)):
            for j in range(i + 1, len(corr_matrix.columns)):
                corr_pairs.append({
                    'Variable 1': corr_matrix.columns[i],
                    'Variable 2': corr_matrix.columns[j],
                    'Correlation': corr_matrix.iloc[i, j]
                })
        if corr_pairs:
            corr_df = pd.DataFrame(corr_pairs)
            corr_df['Abs_Correlation'] = abs(corr_df['Correlation'])
            corr_df = corr_df.sort_values('Abs_Correlation', ascending=False)
            correlations['top_correlations'] = corr_df.head(10)
    return correlations
def generate_eda_insights(df: pd.DataFrame) -> List[Dict]:
    """Generate EDA insights"""
    insights = []

    # Basic insights
    if df.isnull().sum().sum() > 0:
        missing_pct = (df.isnull().sum().sum() / (len(df) * len(df.columns))) * 100
        insights.append({
            'title': 'Missing Data Alert',
            'description': f'Dataset contains {missing_pct:.1f}% missing values across all cells.'
        })
    if df.duplicated().sum() > 0:
        dup_pct = (df.duplicated().sum() / len(df)) * 100
        insights.append({
            'title': 'Duplicate Rows Found',
            'description': f'{df.duplicated().sum()} duplicate rows detected ({dup_pct:.1f}% of data).'
        })

    # Numeric insights
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    if len(numeric_cols) > 0:
        for col in numeric_cols[:3]:  # Top 3 numeric columns
            skewness = df[col].skew()
            if abs(skewness) > 1:
                insights.append({
                    'title': f'Skewed Distribution: {col}',
                    'description': f'{col} shows {"positive" if skewness > 0 else "negative"} skew ({skewness:.2f}).'
                })
    return insights[:5]  # Limit to 5 insights
def analyze_data_quality(df: pd.DataFrame) -> Dict:
    """Analyze data quality"""
    quality = {}

    # Missing values chart
    missing_data = df.isnull().sum()
    missing_data = missing_data[missing_data > 0].sort_values(ascending=False)
    if len(missing_data) > 0:
        fig = px.bar(
            x=missing_data.index,
            y=missing_data.values,
            title="Missing Values by Column",
            labels={'x': 'Column', 'y': 'Missing Count'}
        )
        quality['missing_values'] = fig

    # Data types chart
    dtype_counts = df.dtypes.value_counts()
    fig = px.pie(
        values=dtype_counts.values,
        names=dtype_counts.index.astype(str),
        title="Data Types Distribution"
    )
    quality['data_types'] = fig

    # Duplicates info
    quality['duplicates'] = {
        'count': df.duplicated().sum(),
        'percentage': (df.duplicated().sum() / len(df)) * 100
    }
    return quality
def display_eda_results(results: Dict):
    """Display EDA results"""
    st.markdown("---")
    st.markdown("## 🔬 Comprehensive EDA Results")
    tabs = st.tabs(["📊 Overview", "📈 Distributions", "🔗 Correlations", "🎯 Insights", "📋 Data Quality"])
    with tabs[0]:
        display_overview_tab(results.get('overview', {}))
    with tabs[1]:
        display_distributions_tab(results.get('distributions', {}))
    with tabs[2]:
        display_correlations_tab(results.get('correlations', {}))
    with tabs[3]:
        display_insights_tab(results.get('insights', []))
    with tabs[4]:
        display_data_quality_tab(results.get('data_quality', {}))

def display_overview_tab(overview: Dict):
    """Display overview statistics"""
    st.markdown("### 📊 Dataset Overview")
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Total Rows", f"{overview.get('total_rows', 0):,}")
    with col2:
        st.metric("Total Columns", overview.get('total_columns', 0))
    with col3:
        st.metric("Numeric Columns", overview.get('numeric_columns', 0))
    with col4:
        st.metric("Categorical Columns", overview.get('categorical_columns', 0))

    col5, col6, col7, col8 = st.columns(4)
    with col5:
        st.metric("Missing Values", f"{overview.get('missing_values_total', 0):,}")
    with col6:
        st.metric("Duplicate Rows", f"{overview.get('duplicate_rows', 0):,}")
    with col7:
        st.metric("Memory Usage", overview.get('memory_usage', '0 MB'))
    with col8:
        st.metric("DateTime Columns", overview.get('datetime_columns', 0))

    if 'summary_stats' in overview:
        st.markdown("### 📈 Summary Statistics")
        st.dataframe(overview['summary_stats'], use_container_width=True)
def display_distributions_tab(distributions: Dict):
    """Display distribution charts"""
    st.markdown("### 📈 Data Distributions")
    if distributions:
        for chart_name, chart in distributions.items():
            st.plotly_chart(chart, use_container_width=True)
    else:
        st.info("No distribution charts available")

def display_correlations_tab(correlations: Dict):
    """Display correlation analysis"""
    st.markdown("### 🔗 Correlation Analysis")
    if 'heatmap' in correlations:
        st.plotly_chart(correlations['heatmap'], use_container_width=True)
    if 'top_correlations' in correlations:
        st.markdown("#### 🏆 Top Correlations")
        st.dataframe(correlations['top_correlations'], use_container_width=True)

def display_insights_tab(insights: List[Dict]):
    """Display generated insights"""
    st.markdown("### 🎯 Generated Insights")
    if insights:
        for insight in insights:
            st.markdown(f"""
            <div class="metric-card">
                <h4 style="color: #ffd700;">💡 {insight['title']}</h4>
                <p>{insight['description']}</p>
            </div>
            """, unsafe_allow_html=True)
    else:
        st.info("No insights generated")

def display_data_quality_tab(quality: Dict):
    """Display data quality assessment"""
    st.markdown("### 📋 Data Quality Assessment")
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("#### Missing Values")
        if 'missing_values' in quality:
            st.plotly_chart(quality['missing_values'], use_container_width=True)
        else:
            st.info("No missing values chart available")
    with col2:
        st.markdown("#### Data Types")
        if 'data_types' in quality:
            st.plotly_chart(quality['data_types'], use_container_width=True)
        else:
            st.info("No data types chart available")

    if 'duplicates' in quality:
        st.markdown("#### Duplicate Analysis")
        dup_info = quality['duplicates']
        col1, col2 = st.columns(2)
        with col1:
            st.metric("Duplicate Count", dup_info.get('count', 0))
        with col2:
            st.metric("Duplicate %", f"{dup_info.get('percentage', 0):.1f}%")
# ===========================
# AI Insights Functions
# ===========================
def generate_ai_insights(df: pd.DataFrame):
    """Generate AI-powered insights"""
    try:
        # Prepare data summary
        summary = f"""
Dataset Analysis:
- Rows: {len(df):,}
- Columns: {len(df.columns)}
- Schema: {st.session_state.data_schema}
- Missing values: {df.isnull().sum().sum():,}
Column types:
{df.dtypes.to_string()}
Sample data:
{df.head(3).to_string()}
"""
        prompt = f"""Analyze this dataset and provide 5 key business insights:
{summary}
Format as:
1. **Insight Title**: Description
2. **Insight Title**: Description
(etc.)
Focus on business value, patterns, and actionable recommendations."""
        insights_text = make_api_call(st.session_state.selected_model, prompt)
        st.session_state.ai_insights_text = insights_text

        # Save to history
        save_analysis_to_history({
            "type": "AI Insights",
            "data_shape": df.shape,
            "insights": insights_text
        })
    except Exception as e:
        st.error(f"Failed to generate AI insights: {str(e)}")
        st.session_state.ai_insights_text = f"Error generating insights: {str(e)}"
def display_ai_insights(insights_text: str):
    """Display AI-generated insights"""
    st.markdown("---")
    st.markdown("## 🤖 AI-Generated Insights")
    insights = parse_insights(insights_text)
    for insight in insights:
        st.markdown(f"""
        <div class="metric-card">
            <h4 style="color: #ffd700;">💡 {insight['title']}</h4>
            <p>{insight['text']}</p>
        </div>
        """, unsafe_allow_html=True)
def parse_insights(raw_insights: str) -> List[Dict]:
    """Parse insights from API response"""
    insights = []
    lines = raw_insights.strip().split('\n')
    current_insight = None

    for line in lines:
        line = line.strip()
        if not line:
            continue
        # Look for patterns like "**Title**:" or "1. **Title**:"
        if line.startswith('**') and '**:' in line:
            if current_insight:
                insights.append(current_insight)
            parts = line.split('**:', 1)
            title = parts[0].replace('**', '').strip().lstrip('1234567890.- ')
            text = parts[1].strip() if len(parts) > 1 else ''
            current_insight = {'title': title, 'text': text}
        elif line.startswith(('1.', '2.', '3.', '4.', '5.')) and '**' in line:
            if current_insight:
                insights.append(current_insight)
            # Extract from numbered format
            clean_line = line.lstrip('1234567890. ').strip()
            if '**:' in clean_line:
                parts = clean_line.split('**:', 1)
                title = parts[0].replace('**', '').strip()
                text = parts[1].strip() if len(parts) > 1 else ''
            else:
                title = f"Insight {len(insights) + 1}"
                text = clean_line
            current_insight = {'title': title, 'text': text}
        elif current_insight and line:
            current_insight['text'] += ' ' + line

    if current_insight:
        insights.append(current_insight)

    # Fallback if no insights parsed
    if not insights and raw_insights.strip():
        insights.append({
            'title': 'AI Analysis',
            'text': raw_insights.strip()
        })
    return insights[:5]
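
# Illustrative example (not executed): given the raw line
#   "1. **Revenue Concentration**: A few products drive most sales."
# parse_insights returns
#   [{'title': 'Revenue Concentration', 'text': 'A few products drive most sales.'}]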
# ===========================
# Advanced Analytics Functions
# ===========================
def display_advanced_analytics(df: pd.DataFrame):
    """Display advanced analytics"""
    st.markdown("---")
    st.markdown("## 📈 Advanced Analytics")
    tabs = st.tabs(["📊 Statistical Summary", "🔍 Outlier Detection", "🔗 Correlation Analysis", "🎯 Distribution Analysis"])
    with tabs[0]:
        show_statistical_summary(df)
    with tabs[1]:
        show_outlier_analysis(df)
    with tabs[2]:
        show_correlation_analysis(df)
    with tabs[3]:
        show_distribution_analysis(df)

def show_statistical_summary(df: pd.DataFrame):
    """Show advanced statistical summary"""
    st.markdown("### 📊 Advanced Statistical Summary")
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    if len(numeric_cols) > 0:
        # Basic statistics
        st.markdown("#### 📋 Descriptive Statistics")
        st.dataframe(df[numeric_cols].describe(), use_container_width=True)

        # Advanced statistics
        st.markdown("#### 📈 Advanced Distribution Metrics")
        advanced_stats = pd.DataFrame({
            'Column': numeric_cols,
            'Skewness': [df[col].skew() for col in numeric_cols],
            'Kurtosis': [df[col].kurtosis() for col in numeric_cols],
            'Missing %': [df[col].isnull().sum() / len(df) * 100 for col in numeric_cols],
            'Zeros %': [(df[col] == 0).sum() / len(df) * 100 for col in numeric_cols],
            'Unique Values': [df[col].nunique() for col in numeric_cols]
        })
        st.dataframe(advanced_stats, use_container_width=True, hide_index=True)

        # Interpretation
        st.markdown("#### 🎯 Statistical Interpretation")
        for col in numeric_cols[:3]:
            skewness = df[col].skew()
            kurtosis = df[col].kurtosis()
            skew_interpretation = (
                "Normal" if abs(skewness) < 0.5 else
                "Slightly Skewed" if abs(skewness) < 1 else
                "Moderately Skewed" if abs(skewness) < 2 else
                "Highly Skewed"
            )
            kurt_interpretation = (
                "Normal" if abs(kurtosis) < 1 else
                "Heavy-tailed" if kurtosis > 1 else
                "Light-tailed"
            )
            st.markdown(f"**{col}:** {skew_interpretation} distribution, {kurt_interpretation} shape")
    else:
        st.info("No numeric columns found for statistical analysis")
| def show_outlier_analysis(df: pd.DataFrame): | |
| """Show outlier detection analysis""" | |
| st.markdown("### π Advanced Outlier Detection") | |
| numeric_cols = df.select_dtypes(include=[np.number]).columns | |
| if len(numeric_cols) > 0: | |
| # Summary for all columns | |
| st.markdown("#### π Outlier Summary (All Columns)") | |
| outlier_summary = [] | |
| for col in numeric_cols: | |
| data = df[col].dropna() | |
| if len(data) > 0: | |
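| # Tukey's fences: values beyond 1.5 * IQR outside Q1/Q3 are flagged as outliers | |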
| Q1 = data.quantile(0.25) | |
| Q3 = data.quantile(0.75) | |
| IQR = Q3 - Q1 | |
| lower_bound = Q1 - 1.5 * IQR | |
| upper_bound = Q3 + 1.5 * IQR | |
| outliers = df[(df[col] < lower_bound) | (df[col] > upper_bound)] | |
| outlier_summary.append({ | |
| 'Column': col, | |
| 'Outlier Count': len(outliers), | |
| 'Outlier %': round(len(outliers) / len(df) * 100, 2), | |
| 'Lower Bound': round(lower_bound, 2), | |
| 'Upper Bound': round(upper_bound, 2) | |
| }) | |
| if outlier_summary: | |
| outlier_df = pd.DataFrame(outlier_summary) | |
| st.dataframe(outlier_df, use_container_width=True, hide_index=True) | |
| # Detailed analysis | |
| st.markdown("#### π― Detailed Outlier Analysis") | |
| selected_col = st.selectbox("Select column for detailed analysis", numeric_cols, key="outlier_detail") | |
| if selected_col: | |
| analyze_column_outliers(df, selected_col) | |
| else: | |
| st.info("No numeric columns found for outlier analysis") | |
| def analyze_column_outliers(df: pd.DataFrame, column: str): | |
| """Analyze outliers for a specific column""" | |
| data = df[column].dropna() | |
| Q1 = data.quantile(0.25) | |
| Q3 = data.quantile(0.75) | |
| IQR = Q3 - Q1 | |
| lower_bound = Q1 - 1.5 * IQR | |
| upper_bound = Q3 + 1.5 * IQR | |
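| # Worked example: with Q1 = 10 and Q3 = 20, IQR = 10, so the fences are | |
| # 10 - 1.5*10 = -5 and 20 + 1.5*10 = 35; values outside [-5, 35] are outliers. | |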
| outliers = df[(df[column] < lower_bound) | (df[column] > upper_bound)] | |
| col1, col2, col3, col4 = st.columns(4) | |
| with col1: | |
| st.metric("Total Outliers", len(outliers)) | |
| with col2: | |
| st.metric("Outlier %", f"{len(outliers)/len(df)*100:.1f}%") | |
| with col3: | |
| st.metric("IQR", f"{IQR:.2f}") | |
| with col4: | |
| st.metric("Range", f"{upper_bound - lower_bound:.2f}") | |
| # Box plot | |
| fig = go.Figure() | |
| fig.add_trace(go.Box( | |
| y=data, | |
| name=column, | |
| boxpoints='outliers' | |
| )) | |
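| # boxpoints='outliers' draws only points beyond the whiskers; Plotly's default | |
| # whiskers also use the 1.5 * IQR rule, matching the fence computation above. | |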
| fig.update_layout( | |
| title=f'Box Plot with Outliers: {column}', | |
| height=400 | |
| ) | |
| st.plotly_chart(fig, use_container_width=True) | |
| # Show outlier values | |
| if len(outliers) > 0: | |
| st.markdown("#### π Outlier Values") | |
| st.dataframe(outliers[[column]].head(20), use_container_width=True) | |
| def show_correlation_analysis(df: pd.DataFrame): | |
| """Show correlation analysis""" | |
| st.markdown("### π Advanced Correlation Analysis") | |
| numeric_cols = df.select_dtypes(include=[np.number]).columns | |
| if len(numeric_cols) >= 2: | |
| corr_matrix = df[numeric_cols].corr() | |
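| # Note: DataFrame.corr() defaults to Pearson (linear) correlation; a hedged | |
| # alternative for monotonic non-linear relationships would be | |
| # df[numeric_cols].corr(method='spearman'), which this app does not use. | |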
| # Enhanced heatmap | |
| fig = px.imshow(corr_matrix, | |
| text_auto=True, | |
| aspect="auto", | |
| title="Enhanced Correlation Matrix", | |
| color_continuous_scale="RdBu_r") | |
| fig.update_layout(height=500) | |
| st.plotly_chart(fig, use_container_width=True) | |
| # Correlation strength analysis | |
| st.markdown("#### π― Correlation Strength Analysis") | |
| corr_pairs = [] | |
| for i in range(len(corr_matrix.columns)): | |
| for j in range(i+1, len(corr_matrix.columns)): | |
| corr_value = corr_matrix.iloc[i, j] | |
| strength = ( | |
| "Very Strong" if abs(corr_value) >= 0.8 else | |
| "Strong" if abs(corr_value) >= 0.6 else | |
| "Moderate" if abs(corr_value) >= 0.4 else | |
| "Weak" if abs(corr_value) >= 0.2 else | |
| "Very Weak" | |
| ) | |
| corr_pairs.append({ | |
| 'Variable 1': corr_matrix.columns[i], | |
| 'Variable 2': corr_matrix.columns[j], | |
| 'Correlation': corr_value, | |
| 'Abs Correlation': abs(corr_value), | |
| 'Strength': strength | |
| }) | |
| if corr_pairs: | |
| corr_df = pd.DataFrame(corr_pairs) | |
| corr_df = corr_df.sort_values('Abs Correlation', ascending=False) | |
| st.dataframe(corr_df[['Variable 1', 'Variable 2', 'Correlation', 'Strength']].head(15), | |
| use_container_width=True, hide_index=True) | |
| # Strong correlations warning | |
| strong_corr = corr_df[corr_df['Abs Correlation'] >= 0.6] | |
| if len(strong_corr) > 0: | |
| st.markdown("#### β οΈ Strong Correlations (>0.6)") | |
| st.dataframe(strong_corr[['Variable 1', 'Variable 2', 'Correlation']], | |
| use_container_width=True, hide_index=True) | |
| st.warning("Strong correlations may indicate multicollinearity issues in modeling.") | |
| else: | |
| st.info("Need at least 2 numeric columns for correlation analysis") | |
| def show_distribution_analysis(df: pd.DataFrame): | |
| """Show distribution analysis""" | |
| st.markdown("### π― Advanced Distribution Analysis") | |
| numeric_cols = df.select_dtypes(include=[np.number]).columns | |
| categorical_cols = df.select_dtypes(include=['object', 'category']).columns | |
| if len(numeric_cols) > 0: | |
| st.markdown("#### π Numeric Distribution Analysis") | |
| selected_col = st.selectbox("Select numeric column", numeric_cols, key="dist_numeric") | |
| data = df[selected_col].dropna()  # computed once; shared by both charts and the metrics below | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| # Histogram | |
| fig = go.Figure() | |
| fig.add_trace(go.Histogram( | |
| x=data, | |
| name='Distribution', | |
| opacity=0.7, | |
| nbinsx=30 | |
| )) | |
| fig.update_layout( | |
| title=f'Distribution of {selected_col}', | |
| xaxis_title=selected_col, | |
| yaxis_title='Frequency', | |
| height=400 | |
| ) | |
| st.plotly_chart(fig, use_container_width=True) | |
| with col2: | |
| # Box plot | |
| fig = go.Figure() | |
| fig.add_trace(go.Box( | |
| y=data, | |
| name=selected_col, | |
| boxpoints='all', | |
| jitter=0.3, | |
| pointpos=-1.8 | |
| )) | |
| fig.update_layout( | |
| title=f'Box Plot of {selected_col}', | |
| height=400 | |
| ) | |
| st.plotly_chart(fig, use_container_width=True) | |
| # Distribution statistics | |
| st.markdown("#### π Distribution Statistics") | |
| col1, col2, col3, col4 = st.columns(4) | |
| with col1: | |
| st.metric("Mean", f"{data.mean():.2f}") | |
| with col2: | |
| st.metric("Median", f"{data.median():.2f}") | |
| with col3: | |
| st.metric("Std Dev", f"{data.std():.2f}") | |
| with col4: | |
| st.metric("Range", f"{data.max() - data.min():.2f}") | |
| if len(categorical_cols) > 0: | |
| st.markdown("#### π·οΈ Categorical Distribution Analysis") | |
| selected_cat = st.selectbox("Select categorical column", categorical_cols, key="dist_categorical") | |
| value_counts = df[selected_cat].value_counts() | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| # Bar chart | |
| fig = px.bar( | |
| x=value_counts.index[:15], | |
| y=value_counts.values[:15], | |
| title=f'Top Categories in {selected_cat}', | |
| labels={'x': selected_cat, 'y': 'Count'} | |
| ) | |
| st.plotly_chart(fig, use_container_width=True) | |
| with col2: | |
| # Pie chart | |
| fig = px.pie( | |
| values=value_counts.values[:10], | |
| names=value_counts.index[:10], | |
| title=f'Distribution of {selected_cat}' | |
| ) | |
| st.plotly_chart(fig, use_container_width=True) | |
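| # Caveat: the bar chart truncates to the top 15 categories and the pie to the | |
| # top 10, so pie percentages reflect only those categories, not the full column. | |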
| # Category statistics | |
| st.markdown("#### π Category Statistics") | |
| col1, col2, col3, col4 = st.columns(4) | |
| with col1: | |
| st.metric("Unique Categories", df[selected_cat].nunique()) | |
| with col2: | |
| st.metric("Most Frequent", value_counts.index[0]) | |
| with col3: | |
| st.metric("Frequency", value_counts.iloc[0]) | |
| with col4: | |
| st.metric("Missing Values", df[selected_cat].isnull().sum()) | |
| # =========================== | |
| # History Functions | |
| # =========================== | |
| def save_analysis_to_history(analysis_record: Dict): | |
| """Save analysis to history""" | |
| record = { | |
| "timestamp": datetime.now().isoformat(), | |
| "session_id": st.session_state.session_id, | |
| **analysis_record | |
| } | |
| st.session_state.analysis_history.append(record) | |
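| # Illustrative call (field names as used by show_history below): | |
| #   save_analysis_to_history({'type': 'EDA', 'data_shape': str(df.shape)}) | |
| # merges in 'timestamp' and 'session_id' automatically. | |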
| def show_history(): | |
| """Display analysis history""" | |
| st.markdown("## π Analysis History") | |
| history = st.session_state.analysis_history | |
| if not history: | |
| st.info("No analysis history found.") | |
| return | |
| for record in reversed(history[-10:]):  # newest first, capped at the ten most recent | |
| with st.expander(f"{record['type']} - {record['timestamp'][:19]}"): | |
| if record['type'] == 'Single Query Analysis': | |
| st.markdown(f"**Query:** {record['query']}") | |
| st.markdown("**SQL Result:**") | |
| st.code(record.get('sql_result', 'N/A'), language='sql') | |
| elif record['type'] == 'Model Comparison': | |
| st.markdown(f"**Query:** {record['query']}") | |
| st.markdown("**Results:**") | |
| for result in record.get('results', []): | |
| if isinstance(result, dict) and 'model' in result and 'avg_score' in result: | |
| st.markdown(f"- {result['model']}: {result['avg_score']:.1f}/100") | |
| elif record['type'] == 'EDA': | |
| st.markdown(f"**Data Shape:** {record.get('data_shape', 'N/A')}") | |
| st.markdown("**Analysis completed successfully**") | |
| elif record['type'] == 'AI Insights': | |
| st.markdown(f"**Data Shape:** {record.get('data_shape', 'N/A')}") | |
| st.markdown("**Insights generated successfully**") | |
| # =========================== | |
| # Main Application | |
| # =========================== | |
| def main(): | |
| """Main application entry point""" | |
| # Initialize | |
| inject_custom_css() | |
| initialize_session_state() | |
| # Render UI | |
| render_header() | |
| render_sidebar() | |
| # Main content area | |
| if st.session_state.uploaded_data is not None: | |
| render_query_interface() | |
| else: | |
| render_data_upload() | |
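| # Streamlit reruns main() top to bottom on every user interaction; uploaded | |
| # data persists across reruns via st.session_state. | |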
| if __name__ == "__main__": | |
| main() |