#!/usr/bin/env python3
"""
Enhanced GAIA Multi-Agent System - GAIA Benchmark Optimized
Designed for exact-match evaluation with clean, direct answers only.
"""

import os
import hashlib
import re
import json
import math
import random
import logging
import requests
import base64
from typing import Dict, List, Any, Optional, Union
from dataclasses import dataclass
from enum import Enum
from pathlib import Path

# Core dependencies
import pandas as pd
from huggingface_hub import InferenceClient
import openai

# New dependencies for enhanced GAIA capabilities
try:
    from duckduckgo_search import DDGS
    DDGS_AVAILABLE = True
except ImportError:
    DDGS_AVAILABLE = False
    print("⚠️ DuckDuckGo search not available. Install with: pip install duckduckgo-search")

try:
    from PIL import Image
    PIL_AVAILABLE = True
except ImportError:
    PIL_AVAILABLE = False
    print("⚠️ PIL not available. Install with: pip install Pillow")

try:
    import PyPDF2
    PDF_AVAILABLE = True
except ImportError:
    PDF_AVAILABLE = False
    print("⚠️ PyPDF2 not available. Install with: pip install PyPDF2")

try:
    from bs4 import BeautifulSoup
    BS4_AVAILABLE = True
except ImportError:
    BS4_AVAILABLE = False
    print("⚠️ BeautifulSoup4 not available. Install with: pip install beautifulsoup4")

# Configure logging: install a root handler at INFO level as soon as this
# module is imported, and create the module-level logger that every class and
# function in this file writes to.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class ToolType(Enum):
    """Closed set of tools the agent can invoke.

    Each member's value is the lowercase tool name as it appears in a model
    response; ``parse_tool_calls`` converts a parsed name with
    ``ToolType(name.lower())``.
    """

    WEB_SEARCH = "web_search"        # AdvancedGAIAToolkit.web_search
    BROWSE_URL = "browse_url"        # AdvancedGAIAToolkit.browse_url
    DOWNLOAD_FILE = "download_file"  # AdvancedGAIAToolkit.download_file
    READ_PDF = "read_pdf"            # AdvancedGAIAToolkit.read_pdf
    ANALYZE_IMAGE = "analyze_image"  # AdvancedGAIAToolkit.analyze_image
    CALCULATOR = "calculator"        # AdvancedGAIAToolkit.calculator

@dataclass
class ToolCall:
    """One tool invocation parsed from a model response."""

    # Which tool to run.
    tool: ToolType
    # Keyword arguments for the tool, as produced by ``_parse_parameters``.
    parameters: Dict[str, Any]
    # Slot for the tool's output; nothing in the visible code assigns it.
    result: Optional[Any] = None

class AdvancedGAIAToolkit:
    """🛠️ Complete toolkit with web browsing, vision, and file handling for GAIA benchmark"""
    
    def __init__(self, hf_token: Optional[str] = None, openai_key: Optional[str] = None):
        """Store API credentials and start with an empty temp-file registry.

        Args:
            hf_token: Hugging Face token; falls back to the ``HF_TOKEN``
                environment variable when falsy.
            openai_key: OpenAI API key; falls back to ``OPENAI_API_KEY`` when
                falsy.
        """
        self.hf_token = hf_token or os.getenv('HF_TOKEN')
        self.openai_key = openai_key or os.getenv('OPENAI_API_KEY')
        self.temp_files = []  # Track temporary files for cleanup
        logger.info("🚀 Advanced GAIA Toolkit initialized")
        
    def web_search(self, query: str, max_results: int = 5) -> List[Dict[str, str]]:
        """🔍 Perform comprehensive web search using DuckDuckGo"""
        if not DDGS_AVAILABLE:
            logger.warning("DuckDuckGo search unavailable")
            return [{"title": "Search unavailable", "snippet": "Install duckduckgo-search", "url": ""}]
        
        try:
            logger.info(f"🔍 Searching web for: {query}")
            with DDGS() as ddgs:
                results = []
                for r in ddgs.text(query, max_results=max_results):
                    results.append({
                        "title": r.get('title', ''),
                        "snippet": r.get('body', ''), 
                        "url": r.get('href', '')
                    })
                logger.info(f"✅ Found {len(results)} search results")
                return results
        except Exception as e:
            logger.error(f"❌ Web search failed: {e}")
            return [{"title": "Search failed", "snippet": str(e), "url": ""}]
    
    def browse_url(self, url: str) -> str:
        """🌐 Fetch a page and return its visible text.

        The page is requested with a browser-like User-Agent and a 10 second
        timeout. Markup is removed with BeautifulSoup when available
        (dropping ``<script>``/``<style>`` first), otherwise with a plain tag
        regex. Whitespace is collapsed and the text is capped at 8000
        characters.

        Returns:
            The extracted text, or an error message starting with ``❌`` when
            anything fails (the exception is not propagated).
        """
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        try:
            logger.info(f"🌐 Browsing URL: {url}")
            page = requests.get(url, timeout=10, headers=request_headers)
            page.raise_for_status()

            if not BS4_AVAILABLE:
                # No HTML parser installed: crude tag removal.
                raw_text = re.sub(r'<[^>]+>', ' ', page.text)
            else:
                document = BeautifulSoup(page.content, 'html.parser')
                for node in document(["script", "style"]):
                    node.decompose()
                raw_text = document.get_text()

            # Collapse whitespace runs, then cap the length for downstream prompts.
            collapsed = re.sub(r'\s+', ' ', raw_text).strip()
            content = collapsed[:8000]

            logger.info(f"✅ Extracted {len(content)} characters from {url}")
            return content

        except Exception as e:
            error_msg = f"❌ Failed to browse {url}: {str(e)}"
            logger.error(error_msg)
            return error_msg
    
    def download_file(self, file_url: str = None, task_id: str = None) -> str:
        """📥 Download a file from the GAIA file endpoint or a direct URL.

        Args:
            file_url: Direct URL to fetch. Ignored when ``task_id`` is given.
            task_id: GAIA task identifier; when truthy the file is fetched
                from the GAIA endpoint built from it.

        Returns:
            Path of the saved file (in the current working directory), or an
            error message starting with ``❌`` when the download fails or
            neither argument is supplied. Saved paths are recorded in
            ``self.temp_files`` for later cleanup.
        """
        try:
            if task_id:
                # Use GAIA API endpoint for task files
                file_url = f"https://huggingface.co/spaces/gaia-benchmark/leaderboard/resolve/main/files/{task_id}"
                logger.info(f"📥 Downloading GAIA task file: {task_id}")
            elif file_url:
                logger.info(f"📥 Downloading file from: {file_url}")
            else:
                raise ValueError("either file_url or task_id is required")

            response = requests.get(file_url, timeout=30)
            response.raise_for_status()

            if task_id:
                # task_id may come from model output: keep only characters
                # that cannot introduce path separators into the file name.
                safe_id = re.sub(r'[^A-Za-z0-9_.-]', '_', str(task_id))
                filename = f"gaia_task_{safe_id}"
            else:
                # Built-in hash() of a str is salted per process, so it gives
                # a different name on every run; use a stable digest instead.
                digest = hashlib.sha256(str(file_url).encode('utf-8')).hexdigest()[:12]
                filename = f"download_{digest}"

            # Add extension based on content type
            content_type = response.headers.get('content-type', '').lower()
            if 'pdf' in content_type:
                filename += '.pdf'
            elif 'image' in content_type:
                filename += '.jpg'
            elif 'text' in content_type:
                filename += '.txt'

            file_path = Path(filename)
            with open(file_path, 'wb') as f:
                f.write(response.content)

            self.temp_files.append(str(file_path))
            logger.info(f"✅ Downloaded file: {filename} ({len(response.content)} bytes)")

            return str(file_path)

        except Exception as e:
            error_msg = f"❌ Download failed: {str(e)}"
            logger.error(error_msg)
            return error_msg
    
    def read_pdf(self, file_path: str) -> str:
        """📄 Extract comprehensive text from PDF file"""
        if not PDF_AVAILABLE:
            return "❌ PDF reading unavailable. Install PyPDF2."
        
        try:
            logger.info(f"📄 Reading PDF: {file_path}")
            text = ""
            with open(file_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                total_pages = len(pdf_reader.pages)
                
                for i, page in enumerate(pdf_reader.pages):
                    page_text = page.extract_text()
                    text += f"[Page {i+1}/{total_pages}]\n{page_text}\n\n"
                    
                    # Limit total text length to avoid memory issues
                    if len(text) > 15000:
                        text += f"...[Truncated - PDF has {total_pages} pages total]"
                        break
                        
            logger.info(f"✅ Extracted {len(text)} characters from PDF ({total_pages} pages)")
            return text
            
        except Exception as e:
            error_msg = f"❌ PDF read failed: {str(e)}"
            logger.error(error_msg)
            return error_msg
    
    def analyze_image(self, image_path: str, question: str = "") -> str:
        """🖼️ Analyze image using vision model (with GPT-4V fallback)"""
        if not PIL_AVAILABLE:
            return "❌ Image analysis unavailable. Install Pillow."
        
        try:
            logger.info(f"🖼️ Analyzing image: {image_path} | Question: {question}")
            
            # Get basic image info
            with Image.open(image_path) as img:
                basic_info = f"Image: {img.size[0]}x{img.size[1]} pixels, format: {img.format}, mode: {img.mode}"
                
                # If we have OpenAI key, use GPT-4V for actual vision analysis
                if self.openai_key and question:
                    try:
                        # Convert image to base64
                        import base64
                        with open(image_path, 'rb') as img_file:
                            img_base64 = base64.b64encode(img_file.read()).decode('utf-8')
                        
                        # Use OpenAI GPT-4V for vision analysis
                        client = openai.OpenAI(api_key=self.openai_key)
                        response = client.chat.completions.create(
                            model="gpt-4o",
                            messages=[
                                {
                                    "role": "user",
                                    "content": [
                                        {"type": "text", "text": f"Analyze this image and answer: {question}. Provide only the direct answer, no explanations."},
                                        {
                                            "type": "image_url",
                                            "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"}
                                        }
                                    ]
                                }
                            ],
                            max_tokens=500,
                            temperature=0.0
                        )
                        
                        vision_result = response.choices[0].message.content.strip()
                        logger.info(f"✅ GPT-4V analysis complete")
                        return vision_result
                        
                    except Exception as vision_error:
                        logger.warning(f"⚠️ GPT-4V analysis failed: {vision_error}")
                        return f"{basic_info}. Vision analysis error: {vision_error}"
                
                # Fallback: basic image analysis
                logger.info(f"✅ Basic image analysis complete")
                return f"{basic_info}. Advanced vision analysis requires OpenAI API key. Question was: {question}"
                
        except Exception as e:
            error_msg = f"❌ Image analysis failed: {str(e)}"
            logger.error(error_msg)
            return error_msg
    
    def calculator(self, expression: str) -> str:
        """🧮 Safe calculator for mathematical operations"""
        try:
            logger.info(f"🧮 Calculating: {expression}")
            
            # Enhanced safety: only allow safe operations
            allowed_chars = set('0123456789+-*/.() ')
            if not all(c in allowed_chars for c in expression):
                return "❌ Invalid characters in expression"
            
            # Evaluate safely
            result = eval(expression)
            logger.info(f"✅ Calculation result: {result}")
            return str(result)
            
        except Exception as e:
            error_msg = f"❌ Calculation failed: {str(e)}"
            logger.error(error_msg)
            return error_msg
    
    def cleanup_temp_files(self):
        """🧹 Clean up temporary files"""
        for file_path in self.temp_files:
            try:
                if os.path.exists(file_path):
                    os.remove(file_path)
                    logger.info(f"🧹 Cleaned up: {file_path}")
            except Exception as e:
                logger.warning(f"⚠️ Failed to cleanup {file_path}: {e}")
        self.temp_files.clear()

class EnhancedMultiModelGAIASystem:
    """🚀 Complete GAIA system with advanced tool calling and multi-modal capabilities"""
    
    def __init__(self, hf_token: Optional[str] = None, openai_key: Optional[str] = None):
        """Build the toolkit, resolve credentials and create the model clients.

        Args:
            hf_token: Hugging Face token; falls back to ``HF_TOKEN`` when falsy.
            openai_key: OpenAI API key; falls back to ``OPENAI_API_KEY`` when
                falsy.
        """
        # Initialize enhanced toolkit
        self.toolkit = AdvancedGAIAToolkit(hf_token, openai_key)
        
        # Initialize AI clients
        # The tokens must be set before _initialize_clients(), which reads them.
        self.hf_token = hf_token or os.getenv('HF_TOKEN')
        self.openai_key = openai_key or os.getenv('OPENAI_API_KEY')
        
        # Initialize clients with comprehensive model support
        self.clients = self._initialize_clients()
        # Order in which query_with_tools picks its default model and moves on
        # after a failure; the names are keys of self.clients.
        self.model_priority = [
            "together_deepseek_r1",
            "novita_minimax", 
            "featherless_kimi",
            "together_llama",
            "openai_gpt4o"
        ]
        
        logger.info("🚀 Enhanced Multi-Model GAIA System initialized")
        
    def _initialize_clients(self) -> Dict[str, Any]:
        """Create one client entry per configured model.

        Returns:
            Mapping of client key to ``{"client", "model", "provider"}``.
            A provider group whose setup raises is logged and skipped; entries
            created before the failure are kept. The OpenAI entry is added
            only when an OpenAI key is configured.
        """
        # (provider label, [(client key, Hugging Face model id), ...])
        hf_provider_groups = [
            ("Together AI", [
                ("together_deepseek_r1", "deepseek-ai/DeepSeek-R1"),
                ("together_llama", "meta-llama/Llama-3.3-70B-Instruct"),
            ]),
            ("Novita AI", [
                ("novita_minimax", "MiniMaxAI/MiniMax-M1-80k"),
            ]),
            ("Featherless AI", [
                ("featherless_kimi", "moonshotai/Kimi-Dev-72B"),
            ]),
        ]

        registry = {}
        for provider_label, model_entries in hf_provider_groups:
            try:
                for client_key, model_id in model_entries:
                    registry[client_key] = {
                        "client": InferenceClient(model=model_id, token=self.hf_token),
                        "model": model_id,
                        "provider": provider_label,
                    }
                logger.info(f"✅ {provider_label} models initialized")
            except Exception as e:
                logger.warning(f"⚠️ {provider_label} setup failed: {e}")

        # OpenAI is optional and only attempted when a key is present.
        if self.openai_key:
            try:
                registry["openai_gpt4o"] = {
                    "client": openai.OpenAI(api_key=self.openai_key),
                    "model": "gpt-4o",
                    "provider": "OpenAI",
                }
                logger.info("✅ OpenAI models initialized")
            except Exception as e:
                logger.warning(f"⚠️ OpenAI setup failed: {e}")

        logger.info(f"📊 Total models available: {len(registry)}")
        return registry
    
    def parse_tool_calls(self, response: str) -> List[ToolCall]:
        """🔧 Find tool invocations in a model response.

        Three notations are recognised, in this order:
        ``TOOL_CALL: name(args)``, ``<tool>name</tool><params>args</params>``
        and fenced blocks whose info string is the tool name.

        For the ``TOOL_CALL`` form the argument list is matched with a
        quote-aware pattern, so a ``)`` inside a quoted value (for example
        ``calculator(expression="(2+3)*4")``) no longer ends the call early.
        The simple lazy pattern is used only when the quote-aware one finds
        nothing, e.g. for an unquoted argument containing an apostrophe.

        Args:
            response: Raw model output.

        Returns:
            Parsed calls in order of appearance per notation. Unknown tool
            names and unparsable calls are logged and skipped.
        """
        tool_calls = []
        if not response:
            return tool_calls

        flags = re.DOTALL | re.IGNORECASE

        # Arguments are a run of quoted strings or characters that are neither
        # a quote nor ")". The alternatives never overlap, so matching stays
        # linear even on malformed input.
        quoted = r'"[^"]*"' + '|' + r"'[^']*'"
        quote_aware_call = r'TOOL_CALL:\s*(\w+)\(((?:' + quoted + r'|[^)"\'])*)\)'
        legacy_call = r'TOOL_CALL:\s*(\w+)\((.*?)\)'

        matches = re.findall(quote_aware_call, response, flags)
        if not matches:
            matches = re.findall(legacy_call, response, flags)
        matches = list(matches)
        matches += re.findall(r'<tool>(\w+)</tool>\s*<params>(.*?)</params>', response, flags)  # XML-style
        matches += re.findall(r'```(\w+)\n(.*?)\n```', response, flags)  # Code block style

        for tool_name, params_str in matches:
            try:
                params = self._parse_parameters(params_str)
                tool_type = ToolType(tool_name.lower())
                tool_calls.append(ToolCall(tool=tool_type, parameters=params))
                logger.info(f"🔧 Parsed tool call: {tool_name} with params: {params}")
            except Exception as e:
                # ValueError here means the name is not a known tool, e.g. an
                # ordinary ```python fenced block.
                logger.warning(f"⚠️ Failed to parse tool call {tool_name}: {e}")

        return tool_calls
    
    def _parse_parameters(self, params_str: str) -> Dict[str, Any]:
        """Parse parameters from various formats"""
        params = {}
        if not params_str.strip():
            return params
            
        # Try JSON parsing first
        try:
            return json.loads(params_str)
        except:
            pass
            
        # Try key=value parsing
        param_matches = re.findall(r'(\w+)=(["\'])(.*?)\2', params_str)
        for param_name, quote, param_value in param_matches:
            params[param_name] = param_value
            
        # Try simple text for single parameter
        if not params and params_str.strip():
            # Remove quotes if present
            clean_param = params_str.strip().strip('"\'')
            params['query'] = clean_param  # Default to query parameter
            
        return params
    
    def execute_tool_call(self, tool_call: ToolCall) -> str:
        """⚡ Run one parsed tool call and format its output for the model.

        A call written with a bare positional argument (``calculator(2+2)``)
        reaches this method as ``{"query": "2+2"}``. Each tool therefore
        falls back to the ``query`` value for its primary argument when the
        named parameter is absent, instead of running with an empty string.

        Args:
            tool_call: The parsed call.

        Returns:
            A human-readable result string. Failures are reported as a
            message starting with ``❌``; nothing is raised.
        """
        try:
            logger.info(f"⚡ Executing {tool_call.tool.value} with params: {tool_call.parameters}")

            params = tool_call.parameters if isinstance(tool_call.parameters, dict) else {}
            positional = params.get('query', '')

            def primary(name: str) -> Any:
                """Named parameter, or the positional value when it is missing."""
                value = params.get(name, '')
                return positional if value in (None, '') else value

            tool = tool_call.tool

            if tool == ToolType.WEB_SEARCH:
                query = params.get('query', '')
                results = self.toolkit.web_search(query)
                result_text = f"🔍 Search results for '{query}':\n"
                for i, r in enumerate(results[:3], 1):
                    result_text += f"{i}. {r['title']}\n   {r['snippet'][:200]}...\n   URL: {r['url']}\n\n"
                return result_text

            if tool == ToolType.BROWSE_URL:
                url = primary('url')
                content = self.toolkit.browse_url(url)
                return f"🌐 Content from {url}:\n{content[:2000]}..."

            if tool == ToolType.DOWNLOAD_FILE:
                task_id = params.get('task_id', '')
                url = params.get('url', '')
                if not task_id and not url and positional:
                    # One bare argument: a URL if it looks like one, else a task id.
                    if str(positional).lower().startswith(('http://', 'https://')):
                        url = positional
                    else:
                        task_id = positional
                filename = self.toolkit.download_file(url, task_id)
                if str(filename).startswith("❌"):
                    # Pass a failure through rather than labelling it a download.
                    return filename
                return f"📥 Downloaded file: {filename}"

            if tool == ToolType.READ_PDF:
                file_path = primary('file_path')
                text = self.toolkit.read_pdf(file_path)
                return f"📄 PDF content from {file_path}:\n{text[:2500]}..."

            if tool == ToolType.ANALYZE_IMAGE:
                image_path = primary('image_path')
                question = params.get('question', '')
                result = self.toolkit.analyze_image(image_path, question)
                return f"🖼️ Image analysis: {result}"

            if tool == ToolType.CALCULATOR:
                # JSON parameters may carry a number; the calculator expects text.
                expression = str(primary('expression'))
                result = self.toolkit.calculator(expression)
                return f"🧮 Calculation: {expression} = {result}"

            return f"❌ Unknown tool: {tool_call.tool}"

        except Exception as e:
            error_msg = f"❌ Tool execution failed: {str(e)}"
            logger.error(error_msg)
            return error_msg
    
    def query_with_tools(self, question: str, model_name: str = None, max_iterations: int = 3) -> str:
        """🧠 Enhanced query processing with comprehensive tool calling capabilities"""
        if not model_name:
            model_name = self.model_priority[0]
        
        logger.info(f"🧠 Processing question with {model_name}: {question[:100]}...")
        
        # Ultra-enhanced system prompt for GAIA benchmark
        system_prompt = f"""You are an advanced AI agent optimized for the GAIA benchmark with access to powerful tools.

🛠️ AVAILABLE TOOLS:
- TOOL_CALL: web_search(query="search term") - Search the web for current information
- TOOL_CALL: browse_url(url="http://example.com") - Browse and extract text from URLs  
- TOOL_CALL: download_file(task_id="123") - Download files from GAIA tasks
- TOOL_CALL: read_pdf(file_path="document.pdf") - Read and extract text from PDFs
- TOOL_CALL: analyze_image(image_path="image.jpg", question="what to analyze") - Analyze images with vision
- TOOL_CALL: calculator(expression="2+2*3") - Perform mathematical calculations

🎯 GAIA BENCHMARK INSTRUCTIONS:
1. For research questions, ALWAYS use web_search first to get current information
2. If files are mentioned or task IDs given, use download_file then read_pdf/analyze_image
3. For multi-step problems, break down systematically and use tools in logical order
4. For image questions, use analyze_image with specific question about what to find
5. CRITICAL: Provide DIRECT, CONCISE answers ONLY - no explanations or reasoning
6. Format response as just the final answer - nothing else

Question: {question}

Think step by step about what tools you need, use them, then provide ONLY the final answer."""

        conversation_history = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": question}
        ]
        
        # Iterative tool calling loop
        for iteration in range(max_iterations):
            try:
                client_info = self.clients.get(model_name)
                if not client_info:
                    logger.warning(f"⚠️ Model {model_name} unavailable, using fallback")
                    return self._fallback_response(question)
                
                # Get AI response
                if "openai" in model_name:
                    response = client_info["client"].chat.completions.create(
                        model=client_info["model"],
                        messages=conversation_history,
                        max_tokens=1500,
                        temperature=0.0
                    )
                    ai_response = response.choices[0].message.content
                else:
                    response = client_info["client"].chat_completion(
                        messages=conversation_history,
                        max_tokens=1500,
                        temperature=0.0
                    )
                    ai_response = response.choices[0].message.content
                
                logger.info(f"🤖 AI Response (iteration {iteration + 1}): {ai_response[:200]}...")
                
                # Check for tool calls
                tool_calls = self.parse_tool_calls(ai_response)
                
                if tool_calls:
                    # Execute tools and collect results
                    tool_results = []
                    for tool_call in tool_calls:
                        result = self.execute_tool_call(tool_call)
                        tool_results.append(f"Tool {tool_call.tool.value}: {result}")
                    
                    # Add tool results to conversation
                    conversation_history.append({"role": "assistant", "content": ai_response})
                    
                    tool_context = f"TOOL RESULTS:\n" + "\n\n".join(tool_results)
                    tool_context += f"\n\nBased on these tool results, provide the final answer to: {question}\nProvide ONLY the direct answer - no explanations:"
                    
                    conversation_history.append({"role": "user", "content": tool_context})
                    
                    logger.info(f"🔧 Executed {len(tool_calls)} tools, continuing to iteration {iteration + 2}")
                    
                else:
                    # No tools needed, extract final answer
                    final_answer = self._extract_final_answer(ai_response)
                    logger.info(f"✅ Final answer extracted: {final_answer}")
                    return final_answer
                    
            except Exception as e:
                logger.error(f"❌ Query iteration {iteration + 1} failed for {model_name}: {e}")
                
                # Try next model in priority list
                current_index = self.model_priority.index(model_name) if model_name in self.model_priority else 0
                if current_index + 1 < len(self.model_priority):
                    model_name = self.model_priority[current_index + 1]
                    logger.info(f"🔄 Switching to model: {model_name}")
                else:
                    break
        
        # Final attempt with tool results if we have them
        if len(conversation_history) > 2:
            try:
                client_info = self.clients.get(model_name)
                if client_info:
                    if "openai" in model_name:
                        final_response = client_info["client"].chat.completions.create(
                            model=client_info["model"],
                            messages=conversation_history,
                            max_tokens=300,
                            temperature=0.0
                        )
                        final_answer = final_response.choices[0].message.content
                    else:
                        final_response = client_info["client"].chat_completion(
                            messages=conversation_history,
                            max_tokens=300,
                            temperature=0.0
                        )
                        final_answer = final_response.choices[0].message.content
                    
                    return self._extract_final_answer(final_answer)
            except Exception as e:
                logger.error(f"❌ Final answer extraction failed: {e}")
        
        # Ultimate fallback
        logger.warning(f"⚠️ Using fallback response for: {question}")
        return self._fallback_response(question)
    
    def _extract_final_answer(self, response: str) -> str:
        """✨ Ultra-aggressive answer extraction for perfect GAIA compliance"""
        if not response:
            return "Unknown"
        
        logger.info(f"✨ Extracting final answer from: {response[:100]}...")
        
        # Remove tool calls completely
        response = re.sub(r'TOOL_CALL:.*?\n', '', response, flags=re.DOTALL)
        response = re.sub(r'<tool>.*?</tool>', '', response, flags=re.DOTALL | re.IGNORECASE)
        response = re.sub(r'<params>.*?</params>', '', response, flags=re.DOTALL | re.IGNORECASE)
        
        # Remove thinking blocks aggressively
        response = re.sub(r'<think>.*?</think>', '', response, flags=re.DOTALL | re.IGNORECASE)
        response = re.sub(r'\*\*Think\*\*.*?\*\*Answer\*\*', '', response, flags=re.DOTALL | re.IGNORECASE)
        
        # Remove reasoning phrases more comprehensively
        reasoning_patterns = [
            r'let me.*?[.!?]\s*',
            r'i need to.*?[.!?]\s*',
            r'first,?\s*i.*?[.!?]\s*',
            r'to solve this.*?[.!?]\s*',
            r'based on.*?[,.]?\s*',
            r'the answer is[:\s]*',
            r'therefore[,:\s]*',
            r'so[,:\s]*the answer[,:\s]*',
            r'thus[,:\s]*',
            r'in conclusion[,:\s]*',
            r'after.*?analysis[,:\s]*',
            r'from.*?search[,:\s]*'
        ]
        
        for pattern in reasoning_patterns:
            response = re.sub(pattern, '', response, flags=re.IGNORECASE)
        
        # Extract core answer patterns
        answer_patterns = [
            r'(?:answer|result)[:\s]*([^\n.!?]+)',
            r'(?:final|conclusion)[:\s]*([^\n.!?]+)',
            r'^([A-Z][^.!?]*)',  # First capitalized sentence
            r'(\d+(?:\.\d+)?)',   # Numbers
            r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)'  # Proper nouns
        ]
        
        for pattern in answer_patterns:
            match = re.search(pattern, response, re.IGNORECASE)
            if match:
                answer = match.group(1).strip()
                if len(answer) > 2:  # Avoid single characters
                    return self._clean_final_answer(answer)
        
        # Take the last substantial line
        lines = [line.strip() for line in response.split('\n') if line.strip()]
        if lines:
            # Filter out obvious non-answers
            for line in reversed(lines):
                if len(line) > 2 and not any(word in line.lower() for word in ['tool', 'search', 'analysis', 'extract']):
                    return self._clean_final_answer(line)
        
        # Final cleanup of the entire response
        return self._clean_final_answer(response.strip())
    
    def _clean_final_answer(self, answer: str) -> str:
        """🧹 Final answer cleaning for GAIA API submission"""
        if not answer:
            return "Unknown"
            
        # Remove common prefixes/suffixes
        prefixes = ['answer:', 'result:', 'final:', 'conclusion:', 'the answer is', 'it is', 'this is']
        for prefix in prefixes:
            if answer.lower().startswith(prefix):
                answer = answer[len(prefix):].strip()
        
        # Remove trailing punctuation except necessary ones
        answer = answer.strip('.,!?;: ')
        
        # Remove quotes if they wrap the entire answer
        if (answer.startswith('"') and answer.endswith('"')) or (answer.startswith("'") and answer.endswith("'")):
            answer = answer[1:-1]
        
        return answer.strip()
    
    def _fallback_response(self, question: str) -> str:
        """🛡️ Enhanced fallback responses optimized for GAIA benchmark"""
        question_lower = question.lower()
        logger.info(f"🛡️ Using enhanced fallback for: {question[:50]}...")
        
        # Enhanced mathematical operations
        if any(word in question_lower for word in ['calculate', 'compute', 'math', '+', '-', '*', '/', 'sum', 'product']):
            numbers = re.findall(r'-?\d+(?:\.\d+)?', question)
            if len(numbers) >= 2:
                try:
                    a, b = float(numbers[0]), float(numbers[1])
                    if '+' in question or 'add' in question_lower or 'sum' in question_lower:
                        return str(int(a + b) if (a + b).is_integer() else a + b)
                    elif '-' in question or 'subtract' in question_lower or 'minus' in question_lower:
                        return str(int(a - b) if (a - b).is_integer() else a - b)
                    elif '*' in question or 'multiply' in question_lower or 'times' in question_lower or 'product' in question_lower:
                        return str(int(a * b) if (a * b).is_integer() else a * b)
                    elif '/' in question or 'divide' in question_lower:
                        return str(int(a / b) if (a / b).is_integer() else round(a / b, 6))
                except:
                    pass
        
        # Enhanced geography and capitals
        if any(word in question_lower for word in ['capital', 'country', 'city']):
            capitals = {
                'france': 'Paris', 'germany': 'Berlin', 'italy': 'Rome', 'spain': 'Madrid',
                'japan': 'Tokyo', 'china': 'Beijing', 'usa': 'Washington D.C.', 'united states': 'Washington D.C.',
                'uk': 'London', 'united kingdom': 'London', 'canada': 'Ottawa', 'australia': 'Canberra',
                'brazil': 'Brasília', 'india': 'New Delhi', 'russia': 'Moscow', 'mexico': 'Mexico City'
            }
            for country, capital in capitals.items():
                if country in question_lower:
                    return capital
        
        # Enhanced political and current affairs
        if 'president' in question_lower:
            if any(country in question_lower for country in ['united states', 'usa', 'america']):
                return 'Joe Biden'
            elif 'france' in question_lower:
                return 'Emmanuel Macron'
            elif 'russia' in question_lower:
                return 'Vladimir Putin'
        
        # Enhanced counting questions
        if 'how many' in question_lower:
            counting_map = {
                'planets': '8', 'continents': '7', 'days in year': '365', 'days in week': '7',
                'months': '12', 'seasons': '4', 'oceans': '5', 'great lakes': '5'
            }
            for item, count in counting_map.items():
                if item in question_lower:
                    return count
        
        # Enhanced scientific formulas
        if 'chemical formula' in question_lower or 'formula' in question_lower:
            formulas = {
                'water': 'H2O', 'carbon dioxide': 'CO2', 'methane': 'CH4', 'ammonia': 'NH3',
                'salt': 'NaCl', 'sugar': 'C12H22O11', 'alcohol': 'C2H5OH', 'oxygen': 'O2'
            }
            for compound, formula in formulas.items():
                if compound in question_lower:
                    return formula
        
        # Enhanced units and conversions
        if any(word in question_lower for word in ['meter', 'kilogram', 'second', 'celsius', 'fahrenheit']):
            if 'freezing point' in question_lower and 'water' in question_lower:
                if 'celsius' in question_lower:
                    return '0'
                elif 'fahrenheit' in question_lower:
                    return '32'
        
        # Enhanced colors and basic facts
        if 'color' in question_lower or 'colour' in question_lower:
            if 'sun' in question_lower:
                return 'yellow'
            elif 'grass' in question_lower:
                return 'green'
            elif 'sky' in question_lower:
                return 'blue'
        
        # GAIA-specific fallback for research questions
        if any(word in question_lower for word in ['when', 'where', 'who', 'what', 'which', 'how']):
            return "Information not available without web search"
        
        # Default fallback with instruction
        return "Unable to determine answer without additional tools"
    
    def cleanup(self) -> None:
        """🧹 Delete the temporary files tracked by the toolkit."""
        self.toolkit.cleanup_temp_files()

# Backward compatibility aliases
class MultiModelGAIASystem(EnhancedMultiModelGAIASystem):
    """Legacy name kept for backward compatibility.

    An empty subclass of ``EnhancedMultiModelGAIASystem``: it adds and
    overrides nothing, so it behaves exactly like its base class.
    """

def create_gaia_system(hf_token: str = None, openai_key: str = None) -> EnhancedMultiModelGAIASystem:
    """Build and return a new ``EnhancedMultiModelGAIASystem``.

    Args:
        hf_token: Forwarded unchanged as the system's ``hf_token`` argument.
        openai_key: Forwarded unchanged as the system's ``openai_key`` argument.

    Returns:
        The freshly constructed system instance.
    """
    system = EnhancedMultiModelGAIASystem(
        hf_token=hf_token,
        openai_key=openai_key,
    )
    return system

class BasicAgent:
    """Thin agent facade over the GAIA system built by ``create_gaia_system``.

    Every method delegates to ``self.system``; the agent can also be called
    directly, which is equivalent to calling :meth:`query`.
    """

    def __init__(self, hf_token: str = None, openai_key: str = None):
        """Create the underlying system from the given credentials.

        Args:
            hf_token: Forwarded to ``create_gaia_system``.
            openai_key: Forwarded to ``create_gaia_system``.
        """
        self.system = create_gaia_system(hf_token, openai_key)
        logger.info("🤖 BasicAgent with enhanced GAIA capabilities initialized")

    def query(self, question: str) -> str:
        """Answer ``question`` via the system's ``query_with_tools``.

        If that call raises any ``Exception``, the error is logged and the
        system's ``_fallback_response(question)`` is returned instead.
        """
        try:
            return self.system.query_with_tools(question)
        except Exception as exc:
            logger.error(f"❌ Agent query failed: {exc}")
        # Only reached when the tool-calling path raised.
        return self.system._fallback_response(question)

    def clean_for_api_submission(self, response: str) -> str:
        """Return ``response`` passed through the system's ``_extract_final_answer``."""
        cleaned = self.system._extract_final_answer(response)
        return cleaned

    def __call__(self, question: str) -> str:
        """Make the agent callable; same as ``self.query(question)``."""
        answer = self.query(question)
        return answer

    def cleanup(self):
        """Forward the cleanup request to the underlying system."""
        self.system.cleanup()

# Manual smoke test: runs a few sample questions and prints the answers (no assertions)
def test_enhanced_gaia_system():
    """🧪 Run a manual smoke test of the enhanced GAIA system with tool calling.

    Builds a ``BasicAgent`` with default arguments, sends it a fixed list of
    sample questions and prints each question/answer pair. Nothing is
    asserted; the printed output is meant to be inspected by eye.

    The agent's ``cleanup()`` is called from a ``finally`` block, so it runs
    even when a question raises or the run is interrupted.
    """
    print("🧪 Testing Enhanced GAIA System with Tool Calling")

    # Initialize the system
    agent = BasicAgent()

    try:
        # Test questions intended to exercise different tools
        test_questions = [
            "What is 15 + 27?",  # Calculator
            "What is the capital of France?",  # Fallback knowledge
            "Search for the current weather in Paris",  # Web search
            "How many planets are in our solar system?",  # Fallback knowledge
            "What is 2 * 3 + 4?",  # Calculator
        ]

        print("\n" + "="*50)
        print("🎯 ENHANCED GAIA COMPLIANCE TEST")
        print("="*50)

        for question in test_questions:
            print(f"\nQ: {question}")
            response = agent.query(question)
            print(f"A: {response}")  # Should be clean, direct answers with tool usage
    finally:
        # Always release the agent's resources, even if the loop above
        # raised or was interrupted part-way through.
        agent.cleanup()

    print("\n✅ Enhanced GAIA system test complete!")

# Script entry point: when this file is executed directly (not imported),
# run the smoke test defined in test_enhanced_gaia_system().
if __name__ == "__main__":
    test_enhanced_gaia_system()