Orel MAZOR committed
Commit e4bc671 · 1 Parent(s): 0d0725b
.DS_Store ADDED
Binary file (6.15 kB).
 
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
.vscode/launch.json ADDED
@@ -0,0 +1,15 @@
1
+ {
2
+ // Use IntelliSense to learn about possible attributes.
3
+ // Hover to view descriptions of existing attributes.
4
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5
+ "version": "0.2.0",
6
+ "configurations": [
7
+ {
8
+ "name": "Python Debugger: Current File",
9
+ "type": "debugpy",
10
+ "request": "launch",
11
+ "program": "${file}",
12
+ "console": "integratedTerminal"
13
+ }
14
+ ]
15
+ }
__pycache__/agent.cpython-311.pyc ADDED
Binary file (49.5 kB).
 
__pycache__/agent2.cpython-311.pyc ADDED
Binary file (19.7 kB).
 
agent.py CHANGED
@@ -10,6 +10,7 @@ import asyncio
10
  # Third-party imports
11
  import requests
12
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
13
 
14
  # LlamaIndex core imports
15
  from llama_index.core import VectorStoreIndex, Document, Settings
@@ -120,140 +121,8 @@ def initialize_models(use_api_mode=False):
120
  print("Initializing models in non-API mode with local models...")
121
 
122
  try :
123
- from typing import Optional, List, Any
124
- from pydantic import Field, PrivateAttr
125
- from llama_index.core.llms import CustomLLM, CompletionResponse, CompletionResponseGen, LLMMetadata
126
- from llama_index.core.llms.callbacks import llm_completion_callback
127
- from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
128
- from qwen_vl_utils import process_vision_info
129
- import torch
130
-
131
- class QwenVL7BCustomLLM(CustomLLM):
132
- model_name: str = Field(default="Qwen/Qwen2.5-VL-7B-Instruct")
133
- context_window: int = Field(default=32768)
134
- num_output: int = Field(default=256)
135
- _model = PrivateAttr()
136
- _processor = PrivateAttr()
137
-
138
- def __init__(self, **kwargs):
139
- super().__init__(**kwargs)
140
- self._model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
141
- self.model_name, torch_dtype=torch.bfloat16, device_map='balanced'
142
- )
143
- self._processor = AutoProcessor.from_pretrained(self.model_name)
144
-
145
- @property
146
- def metadata(self) -> LLMMetadata:
147
- return LLMMetadata(
148
- context_window=self.context_window,
149
- num_output=self.num_output,
150
- model_name=self.model_name,
151
- )
152
-
153
- @llm_completion_callback()
154
- def complete(
155
- self,
156
- prompt: str,
157
- image_paths: Optional[List[str]] = None,
158
- **kwargs: Any
159
- ) -> CompletionResponse:
160
- # Prepare multimodal input
161
- messages = [{"role": "user", "content": []}]
162
- if image_paths:
163
- for path in image_paths:
164
- messages[0]["content"].append({"type": "image", "image": path})
165
- messages[0]["content"].append({"type": "text", "text": prompt})
166
-
167
- # Tokenize and process
168
- text = self._processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
169
- image_inputs, video_inputs = process_vision_info(messages)
170
- inputs = self._processor(
171
- text=[text],
172
- images=image_inputs,
173
- videos=video_inputs,
174
- padding=True,
175
- return_tensors="pt",
176
- )
177
- inputs = inputs.to(self._model.device)
178
-
179
- # Generate output
180
- generated_ids = self._model.generate(**inputs, max_new_tokens=self.num_output)
181
- generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
182
- output_text = self._processor.batch_decode(
183
- generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
184
- )[0]
185
- return CompletionResponse(text=output_text)
186
-
187
- @llm_completion_callback()
188
- def stream_complete(
189
- self,
190
- prompt: str,
191
- image_paths: Optional[List[str]] = None,
192
- **kwargs: Any
193
- ) -> CompletionResponseGen:
194
- response = self.complete(prompt, image_paths)
195
- for token in response.text:
196
- yield CompletionResponse(text=token, delta=token)
197
-
198
-
199
  proj_llm = QwenVL7BCustomLLM()
200
 
201
- from typing import Any, List, Optional
202
- from llama_index.core.embeddings import BaseEmbedding
203
- from sentence_transformers import SentenceTransformer
204
- from PIL import Image
205
-
206
- class MultimodalCLIPEmbedding(BaseEmbedding):
207
- """
208
- Custom embedding class using CLIP for multimodal capabilities.
209
- """
210
-
211
- def __init__(self, model_name: str = "clip-ViT-B-32", **kwargs: Any) -> None:
212
- super().__init__(**kwargs)
213
- self._model = SentenceTransformer(model_name)
214
-
215
- @classmethod
216
- def class_name(cls) -> str:
217
- return "multimodal_clip"
218
-
219
- def _get_query_embedding(self, query: str, image_path: Optional[str] = None) -> List[float]:
220
- if image_path:
221
- image = Image.open(image_path)
222
- embedding = self._model.encode(image)
223
- return embedding.tolist()
224
- else:
225
- embedding = self._model.encode(query)
226
- return embedding.tolist()
227
-
228
- def _get_text_embedding(self, text: str, image_path: Optional[str] = None) -> List[float]:
229
- if image_path:
230
- image = Image.open(image_path)
231
- embedding = self._model.encode(image)
232
- return embedding.tolist()
233
- else:
234
- embedding = self._model.encode(text)
235
- return embedding.tolist()
236
-
237
- def _get_text_embeddings(self, texts: List[str], image_paths: Optional[List[str]] = None) -> List[List[float]]:
238
- embeddings = []
239
- image_paths = image_paths or [None] * len(texts)
240
-
241
- for text, img_path in zip(texts, image_paths):
242
- if img_path:
243
- image = Image.open(img_path)
244
- emb = self._model.encode(image)
245
- else:
246
- emb = self._model.encode(text)
247
- embeddings.append(emb.tolist())
248
-
249
- return embeddings
250
-
251
- async def _aget_query_embedding(self, query: str, image_path: Optional[str] = None) -> List[float]:
252
- return self._get_query_embedding(query, image_path)
253
-
254
- async def _aget_text_embedding(self, text: str, image_path: Optional[str] = None) -> List[float]:
255
- return self._get_text_embedding(text, image_path)
256
-
257
 
258
  embed_model = MultimodalCLIPEmbedding()
259
  embed_model.max_seq_length = 1024
 
10
  # Third-party imports
11
  import requests
12
  from transformers import AutoModelForCausalLM, AutoTokenizer
13
+ from custom_models import QwenVL7BCustomLLM, BaaiMultimodalEmbedding
14
 
15
  # LlamaIndex core imports
16
  from llama_index.core import VectorStoreIndex, Document, Settings
 
121
  print("Initializing models in non-API mode with local models...")
122
 
123
  try :
124
  proj_llm = QwenVL7BCustomLLM()
125
 
126
 
127
  embed_model = MultimodalCLIPEmbedding()
128
  embed_model.max_seq_length = 1024
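
Editor's note on this hunk: agent.py now imports QwenVL7BCustomLLM and BaaiMultimodalEmbedding from the new custom_models module, yet the surviving code still instantiates MultimodalCLIPEmbedding(), which is no longer defined or imported in this file. Below is a minimal sketch of how the local-model wiring could look if the new BAAI embedding were actually used; the helper name initialize_local_models and the LlamaIndex Settings registration are illustrative assumptions, not part of the commit.

    from llama_index.core import Settings
    from custom_models import QwenVL7BCustomLLM, BaaiMultimodalEmbedding

    def initialize_local_models():
        # Local multimodal LLM (Qwen2.5-VL-7B) wrapped as a LlamaIndex CustomLLM
        proj_llm = QwenVL7BCustomLLM()
        # Multimodal embedding from custom_models.py (assumed replacement for MultimodalCLIPEmbedding)
        embed_model = BaaiMultimodalEmbedding()
        # Register both globally so downstream indices and query engines pick them up
        Settings.llm = proj_llm
        Settings.embed_model = embed_model
        return proj_llm, embed_model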
agent2.py CHANGED
@@ -5,16 +5,8 @@ from typing import Dict, Any, List
5
  from langchain.docstore.document import Document
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  from langchain_community.retrievers import BM25Retriever
8
- from smolagents import CodeAgent, OpenAIServerModel, tool, Tool
9
- from smolagents.vision_web_browser import initialize_driver, save_screenshot, helium_instructions
10
- from smolagents.agents import ActionStep
11
- from selenium import webdriver
12
- from selenium.webdriver.common.by import By
13
- from selenium.webdriver.common.keys import Keys
14
- import helium
15
- from PIL import Image
16
- from io import BytesIO
17
- from time import sleep
18
 
19
  # Langfuse observability imports
20
  from opentelemetry.sdk.trace import TracerProvider
@@ -22,10 +14,101 @@ from openinference.instrumentation.smolagents import SmolagentsInstrumentor
22
  from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
23
  from opentelemetry.sdk.trace.export import SimpleSpanProcessor
24
  from opentelemetry import trace
25
- from opentelemetry.trace import format_trace_id
26
  from langfuse import Langfuse
 
27
 
28
 
29
  class BM25RetrieverTool(Tool):
30
  """
31
  BM25 retriever tool for document search when text documents are available
@@ -59,126 +142,6 @@ class BM25RetrieverTool(Tool):
59
  for i, doc in enumerate(docs)
60
  ])
61
 
62
-
63
- @tool
64
- def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
65
- """Search for text on the current page via Ctrl + F and jump to the nth occurrence.
66
-
67
- Args:
68
- text: The text string to search for on the webpage
69
- nth_result: Which occurrence to jump to (default is 1 for first occurrence)
70
-
71
- Returns:
72
- str: Result of the search operation with match count and navigation status
73
- """
74
- try:
75
- driver = helium.get_driver()
76
- elements = driver.find_elements(By.XPATH, f"//*[contains(text(), '{text}')]")
77
- if nth_result > len(elements):
78
- return f"Match n°{nth_result} not found (only {len(elements)} matches found)"
79
- result = f"Found {len(elements)} matches for '{text}'."
80
- elem = elements[nth_result - 1]
81
- driver.execute_script("arguments[0].scrollIntoView(true);", elem)
82
- result += f"Focused on element {nth_result} of {len(elements)}"
83
- return result
84
- except Exception as e:
85
- return f"Error searching for text: {e}"
86
-
87
-
88
- @tool
89
- def go_back() -> str:
90
- """Navigate back to the previous page in browser history.
91
-
92
- Returns:
93
- str: Confirmation message or error description
94
- """
95
- try:
96
- driver = helium.get_driver()
97
- driver.back()
98
- return "Navigated back to previous page"
99
- except Exception as e:
100
- return f"Error going back: {e}"
101
-
102
-
103
- @tool
104
- def close_popups() -> str:
105
- """Close any visible modal or pop-up on the page by sending ESC key.
106
-
107
- Returns:
108
- str: Confirmation message or error description
109
- """
110
- try:
111
- driver = helium.get_driver()
112
- webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
113
- return "Attempted to close popups"
114
- except Exception as e:
115
- return f"Error closing popups: {e}"
116
-
117
-
118
- @tool
119
- def scroll_page(direction: str = "down", amount: int = 3) -> str:
120
- """Scroll the webpage in the specified direction.
121
-
122
- Args:
123
- direction: Direction to scroll, either 'up' or 'down'
124
- amount: Number of scroll actions to perform
125
-
126
- Returns:
127
- str: Confirmation message or error description
128
- """
129
- try:
130
- driver = helium.get_driver()
131
- for _ in range(amount):
132
- if direction.lower() == "down":
133
- driver.execute_script("window.scrollBy(0, 300);")
134
- elif direction.lower() == "up":
135
- driver.execute_script("window.scrollBy(0, -300);")
136
- sleep(0.5)
137
- return f"Scrolled {direction} {amount} times"
138
- except Exception as e:
139
- return f"Error scrolling: {e}"
140
-
141
-
142
- @tool
143
- def get_page_text() -> str:
144
- """Extract all visible text from the current webpage.
145
-
146
- Returns:
147
- str: The visible text content of the page
148
- """
149
- try:
150
- driver = helium.get_driver()
151
- text = driver.find_element(By.TAG_NAME, "body").text
152
- return f"Page text (first 2000 chars): {text[:2000]}"
153
- except Exception as e:
154
- return f"Error getting page text: {e}"
155
-
156
-
157
- def save_screenshot_callback(memory_step: ActionStep, agent: CodeAgent) -> None:
158
- """Save screenshots for web browser automation"""
159
- try:
160
- sleep(1.0)
161
- driver = helium.get_driver()
162
- if driver is not None:
163
- # Clean up old screenshots
164
- for previous_memory_step in agent.memory.steps:
165
- if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= memory_step.step_number - 2:
166
- previous_memory_step.observations_images = None
167
-
168
- png_bytes = driver.get_screenshot_as_png()
169
- image = Image.open(BytesIO(png_bytes))
170
- memory_step.observations_images = [image.copy()]
171
-
172
- # Update observations with current URL
173
- url_info = f"Current url: {driver.current_url}"
174
- memory_step.observations = (
175
- url_info if memory_step.observations is None
176
- else memory_step.observations + "\n" + url_info
177
- )
178
- except Exception as e:
179
- print(f"Error in screenshot callback: {e}")
180
-
181
-
182
  class GAIAAgent:
183
  """
184
  GAIA agent using smolagents with Gemini 2.0 Flash and Langfuse observability
@@ -200,6 +163,8 @@ class GAIAAgent:
200
  model_id="gemini-2.0-flash",
201
  api_base="https://generativelanguage.googleapis.com/v1beta/openai/",
202
  api_key=gemini_api_key,
 
 
203
  )
204
 
205
  # Store user and session IDs for tracking
@@ -207,26 +172,17 @@ class GAIAAgent:
207
  self.session_id = session_id or "gaia-session"
208
 
209
  # GAIA system prompt from the leaderboard
210
- self.system_prompt = """You are a general AI assistant. I will ask you a question. Report your thoughts and reasoning process clearly. You should use the available tools to gather information and solve problems step by step.
211
-
212
- When using web browser automation:
213
- - Use helium commands like go_to(), click(), scroll_down()
214
- - Take screenshots to see what's happening
215
- - Handle popups and forms appropriately
216
- - Be patient with page loading
217
-
218
- For document retrieval:
219
- - Use the BM25 retriever when there are text documents attached
220
- - Search with relevant keywords from the question
221
-
222
- Your final answer should be as few words as possible, a number, or a comma-separated list. Don't use articles, abbreviations, or units unless specified."""
223
 
224
  # Initialize retriever tool (will be updated when documents are loaded)
225
  self.retriever_tool = BM25RetrieverTool()
226
 
227
- # Initialize web driver for browser automation
228
- self.driver = None
229
-
230
  # Create the agent
231
  self.agent = None
232
  self._create_agent()
@@ -234,6 +190,13 @@ Your final answer should be as few words as possible, a number, or a comma-separ
234
  # Initialize Langfuse client
235
  self.langfuse = Langfuse()
236
 
237
  def _setup_langfuse_observability(self):
238
  """Set up Langfuse observability with OpenTelemetry"""
239
  # Get Langfuse keys from environment variables
@@ -271,48 +234,17 @@ Your final answer should be as few words as possible, a number, or a comma-separ
271
  """Create the CodeAgent with tools"""
272
  base_tools = [
273
  self.retriever_tool,
274
- search_item_ctrl_f,
275
- go_back,
276
- close_popups,
277
- scroll_page,
278
- get_page_text
279
  ]
280
-
281
  self.agent = CodeAgent(
282
- tools=base_tools,
 
 
 
283
  model=self.model,
284
- add_base_tools=True,
285
- planning_interval=3,
286
- additional_authorized_imports=["helium", "requests", "BeautifulSoup", "json"],
287
- step_callbacks=[save_screenshot_callback] if self.driver else [],
288
- max_steps=5,
289
- description=self.system_prompt,
290
- verbosity_level=2,
291
- )
292
-
293
- def initialize_browser(self):
294
- """Initialize browser for web automation tasks"""
295
- try:
296
- chrome_options = webdriver.ChromeOptions()
297
- chrome_options.add_argument("--force-device-scale-factor=1")
298
- chrome_options.add_argument("--window-size=1000,1350")
299
- chrome_options.add_argument("--disable-pdf-viewer")
300
- chrome_options.add_argument("--window-position=0,0")
301
- chrome_options.add_argument("--no-sandbox")
302
- chrome_options.add_argument("--disable-dev-shm-usage")
303
-
304
- self.driver = helium.start_chrome(headless=False, options=chrome_options)
305
-
306
- # Recreate agent with browser tools
307
- self._create_agent()
308
-
309
- # Import helium for the agent
310
- self.agent.python_executor("from helium import *")
311
 
312
- return True
313
- except Exception as e:
314
- print(f"Failed to initialize browser: {e}")
315
- return False
316
 
317
  def load_documents_from_file(self, file_path: str):
318
  """Load and process documents from a file for BM25 retrieval"""
@@ -375,35 +307,22 @@ Your final answer should be as few words as possible, a number, or a comma-separ
375
  if task_id:
376
  trace_tags.append(f"task-{task_id}")
377
 
378
- # Start Langfuse trace with OpenTelemetry
379
- with self.tracer.start_as_current_span("GAIA-Question-Solving") as span:
 
 
 
 
 
 
 
 
380
  try:
381
- # Set span attributes for tracking
382
- span.set_attribute("langfuse.user.id", self.user_id)
383
- span.set_attribute("langfuse.session.id", self.session_id)
384
- span.set_attribute("langfuse.tags", trace_tags)
385
- span.set_attribute("gaia.task_id", task_id)
386
- span.set_attribute("gaia.question_length", len(question))
387
-
388
- # Get trace ID for Langfuse linking
389
- current_span = trace.get_current_span()
390
- span_context = current_span.get_span_context()
391
- trace_id = span_context.trace_id
392
- formatted_trace_id = format_trace_id(trace_id)
393
-
394
- # Create Langfuse trace
395
- langfuse_trace = self.langfuse.trace(
396
- id=formatted_trace_id,
397
- name="GAIA Question Solving",
398
- input={"question": question, "task_id": task_id},
399
  user_id=self.user_id,
400
  session_id=self.session_id,
401
- tags=trace_tags,
402
- metadata={
403
- "model": self.model.model_id,
404
- "question_length": len(question),
405
- "has_file": bool(task_id)
406
- }
407
  )
408
 
409
  # Download and load file if task_id provided
@@ -412,47 +331,22 @@ Your final answer should be as few words as possible, a number, or a comma-separ
412
  file_path = self.download_gaia_file(task_id)
413
  if file_path:
414
  file_loaded = self.load_documents_from_file(file_path)
415
- span.set_attribute("gaia.file_loaded", file_loaded)
416
  print(f"Loaded file for task {task_id}")
417
 
418
- # Check if this requires web browsing
419
- web_indicators = ["navigate", "browser", "website", "webpage", "url", "click", "search on"]
420
- needs_browser = any(indicator in question.lower() for indicator in web_indicators)
421
- span.set_attribute("gaia.needs_browser", needs_browser)
422
-
423
- if needs_browser and not self.driver:
424
- print("Initializing browser for web automation...")
425
- browser_initialized = self.initialize_browser()
426
- span.set_attribute("gaia.browser_initialized", browser_initialized)
427
-
428
  # Prepare the prompt
429
  prompt = f"""
430
- Question: {question}
431
- {f'Task ID: {task_id}' if task_id else ''}
432
- {f'File loaded: Yes' if file_loaded else 'File loaded: No'}
433
 
434
- Solve this step by step. Use the available tools to gather information and provide a precise answer.
435
  """
436
 
437
- if needs_browser:
438
- prompt += "\n" + helium_instructions
439
-
440
  print("=== AGENT REASONING ===")
441
  result = self.agent.run(prompt)
442
  print("=== END REASONING ===")
443
 
444
- # Update Langfuse trace with result
445
- langfuse_trace.update(
446
- output={"answer": str(result)},
447
- end_time=None # Will be set automatically
448
- )
449
-
450
- # Add success attributes
451
- span.set_attribute("gaia.success", True)
452
- span.set_attribute("gaia.answer_length", len(str(result)))
453
-
454
- # Flush Langfuse data
455
- self.langfuse.flush()
456
 
457
  return str(result)
458
 
@@ -460,26 +354,14 @@ Solve this step by step. Use the available tools to gather information and provi
460
  error_msg = f"Error processing question: {str(e)}"
461
  print(error_msg)
462
 
463
- # Log error to span and Langfuse
464
- span.set_attribute("gaia.success", False)
465
- span.set_attribute("gaia.error", str(e))
466
-
467
- if 'langfuse_trace' in locals():
468
- langfuse_trace.update(
469
- output={"error": error_msg},
470
- level="ERROR"
471
- )
472
 
473
- self.langfuse.flush()
474
  return error_msg
475
-
476
- finally:
477
- # Clean up browser if initialized
478
- if self.driver:
479
- try:
480
- helium.kill_browser()
481
- except:
482
- pass
483
 
484
  def evaluate_answer(self, question: str, answer: str, expected_answer: str = None) -> Dict[str, Any]:
485
  """
@@ -506,29 +388,20 @@ Provide your rating as JSON: {{"accuracy": X, "completeness": Y, "clarity": Z, "
506
 
507
  # Try to parse JSON response
508
  import json
509
- try:
510
- scores = json.loads(evaluation_result)
511
- return scores
512
- except:
513
- # Fallback if JSON parsing fails
514
- return {
515
- "accuracy": 3,
516
- "completeness": 3,
517
- "clarity": 3,
518
- "overall": 3,
519
- "reasoning": "Could not parse evaluation response",
520
- "raw_evaluation": evaluation_result
521
- }
522
-
523
- except Exception as e:
524
  return {
525
- "accuracy": 1,
526
- "completeness": 1,
527
- "clarity": 1,
528
- "overall": 1,
529
- "reasoning": f"Evaluation failed: {str(e)}"
530
  }
531
 
 
532
  def add_user_feedback(self, trace_id: str, feedback_score: int, comment: str = None):
533
  """
534
  Add user feedback to a specific trace
@@ -566,7 +439,7 @@ if __name__ == "__main__":
566
 
567
  # Example question
568
  question_data = {
569
- "Question": "How many studio albums Mercedes Sosa has published between 2000-2009?",
570
  "task_id": ""
571
  }
572
 
@@ -575,11 +448,4 @@ if __name__ == "__main__":
575
  question_data,
576
  tags=["music-question", "discography"]
577
  )
578
- print(f"Answer: {answer}")
579
-
580
- # Evaluate the answer
581
- evaluation = agent.evaluate_answer(
582
- question_data["Question"],
583
- answer
584
- )
585
- print(f"Evaluation: {evaluation}")
 
5
  from langchain.docstore.document import Document
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  from langchain_community.retrievers import BM25Retriever
8
+ from smolagents import CodeAgent, OpenAIServerModel, Tool
9
+ from smolagents import PythonInterpreterTool, SpeechToTextTool
10
 
11
  # Langfuse observability imports
12
  from opentelemetry.sdk.trace import TracerProvider
 
14
  from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
15
  from opentelemetry.sdk.trace.export import SimpleSpanProcessor
16
  from opentelemetry import trace
 
17
  from langfuse import Langfuse
18
+ from smolagents import SpeechToTextTool, PythonInterpreterTool
19
 
20
 
21
+ import requests
22
+ from markdownify import markdownify
23
+ from requests.exceptions import RequestException
24
+ from smolagents import tool
25
+ import re
26
+
27
+ from concurrent.futures import ThreadPoolExecutor, TimeoutError
28
+
29
+ class WebSearchTool(Tool):
30
+ name = "web_search"
31
+ description = """Performs a duckduckgo web search based on your query (think a Google search) then returns the top search results."""
32
+ inputs = {"query": {"type": "string", "description": "The search query to perform."}}
33
+ output_type = "string"
34
+
35
+ def __init__(self, max_results=10, **kwargs):
36
+ super().__init__()
37
+ self.max_results = max_results
38
+ try:
39
+ from duckduckgo_search import DDGS
40
+ except ImportError as e:
41
+ raise ImportError(
42
+ "You must install package `duckduckgo_search` to run this tool: for instance run `pip install duckduckgo-search`."
43
+ ) from e
44
+ self.ddgs = DDGS(**kwargs)
45
+
46
+ def _perform_search(self, query: str):
47
+ """Internal method to perform the actual search."""
48
+ return self.ddgs.text(query, max_results=self.max_results)
49
+
50
+ def forward(self, query: str) -> str:
51
+ results = []
52
+
53
+ # First attempt with timeout
54
+ with ThreadPoolExecutor(max_workers=1) as executor:
55
+ try:
56
+ future = executor.submit(self._perform_search, query)
57
+ results = future.result(timeout=30) # 30 second timeout
58
+ except TimeoutError:
59
+ print("First search attempt timed out after 30 seconds, retrying...")
60
+ results = []
61
+
62
+ # Retry if no results or timeout occurred
63
+ if len(results) == 0:
64
+ print("Retrying search...")
65
+ with ThreadPoolExecutor(max_workers=1) as executor:
66
+ try:
67
+ future = executor.submit(self._perform_search, query)
68
+ results = future.result(timeout=30) # 30 second timeout for retry
69
+ except TimeoutError:
70
+ raise Exception("Search timed out after 30 seconds on both attempts. Try a different query.")
71
+
72
+ # Final check for results
73
+ if len(results) == 0:
74
+ raise Exception("No results found after two attempts! Try a less restrictive/shorter query.")
75
+
76
+ postprocessed_results = [f"[{result['title']}]({result['href']})\n{result['body']}" for result in results]
77
+ return "## Search Results\n\n" + "\n\n".join(postprocessed_results)
78
+
79
+ @tool
80
+ def visit_webpage(url: str) -> str:
81
+ """Visits a webpage at the given URL and returns its content as a markdown string.
82
+
83
+ Args:
84
+ url: The URL of the webpage to visit.
85
+
86
+ Returns:
87
+ The content of the webpage converted to Markdown, or an error message if the request fails.
88
+ """
89
+ try:
90
+ # Send a GET request to the URL
91
+ response = requests.get(url)
92
+ response.raise_for_status() # Raise an exception for bad status codes
93
+
94
+ # Parse the content as HTML with BeautifulSoup
95
+ from bs4 import BeautifulSoup
96
+ soup = BeautifulSoup(response.content, 'html.parser')
97
+ # Extract text and convert to Markdown
98
+ content = soup.get_text(separator="\n", strip=True)
99
+ markdown_content = markdownify(content)
100
+ # Clean up the markdown content
101
+ markdown_content = re.sub(r'\n+', '\n', markdown_content) # Remove excessive newlines
102
+ markdown_content = re.sub(r'\s+', ' ', markdown_content) # Remove excessive spaces
103
+ markdown_content = markdown_content.strip() # Strip leading/trailing whitespace
104
+ return markdown_content
105
+
106
+ except RequestException as e:
107
+ return f"Error fetching the webpage: {str(e)}"
108
+ except Exception as e:
109
+ return f"An unexpected error occurred: {str(e)}"
110
+
111
+
112
  class BM25RetrieverTool(Tool):
113
  """
114
  BM25 retriever tool for document search when text documents are available
 
142
  for i, doc in enumerate(docs)
143
  ])
144
 
145
  class GAIAAgent:
146
  """
147
  GAIA agent using smolagents with Gemini 2.0 Flash and Langfuse observability
 
163
  model_id="gemini-2.0-flash",
164
  api_base="https://generativelanguage.googleapis.com/v1beta/openai/",
165
  api_key=gemini_api_key,
166
+ temperature=0.0,
167
+ top_p=1.0,
168
  )
169
 
170
  # Store user and session IDs for tracking
 
172
  self.session_id = session_id or "gaia-session"
173
 
174
  # GAIA system prompt from the leaderboard
175
+ self.system_prompt = """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
176
+
177
+ IMPORTANT :
178
+ - When you need to find information in a document, use the BM25 retriever tool to search for relevant sections.
179
+ - When you need to find information in a visited web page, do not use the BM25 retriever tool, but instead use the visit_webpage tool to fetch the content of the page, and then use the retrieved content to answer the question.
180
+ - In the last step of your reasoning, if you think your reasoning is not able to answer the question, answer the question directly with your internal reasoning, without using the BM25 retriever tool or the visit_webpage tool.
181
+ """
 
 
 
 
 
 
182
 
183
  # Initialize retriever tool (will be updated when documents are loaded)
184
  self.retriever_tool = BM25RetrieverTool()
185
 
 
 
 
186
  # Create the agent
187
  self.agent = None
188
  self._create_agent()
 
190
  # Initialize Langfuse client
191
  self.langfuse = Langfuse()
192
 
193
+ from langfuse import get_client
194
+ self.langfuse = get_client() # ✅ Use get_client() for v3
195
+
196
+ # Store user and session IDs for tracking
197
+ self.user_id = user_id or "gaia-user"
198
+ self.session_id = session_id or "gaia-session"
199
+
200
  def _setup_langfuse_observability(self):
201
  """Set up Langfuse observability with OpenTelemetry"""
202
  # Get Langfuse keys from environment variables
 
234
  """Create the CodeAgent with tools"""
235
  base_tools = [
236
  self.retriever_tool,
237
+ visit_webpage,
 
 
 
 
238
  ]
 
239
  self.agent = CodeAgent(
240
+ tools=base_tools + [
241
+ SpeechToTextTool(),
242
+ WebSearchTool(),
243
+ PythonInterpreterTool()],
244
  model=self.model,
245
+ description=self.system_prompt,
246
+ max_steps=6 )
247
 
 
 
 
 
248
 
249
  def load_documents_from_file(self, file_path: str):
250
  """Load and process documents from a file for BM25 retrieval"""
 
307
  if task_id:
308
  trace_tags.append(f"task-{task_id}")
309
 
310
+ # Use SDK v3 context manager approach
311
+ with self.langfuse.start_as_current_span(
312
+ name="GAIA-Question-Solving",
313
+ input={"question": question, "task_id": task_id},
314
+ metadata={
315
+ "model": self.model.model_id,
316
+ "question_length": len(question),
317
+ "has_file": bool(task_id)
318
+ }
319
+ ) as span:
320
  try:
321
+ # Set trace attributes using v3 syntax
322
+ span.update_trace(
323
  user_id=self.user_id,
324
  session_id=self.session_id,
325
+ tags=trace_tags
326
  )
327
 
328
  # Download and load file if task_id provided
 
331
  file_path = self.download_gaia_file(task_id)
332
  if file_path:
333
  file_loaded = self.load_documents_from_file(file_path)
 
334
  print(f"Loaded file for task {task_id}")
335
 
336
  # Prepare the prompt
337
  prompt = f"""
338
+ Question: {question}
339
+ {f'Task ID: {task_id}' if task_id else ''}
340
+ {f'File loaded: Yes' if file_loaded else 'File loaded: No'}
341
 
 
342
  """
343
 
 
 
 
344
  print("=== AGENT REASONING ===")
345
  result = self.agent.run(prompt)
346
  print("=== END REASONING ===")
347
 
348
+ # Update span with result using v3 syntax
349
+ span.update(output={"answer": str(result)})
350
 
351
  return str(result)
352
 
 
354
  error_msg = f"Error processing question: {str(e)}"
355
  print(error_msg)
356
 
357
+ # Log error using v3 syntax
358
+ span.update(
359
+ output={"error": error_msg},
360
+ level="ERROR"
361
+ )
 
 
 
 
362
 
 
363
  return error_msg
364
+
 
 
 
 
 
 
 
365
 
366
  def evaluate_answer(self, question: str, answer: str, expected_answer: str = None) -> Dict[str, Any]:
367
  """
 
388
 
389
  # Try to parse JSON response
390
  import json
391
+ scores = json.loads(evaluation_result)
392
+ return scores
393
+ except json.JSONDecodeError:
394
+ # If JSON parsing fails, return a default structure
395
+ print("Failed to parse evaluation result as JSON. Returning default scores.")
 
396
  return {
397
+ "accuracy": 0,
398
+ "completeness": 0,
399
+ "clarity": 0,
400
+ "overall": 0,
401
+ "reasoning": "Could not parse evaluation result"
402
  }
403
 
404
+
405
  def add_user_feedback(self, trace_id: str, feedback_score: int, comment: str = None):
406
  """
407
  Add user feedback to a specific trace
 
439
 
440
  # Example question
441
  question_data = {
442
+ "Question": "How many studio albums Mercedes Sosa has published between 2000-2009? Search on the English Wikipedia webpage.",
443
  "task_id": ""
444
  }
445
 
 
448
  question_data,
449
  tags=["music-question", "discography"]
450
  )
451
+ print(f"Answer: {answer}")
 
app.py CHANGED
@@ -10,7 +10,7 @@ import pandas as pd
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
  # Import your custom agent from agent.py
13
- from agent import EnhancedGAIAAgent
14
 
15
  # --- Basic Agent Definition ---
16
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
@@ -18,7 +18,7 @@ class BasicAgent:
18
  def __init__(self):
19
  print("BasicAgent initialized.")
20
  # Initialize your enhanced GAIA agent
21
- self.gaia_agent = EnhancedGAIAAgent()
22
 
23
  async def __call__(self, question: str) -> str:
24
  try:
 
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
  # Import your custom agent from agent.py
13
+ from agent2 import GAIAAgent
14
 
15
  # --- Basic Agent Definition ---
16
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 
18
  def __init__(self):
19
  print("BasicAgent initialized.")
20
  # Initialize your enhanced GAIA agent
21
+ self.gaia_agent = GAIAAgent()
22
 
23
  async def __call__(self, question: str) -> str:
24
  try:
appasync.py ADDED
@@ -0,0 +1,210 @@
1
+ import os
2
+ import gradio as gr
3
+ import requests
4
+ import inspect
5
+ import pandas as pd
6
+
7
+
8
+ # (Keep Constants as is)
9
+ # --- Constants ---
10
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
+
12
+ # Import your custom agent from agent.py
13
+ from agent2 import GAIAAgent
14
+
15
+ # --- Basic Agent Definition ---
16
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
17
+ class BasicAgent:
18
+ def __init__(self):
19
+ print("BasicAgent initialized.")
20
+ # Initialize your enhanced GAIA agent
21
+ self.gaia_agent = GAIAAgent()
22
+
23
+ def __call__(self, question: str) -> str:
24
+ try:
25
+ question_data = {
26
+ "Question": question,
27
+ "task_id": "basic_agent_task"
28
+ }
29
+ answer = self.gaia_agent.solve_gaia_question(question_data)
30
+ return str(answer)
31
+ except Exception as e:
32
+ return str(e)  # return the error text, not the exception object (the signature promises str)
33
+
34
+ def run_and_submit_all( profile: gr.OAuthProfile | None):
35
+ """
36
+ Fetches all questions, runs the BasicAgent on them, submits all answers,
37
+ and displays the results.
38
+ """
39
+ # --- Determine HF Space Runtime URL and Repo URL ---
40
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
41
+
42
+ if profile:
43
+ username= f"{profile.username}"
44
+ print(f"User logged in: {username}")
45
+ else:
46
+ print("User not logged in.")
47
+ return "Please Login to Hugging Face with the button.", None
48
+
49
+ api_url = DEFAULT_API_URL
50
+ questions_url = f"{api_url}/questions"
51
+ submit_url = f"{api_url}/submit"
52
+
53
+ # 1. Instantiate Agent ( modify this part to create your agent)
54
+ try:
55
+ agent = BasicAgent()
56
+ except Exception as e:
57
+ print(f"Error instantiating agent: {e}")
58
+ return f"Error initializing agent: {e}", None
59
+ # In the case of an app running as a Hugging Face space, this link points toward your codebase (useful for others, so please keep it public)
60
+ #agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
61
+ agent_code = "Running on Kaggle"
62
+ print(agent_code)
63
+
64
+ # 2. Fetch Questions
65
+ print(f"Fetching questions from: {questions_url}")
66
+ try:
67
+ response = requests.get(questions_url, timeout=15)
68
+ response.raise_for_status()
69
+ questions_data = response.json()
70
+ if not questions_data:
71
+ print("Fetched questions list is empty.")
72
+ return "Fetched questions list is empty or invalid format.", None
73
+ print(f"Fetched {len(questions_data)} questions.")
74
+ except requests.exceptions.RequestException as e:
75
+ print(f"Error fetching questions: {e}")
76
+ return f"Error fetching questions: {e}", None
77
+ except requests.exceptions.JSONDecodeError as e:
78
+ print(f"Error decoding JSON response from questions endpoint: {e}")
79
+ print(f"Response text: {response.text[:500]}")
80
+ return f"Error decoding server response for questions: {e}", None
81
+ except Exception as e:
82
+ print(f"An unexpected error occurred fetching questions: {e}")
83
+ return f"An unexpected error occurred fetching questions: {e}", None
84
+
85
+ # 3. Run your Agent
86
+ results_log = []
87
+ answers_payload = []
88
+ print(f"Running agent on {len(questions_data)} questions...")
89
+ for item in questions_data:
90
+ task_id = item.get("task_id")
91
+ question_text = item.get("question")
92
+ print(question_text)
93
+ if not task_id or question_text is None:
94
+ print(f"Skipping item with missing task_id or question: {item}")
95
+ continue
96
+ try:
97
+ submitted_answer = agent(question_text)
98
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
99
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
100
+ except Exception as e:
101
+ print(f"Error running agent on task {task_id}: {e}")
102
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
103
+
104
+ if not answers_payload:
105
+ print("Agent did not produce any answers to submit.")
106
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
107
+
108
+ # 4. Prepare Submission
109
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
110
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
111
+ print(status_update)
112
+
113
+ # 5. Submit
114
+ print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
115
+ try:
116
+ response = requests.post(submit_url, json=submission_data, timeout=60)
117
+ response.raise_for_status()
118
+ result_data = response.json()
119
+ final_status = (
120
+ f"Submission Successful!\n"
121
+ f"User: {result_data.get('username')}\n"
122
+ f"Overall Score: {result_data.get('score', 'N/A')}% "
123
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
124
+ f"Message: {result_data.get('message', 'No message received.')}"
125
+ )
126
+ print("Submission successful.")
127
+ results_df = pd.DataFrame(results_log)
128
+ return final_status, results_df
129
+ except requests.exceptions.HTTPError as e:
130
+ error_detail = f"Server responded with status {e.response.status_code}."
131
+ try:
132
+ error_json = e.response.json()
133
+ error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
134
+ except requests.exceptions.JSONDecodeError:
135
+ error_detail += f" Response: {e.response.text[:500]}"
136
+ status_message = f"Submission Failed: {error_detail}"
137
+ print(status_message)
138
+ results_df = pd.DataFrame(results_log)
139
+ return status_message, results_df
140
+ except requests.exceptions.Timeout:
141
+ status_message = "Submission Failed: The request timed out."
142
+ print(status_message)
143
+ results_df = pd.DataFrame(results_log)
144
+ return status_message, results_df
145
+ except requests.exceptions.RequestException as e:
146
+ status_message = f"Submission Failed: Network error - {e}"
147
+ print(status_message)
148
+ results_df = pd.DataFrame(results_log)
149
+ return status_message, results_df
150
+ except Exception as e:
151
+ status_message = f"An unexpected error occurred during submission: {e}"
152
+ print(status_message)
153
+ results_df = pd.DataFrame(results_log)
154
+ return status_message, results_df
155
+
156
+
157
+ # --- Build Gradio Interface using Blocks ---
158
+ with gr.Blocks() as demo:
159
+ gr.Markdown("# Basic Agent Evaluation Runner")
160
+ gr.Markdown(
161
+ """
162
+ **Instructions:**
163
+
164
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc.
165
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
166
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
167
+
168
+ ---
169
+ **Disclaimers:**
170
+ Once you click the submit button, it can take quite some time (this is the time the agent needs to go through all the questions).
171
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to address the submit-button delay, a solution could be to cache the answers and submit them in a separate action, or even to answer the questions asynchronously.
172
+ """
173
+ )
174
+
175
+ gr.LoginButton()
176
+
177
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
178
+
179
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
180
+ # Removed max_rows=10 from DataFrame constructor
181
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
182
+
183
+ run_button.click(
184
+ fn=run_and_submit_all,
185
+ outputs=[status_output, results_table]
186
+ )
187
+
188
+ if __name__ == "__main__":
189
+ print("\n" + "-"*30 + " App Starting " + "-"*30)
190
+ # Check for SPACE_HOST and SPACE_ID at startup for information
191
+ #space_host_startup = os.getenv("SPACE_HOST")
192
+ #space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
193
+
194
+ #if space_host_startup:
195
+ #print(f"✅ SPACE_HOST found: {space_host_startup}")
196
+ #print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
197
+ #else:
198
+ #print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
199
+
200
+ #if space_id_startup: # Print repo URLs if SPACE_ID is found
201
+ #print(f"✅ SPACE_ID found: {space_id_startup}")
202
+ #print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
203
+ #print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
204
+ #else:
205
+ #print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
206
+
207
+ print("-"*(60 + len(" App Starting ")) + "\n")
208
+
209
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
210
+ demo.launch(debug=True, share=True)
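
Editor's note: the disclaimer above suggests caching answers or answering the questions asynchronously, but appasync.py still walks the questions in a serial for-loop and its BasicAgent.__call__ is synchronous (only app.py declares it async). Below is a minimal sketch of the async idea, assuming an awaitable agent such as the async BasicAgent from app.py; the helper name answer_all is illustrative, not part of the commit.

    import asyncio

    async def answer_all(agent, questions_data):
        async def answer_one(item):
            task_id = item.get("task_id")
            question_text = item.get("question")
            try:
                submitted = await agent(question_text)
            except Exception as e:
                # keep going even if a single question fails
                submitted = f"AGENT ERROR: {e}"
            return {"task_id": task_id, "submitted_answer": submitted}

        # Run all questions concurrently; results come back in input order
        return await asyncio.gather(*(answer_one(item) for item in questions_data))

    # Usage from a synchronous Gradio callback:
    # answers_payload = asyncio.run(answer_all(agent, questions_data))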
custom_models.py ADDED
@@ -0,0 +1,404 @@
1
+ from typing import Optional, List, Any
2
+ from pydantic import Field, PrivateAttr
3
+ from llama_index.core.llms import CustomLLM, CompletionResponse, CompletionResponseGen, LLMMetadata
4
+ from llama_index.core.llms.callbacks import llm_completion_callback
5
+ from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
6
+ from qwen_vl_utils import process_vision_info
7
+ import torch
8
+ from typing import Any, List, Optional
9
+ from llama_index.core.embeddings import BaseEmbedding
10
+ from sentence_transformers import SentenceTransformer
11
+ from PIL import Image
12
+
13
+ class QwenVL7BCustomLLM(CustomLLM):
14
+ model_name: str = Field(default="Qwen/Qwen2.5-VL-7B-Instruct")
15
+ context_window: int = Field(default=32768)
16
+ num_output: int = Field(default=256)
17
+ _model = PrivateAttr()
18
+ _processor = PrivateAttr()
19
+
20
+ def __init__(self, **kwargs):
21
+ super().__init__(**kwargs)
22
+ self._model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
23
+ self.model_name, torch_dtype=torch.bfloat16, device_map='balanced'
24
+ )
25
+ self._processor = AutoProcessor.from_pretrained(self.model_name)
26
+
27
+ @property
28
+ def metadata(self) -> LLMMetadata:
29
+ return LLMMetadata(
30
+ context_window=self.context_window,
31
+ num_output=self.num_output,
32
+ model_name=self.model_name,
33
+ )
34
+
35
+ @llm_completion_callback()
36
+ def complete(
37
+ self,
38
+ prompt: str,
39
+ image_paths: Optional[List[str]] = None,
40
+ **kwargs: Any
41
+ ) -> CompletionResponse:
42
+ # Prepare multimodal input
43
+ messages = [{"role": "user", "content": []}]
44
+ if image_paths:
45
+ for path in image_paths:
46
+ messages[0]["content"].append({"type": "image", "image": path})
47
+ messages[0]["content"].append({"type": "text", "text": prompt})
48
+
49
+ # Tokenize and process
50
+ text = self._processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
51
+ image_inputs, video_inputs = process_vision_info(messages)
52
+ inputs = self._processor(
53
+ text=[text],
54
+ images=image_inputs,
55
+ videos=video_inputs,
56
+ padding=True,
57
+ return_tensors="pt",
58
+ )
59
+ inputs = inputs.to(self._model.device)
60
+
61
+ # Generate output
62
+ generated_ids = self._model.generate(**inputs, max_new_tokens=self.num_output)
63
+ generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
64
+ output_text = self._processor.batch_decode(
65
+ generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
66
+ )[0]
67
+ return CompletionResponse(text=output_text)
68
+
69
+ @llm_completion_callback()
70
+ def stream_complete(
71
+ self,
72
+ prompt: str,
73
+ image_paths: Optional[List[str]] = None,
74
+ **kwargs: Any
75
+ ) -> CompletionResponseGen:
76
+ response = self.complete(prompt, image_paths)
77
+ for token in response.text:
78
+ yield CompletionResponse(text=token, delta=token)
79
+
80
+ class MultimodalCLIPEmbedding(BaseEmbedding):
81
+ """
82
+ Custom embedding class using CLIP for multimodal capabilities.
83
+ """
84
+
85
+ def __init__(self, model_name: str = "clip-ViT-B-32", **kwargs: Any) -> None:
86
+ super().__init__(**kwargs)
87
+ self._model = SentenceTransformer(model_name)
88
+
89
+ @classmethod
90
+ def class_name(cls) -> str:
91
+ return "multimodal_clip"
92
+
93
+ def _get_query_embedding(self, query: str, image_path: Optional[str] = None) -> List[float]:
94
+ if image_path:
95
+ image = Image.open(image_path)
96
+ embedding = self._model.encode(image)
97
+ return embedding.tolist()
98
+ else:
99
+ embedding = self._model.encode(query)
100
+ return embedding.tolist()
101
+
102
+ def _get_text_embedding(self, text: str, image_path: Optional[str] = None) -> List[float]:
103
+ if image_path:
104
+ image = Image.open(image_path)
105
+ embedding = self._model.encode(image)
106
+ return embedding.tolist()
107
+ else:
108
+ embedding = self._model.encode(text)
109
+ return embedding.tolist()
110
+
111
+ def _get_text_embeddings(self, texts: List[str], image_paths: Optional[List[str]] = None) -> List[List[float]]:
112
+ embeddings = []
113
+ image_paths = image_paths or [None] * len(texts)
114
+
115
+ for text, img_path in zip(texts, image_paths):
116
+ if img_path:
117
+ image = Image.open(img_path)
118
+ emb = self._model.encode(image)
119
+ else:
120
+ emb = self._model.encode(text)
121
+ embeddings.append(emb.tolist())
122
+
123
+ return embeddings
124
+
125
+ async def _aget_query_embedding(self, query: str, image_path: Optional[str] = None) -> List[float]:
126
+ return self._get_query_embedding(query, image_path)
127
+
128
+ async def _aget_text_embedding(self, text: str, image_path: Optional[str] = None) -> List[float]:
129
+ return self._get_text_embedding(text, image_path)
130
+
131
+ # BAAI embedding class
132
+ # To run on Terminal before running the app, you need to install the FlagEmbedding package.
133
+ # This can be done by cloning the repository and installing it in editable mode.
134
+ #!git clone https://github.com/FlagOpen/FlagEmbedding.git
135
+ #cd FlagEmbedding/research/visual_bge
136
+ #pip install -e .
137
+ #go back to the app directory
138
+ #cd ../../..
139
+
140
+
141
+
142
+ class BaaiMultimodalEmbedding(BaseEmbedding):
143
+ """
144
+ Custom embedding class using BAAI's FlagEmbedding for multimodal capabilities.
145
+ Implements the visual_bge Visualized_BGE model with bge-m3 backend.
146
+ """
147
+
148
+ def __init__(self,
149
+ model_name_bge: str = "BAAI/bge-m3",
150
+ model_weight: str = "Visualized_m3.pth",
151
+ device: str = "cuda:1",
152
+ **kwargs: Any) -> None:
153
+ super().__init__(**kwargs)
154
+
155
+ # Set device
156
+ self.device = torch.device(device if torch.cuda.is_available() else "cpu")
157
+ print(f"BaaiMultimodalEmbedding initializing on device: {self.device}")
158
+
159
+ # Import the visual_bge module
160
+ from visual_bge.modeling import Visualized_BGE
161
+ self._model = Visualized_BGE(
162
+ model_name_bge=model_name_bge,
163
+ model_weight=model_weight
164
+ )
165
+ self._model.to(self.device)
166
+ self._model.eval()
167
+ print(f"Successfully loaded BAAI Visualized_BGE with {model_name_bge}")
168
+
169
+ @classmethod
170
+ def class_name(cls) -> str:
171
+ return "baai_multimodal"
172
+
173
+ def _get_query_embedding(self, query: str, image_path: Optional[str] = None) -> List[float]:
174
+ """Get embedding for query with optional image"""
175
+ with torch.no_grad():
176
+ if hasattr(self._model, 'encode') and hasattr(self._model, 'preprocess_val'):
177
+ # Using visual_bge
178
+ if image_path and query:
179
+ # Combined text and image query
180
+ embedding = self._model.encode(image=image_path, text=query)
181
+ elif image_path:
182
+ # Image only
183
+ embedding = self._model.encode(image=image_path)
184
+ else:
185
+ # Text only
186
+ embedding = self._model.encode(text=query)
187
+ else:
188
+ # Fallback to sentence-transformers
189
+ if image_path:
190
+ from PIL import Image
191
+ image = Image.open(image_path)
192
+ embedding = self._model.encode(image)
193
+ else:
194
+ embedding = self._model.encode(query)
195
+
196
+ return embedding.cpu().numpy().tolist() if torch.is_tensor(embedding) else embedding.tolist()
197
+
198
+ def _get_text_embedding(self, text: str, image_path: Optional[str] = None) -> List[float]:
199
+ """Get embedding for text with optional image"""
200
+ return self._get_query_embedding(text, image_path)
201
+
202
+ def _get_text_embeddings(self, texts: List[str], image_paths: Optional[List[str]] = None) -> List[List[float]]:
203
+ """Get embeddings for multiple texts with optional images"""
204
+ embeddings = []
205
+ image_paths = image_paths or [None] * len(texts)
206
+
207
+ for text, img_path in zip(texts, image_paths):
208
+ emb = self._get_text_embedding(text, img_path)
209
+ embeddings.append(emb)
210
+ return embeddings
211
+
212
+ async def _aget_query_embedding(self, query: str, image_path: Optional[str] = None) -> List[float]:
213
+ return self._get_query_embedding(query, image_path)
214
+
215
+ async def _aget_text_embedding(self, text: str, image_path: Optional[str] = None) -> List[float]:
216
+ return self._get_text_embedding(text, image_path)
217
+
218
+
219
+ class PixtralQuantizedLLM(CustomLLM):
220
+ """
221
+ Pixtral 12B quantized model implementation for Kaggle compatibility.
222
+ Uses float8 quantization for memory efficiency.
223
+ """
224
+
225
+ model_name: str = Field(default="mistralai/Pixtral-12B-2409")
226
+ context_window: int = Field(default=128000)
227
+ num_output: int = Field(default=512)
228
+ quantization: str = Field(default="fp8")
229
+ _model = PrivateAttr()
230
+ _processor = PrivateAttr()
231
+
232
+ def __init__(self, **kwargs):
233
+ super().__init__(**kwargs)
234
+
235
+ # Check if we're in a Kaggle environment or have limited resources
236
+ import psutil
237
+ available_memory = psutil.virtual_memory().available / (1024**3) # GB
238
+
239
+ if available_memory < 20: # Less than 20GB RAM
240
+ print(f"Limited memory detected ({available_memory:.1f}GB), using quantized version")
241
+ self._load_quantized_model()
242
+ else:
243
+ print("Sufficient memory available, attempting full model load")
244
+ try:
245
+ self._load_full_model()
246
+ except Exception as e:
247
+ print(f"Full model loading failed: {e}, falling back to quantized")
248
+ self._load_quantized_model()
249
+
250
+ def _load_quantized_model(self):
251
+ """Load quantized Pixtral model for resource-constrained environments"""
252
+ try:
253
+ # Try to use a pre-quantized version from HuggingFace
254
+ quantized_models = [
255
+ "RedHatAI/pixtral-12b-FP8-dynamic" ]
256
+
257
+ model_loaded = False
258
+ for model_id in quantized_models:
259
+ try:
260
+ print(f"Attempting to load quantized model: {model_id}")
261
+
262
+ # Standard quantized model loading
263
+ from transformers import AutoModelForCausalLM, AutoProcessor
264
+ self._model = AutoModelForCausalLM.from_pretrained(
265
+ model_id,
266
+ torch_dtype=torch.float8,
267
+ device_map="auto",
268
+ trust_remote_code=True
269
+ )
270
+ self._processor = AutoProcessor.from_pretrained(model_id)
271
+
272
+ print(f"Successfully loaded quantized Pixtral: {model_id}")
273
+ model_loaded = True
274
+ break
275
+
276
+ except Exception as e:
277
+ print(f"Failed to load {model_id}: {e}")
278
+ continue
279
+
280
+ if not model_loaded:
281
+ print("All quantized models failed, using CPU-only fallback")
282
+ self._load_cpu_fallback()
283
+
284
+ except Exception as e:
285
+ print(f"Quantized loading failed: {e}")
286
+ self._load_cpu_fallback()
287
+
288
+ def _load_full_model(self):
289
+ """Load full Pixtral model"""
290
+ from transformers import AutoModelForCausalLM, AutoProcessor
291
+
292
+ self._model = AutoModelForCausalLM.from_pretrained(
293
+ self.model_name,
294
+ torch_dtype=torch.bfloat16,
295
+ device_map="auto",
296
+ trust_remote_code=True
297
+ )
298
+ self._processor = AutoProcessor.from_pretrained(self.model_name)
299
+
300
+ def _load_cpu_fallback(self):
301
+ """Fallback to CPU-only inference"""
302
+ try:
303
+ from transformers import AutoModelForCausalLM, AutoProcessor
304
+
305
+ self._model = AutoModelForCausalLM.from_pretrained(
306
+ "microsoft/DialoGPT-medium", # Smaller fallback model
307
+ torch_dtype=torch.float32,
308
+ device_map="cpu"
309
+ )
310
+ self._processor = AutoProcessor.from_pretrained("microsoft/DialoGPT-medium")
311
+ print("Using CPU fallback model (DialoGPT-medium)")
312
+
313
+ except Exception as e:
314
+ print(f"CPU fallback failed: {e}")
315
+ # Use a minimal implementation
316
+ self._model = None
317
+ self._processor = None
318
+
319
+ @property
320
+ def metadata(self) -> LLMMetadata:
321
+ return LLMMetadata(
322
+ context_window=self.context_window,
323
+ num_output=self.num_output,
324
+ model_name=f"{self.model_name}-{self.quantization}",
325
+ )
326
+
327
+ @llm_completion_callback()
328
+ def complete(
329
+ self,
330
+ prompt: str,
331
+ image_paths: Optional[List[str]] = None,
332
+ **kwargs: Any
333
+ ) -> CompletionResponse:
334
+
335
+ if self._model is None:
336
+ return CompletionResponse(text="Model not available in current environment")
337
+
338
+ try:
339
+ # Prepare multimodal input if images provided
340
+ if image_paths and hasattr(self._processor, 'apply_chat_template'):
341
+ # Handle multimodal input
342
+ messages = [{"role": "user", "content": []}]
343
+
344
+ if image_paths:
345
+ for path in image_paths[:4]: # Limit to 4 images for memory
346
+ messages[0]["content"].append({"type": "image", "image": path})
347
+
348
+ messages[0]["content"].append({"type": "text", "text": prompt})
349
+
350
+ # Process the input
351
+ inputs = self._processor(messages, return_tensors="pt", padding=True)
352
+ inputs = {k: v.to(self._model.device) for k, v in inputs.items()}
353
+
354
+ # Generate
355
+ with torch.no_grad():
356
+ outputs = self._model.generate(
357
+ **inputs,
358
+ max_new_tokens=min(self.num_output, 256), # Limit for memory
359
+ do_sample=True,
360
+ temperature=0.7,
361
+ pad_token_id=self._processor.tokenizer.eos_token_id
362
+ )
363
+
364
+ # Decode response
365
+ response = self._processor.batch_decode(outputs, skip_special_tokens=True)[0]
366
+ # Extract only the new generated part
367
+ if len(messages[0]["content"]) > 0:
368
+ response = response.split(prompt)[-1].strip()
369
+
370
+ else:
371
+ # Text-only fallback
372
+ inputs = self._processor(prompt, return_tensors="pt", padding=True)
373
+ inputs = {k: v.to(self._model.device) for k, v in inputs.items()}
374
+
375
+ with torch.no_grad():
376
+ outputs = self._model.generate(
377
+ **inputs,
378
+ max_new_tokens=min(self.num_output, 256),
379
+ do_sample=True,
380
+ temperature=0.7,
381
+ pad_token_id=self._processor.tokenizer.eos_token_id
382
+ )
383
+
384
+ response = self._processor.batch_decode(outputs, skip_special_tokens=True)[0]
385
+ response = response.replace(prompt, "").strip()
386
+
387
+ return CompletionResponse(text=response)
388
+
389
+ except Exception as e:
390
+ error_msg = f"Generation error: {str(e)}"
391
+ print(error_msg)
392
+ return CompletionResponse(text=error_msg)
393
+
394
+ @llm_completion_callback()
395
+ def stream_complete(
396
+ self,
397
+ prompt: str,
398
+ image_paths: Optional[List[str]] = None,
399
+ **kwargs: Any
400
+ ) -> CompletionResponseGen:
401
+ # For quantized models, streaming might not be efficient
402
+ # Return the complete response as a single chunk
403
+ response = self.complete(prompt, image_paths, **kwargs)
404
+ yield response