Coool2 committed on
Commit c0d2b2f · 1 Parent(s): 9ca4eb5

Update agent.py

Files changed (1)
  1. agent.py +298 -143

agent.py CHANGED
@@ -2,9 +2,10 @@
  import logging
  import os
  import re
- from typing import Dict, Any, List
  from urllib.parse import urlparse
  import torch

  # Third-party imports
  import requests
@@ -22,7 +23,6 @@ from llama_index.core.workflow import Context
  from llama_index.postprocessor.colpali_rerank import ColPaliRerank
  from llama_index.core.schema import ImageNode, TextNode

-
  # LlamaIndex specialized imports
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
  from llama_index.llms.huggingface import HuggingFaceLLM
@@ -34,25 +34,38 @@ from llama_index.tools.arxiv import ArxivToolSpec
  from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
  from llama_index.core.agent.workflow import AgentWorkflow

- # --- Import all required official LlamaIndex Readers ---
  from llama_index.readers.file import (
      PDFReader,
      DocxReader,
      CSVReader,
-     PandasExcelReader)
- from typing import List, Union
- from llama_index.core import VectorStoreIndex, Document, Settings
- from llama_index.core.tools import QueryEngineTool
- from llama_index.core.node_parser import SentenceWindowNodeParser, HierarchicalNodeParser
- from llama_index.core.postprocessor import SentenceTransformerRerank
- from llama_index.core.query_engine import RetrieverQueryEngine
- from llama_index.core.query_pipeline import QueryPipeline

  import importlib.util
  import sys
-
  import weave
  weave.init("gaia-llamaindex-agents")
  def get_max_memory_config(max_memory_per_gpu):
      """Generate max_memory config for available GPUs"""
      if torch.cuda.is_available():
@@ -63,44 +76,135 @@ def get_max_memory_config(max_memory_per_gpu):
          return max_memory
      return None

- model_id = "Qwen/Qwen3-8B"
- proj_llm = HuggingFaceLLM(
-     model_name=model_id,
-     tokenizer_name=model_id,
-     device_map="auto",
-     max_new_tokens=16000,
-     model_kwargs={"torch_dtype": "auto"},
-     generate_kwargs={
-         "temperature": 0.6,
-         "top_p": 0.95,
-         "top_k": 20
-     }
- )

- code_llm = HuggingFaceLLM(
-     model_name="Qwen/Qwen2.5-Coder-3B-Instruct",
-     tokenizer_name="Qwen/Qwen2.5-Coder-3B-Instruct",
-     device_map="auto",
-     model_kwargs={
-         "torch_dtype": "auto"},
-     # Set generation parameters for precise, non-creative code output
-     generate_kwargs={"do_sample": False}
- )

- embed_model = HuggingFaceEmbedding(
-     model_name="llamaindex/vdr-2b-multi-v1",
-     device="cpu",
-     trust_remote_code=True,
-     model_kwargs={
-         "torch_dtype": "auto",
-         "low_cpu_mem_usage": True
-     }
- )

  logging.basicConfig(level=logging.INFO)
  logging.getLogger("llama_index.core.agent").setLevel(logging.DEBUG)
  logging.getLogger("llama_index.llms").setLevel(logging.DEBUG)

  Settings.llm = proj_llm
  Settings.embed_model = embed_model
@@ -109,12 +213,25 @@ def read_and_parse_content(input_path: str) -> List[Document]:
      Reads and parses content from a local file path into Document objects.
      URL handling has been moved to search_and_extract_top_url.
      """
-     # Remove URL handling - this will now only handle local files
      if not os.path.exists(input_path):
          return [Document(text=f"Error: File not found at {input_path}")]

      file_extension = os.path.splitext(input_path)[1].lower()
-
      # Readers map
      readers_map = {
          '.pdf': PDFReader(),
@@ -125,31 +242,39 @@ def read_and_parse_content(input_path: str) -> List[Document]:
          '.xlsx': PandasExcelReader(),
      }

      if file_extension in ['.mp3', '.mp4', '.wav', '.m4a', '.flac']:
          try:
-             loader = AssemblyAIAudioTranscriptReader(file_path=input_path)
-             documents = loader.load_data()
              return documents
          except Exception as e:
              return [Document(text=f"Error transcribing audio: {e}")]

      if file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']:
-         # Load the actual image content, not just the path
          try:
              with open(input_path, 'rb') as f:
                  image_data = f.read()
              return [Document(
                  text=f"IMAGE_CONTENT_BINARY",
                  metadata={
-                     "source": input_path,
-                     "type": "image",
                      "path": input_path,
-                     "image_data": image_data  # Store actual image data
                  }
              )]
          except Exception as e:
              return [Document(text=f"Error reading image: {e}")]

      if file_extension in readers_map:
          loader = readers_map[file_extension]
          documents = loader.load_data(file=input_path)
@@ -170,52 +295,51 @@ def read_and_parse_content(input_path: str) -> List[Document]:

  class DynamicQueryEngineManager:
      """Single unified manager for all RAG operations - replaces the entire static approach."""
-
      def __init__(self, initial_documents: List[str] = None):
          self.documents = []
          self.query_engine_tool = None
-
          # Load initial documents if provided
          if initial_documents:
              self._load_initial_documents(initial_documents)
-
          self._create_rag_tool()
-
      def _load_initial_documents(self, document_paths: List[str]):
          """Load initial documents using read_and_parse_content."""
          for path in document_paths:
              docs = read_and_parse_content(path)
              self.documents.extend(docs)
          print(f"Loaded {len(self.documents)} initial documents")
-
      def _create_rag_tool(self):
          """Create RAG tool using multimodal-aware parsing."""
          documents = self.documents if self.documents else [
              Document(text="No documents loaded yet. Use web search to add content.")
          ]
-
          # Separate text and image documents for proper processing
          text_documents = []
          image_documents = []
-
          for doc in documents:
              doc_type = doc.metadata.get("type", "")
              source = doc.metadata.get("source", "").lower()
              file_type = doc.metadata.get("file_type", "")
-
              # Identify image documents
-             if (doc_type in ["image", "web_image"] or
                  file_type in ['jpg', 'png', 'jpeg', 'gif', 'bmp', 'webp'] or
                  any(ext in source for ext in ['.jpg', '.png', '.jpeg', '.gif', '.bmp', '.webp'])):
                  image_documents.append(doc)
              else:
                  text_documents.append(doc)
-
          # Use UnstructuredElementNodeParser for text content with multimodal awareness
          element_parser = UnstructuredElementNodeParser()
-
          nodes = []
-
          # Process text documents with UnstructuredElementNodeParser
          if text_documents:
              try:
@@ -228,7 +352,7 @@ class DynamicQueryEngineManager:
                  simple_parser = SimpleNodeParser.from_defaults(chunk_size=1024, chunk_overlap=200)
                  text_nodes = simple_parser.get_nodes_from_documents(text_documents)
                  nodes.extend(text_nodes)
-
          # Process image documents as ImageNodes
          if image_documents:
              for img_doc in image_documents:
@@ -248,61 +372,67 @@ class DynamicQueryEngineManager:
                          metadata=img_doc.metadata
                      )
                      nodes.append(text_node)
-
          index = VectorStoreIndex(nodes)
          class HybridReranker:
              def __init__(self):
                  self.text_reranker = SentenceTransformerRerank(
-                     model="cross-encoder/ms-marco-MiniLM-L-2-v2",
                      top_n=3
                  )
                  self.visual_reranker = ColPaliRerank(
                      top_n=3,
                      model="vidore/colpali-v1.2",
                      keep_retrieval_score=True,
                      device="cpu"
                  )
              def postprocess_nodes(self, nodes, query_bundle):
-                 # Your exact implementation
                  text_nodes = []
                  visual_nodes = []
-
                  for node in nodes:
-                     if (hasattr(node, 'image_path') and node.image_path) or \
-                        (hasattr(node, 'metadata') and node.metadata.get('file_type') in ['jpg', 'png', 'jpeg', 'pdf']) or \
-                        (hasattr(node, 'metadata') and node.metadata.get('type') in ['image', 'web_image']):
                          visual_nodes.append(node)
                      else:
                          text_nodes.append(node)
-
                  reranked_text = []
                  reranked_visual = []
-
                  if text_nodes:
                      reranked_text = self.text_reranker.postprocess_nodes(text_nodes, query_bundle)
-
                  if visual_nodes:
                      reranked_visual = self.visual_reranker.postprocess_nodes(visual_nodes, query_bundle)
-
                  combined_results = []
                  max_len = max(len(reranked_text), len(reranked_visual))
-
                  for i in range(max_len):
                      if i < len(reranked_text):
                          combined_results.append(reranked_text[i])
                      if i < len(reranked_visual):
                          combined_results.append(reranked_visual[i])
-
                  return combined_results[:5]
-
          hybrid_reranker = HybridReranker()
-
          query_engine = index.as_query_engine(
              similarity_top_k=20,
              node_postprocessors=[hybrid_reranker],
              response_mode="tree_summarize"
          )
-
          self.query_engine_tool = QueryEngineTool.from_defaults(
              query_engine=query_engine,
              name="dynamic_hybrid_multimodal_rag_tool",
@@ -312,16 +442,16 @@ class DynamicQueryEngineManager:
                  "Automatically updated with web search content."
              )
          )
-
      def add_documents(self, new_documents: List[Document]):
          """Add documents from web search and recreate tool."""
          self.documents.extend(new_documents)
          self._create_rag_tool()  # Recreate with ALL documents
          print(f"Added {len(new_documents)} documents. Total: {len(self.documents)}")
-
      def get_tool(self):
          return self.query_engine_tool
-
  # Global instance
  dynamic_qe_manager = DynamicQueryEngineManager()

@@ -336,15 +466,16 @@ def search_and_extract_content_from_url(query: str) -> List[Document]:
      """
      # Get URL from search
      search_results = base_duckduckgo_tool(query, max_results=1)
-     url_match = re.search(r"https?://\S+", str(search_results))
-
      if not url_match:
          return [Document(text="No URL could be extracted from the search results.")]
-
      url = url_match.group(0)[:-2]
      print(url)
      documents = []
-
      try:
          # Check if it's a YouTube URL
          if "youtube" in urlparse(url).netloc or "youtu.be" in urlparse(url).netloc:
@@ -353,34 +484,36 @@ def search_and_extract_content_from_url(query: str) -> List[Document]:
          else:
              loader = BeautifulSoupWebReader()
              documents = loader.load_data(urls=[url])
          for doc in documents:
              doc.metadata["source"] = url
              doc.metadata["type"] = "web_text"
          return documents
      except Exception as e:
          # Handle any exceptions that occur during content extraction
          return [Document(text=f"Error extracting content from URL: {str(e)}")]
-
  def enhanced_web_search_and_update(query: str) -> str:
      """
      Performs web search, extracts content, and adds it to the dynamic query engine.
      """
      # Extract content from web search
      documents = search_and_extract_content_from_url(query)
-
      # Add documents to the dynamic query engine
      if documents and not any("Error" in doc.text for doc in documents):
          dynamic_qe_manager.add_documents(documents)
-
          # Return summary of what was added
          text_docs = [doc for doc in documents if doc.metadata.get("type") == "web_text"]
          image_docs = [doc for doc in documents if doc.metadata.get("type") == "web_image"]
-
          summary = f"Successfully added web content to knowledge base:\n"
          summary += f"- {len(text_docs)} text documents\n"
          summary += f"- {len(image_docs)} images\n"
          summary += f"Source: {documents[0].metadata.get('source', 'Unknown')}"
-
          return summary
      else:
          error_msg = documents[0].text if documents else "No content extracted"
@@ -431,7 +564,7 @@ for module in core_modules:
  # Data science modules (may not be available)
  optional_modules = {
      "numpy": "numpy",
-     "np": "numpy",
      "pandas": "pandas",
      "pd": "pandas",
      "scipy": "scipy",
@@ -468,15 +601,13 @@ if safe_globals.get("PIL"):
      safe_globals["Image"] = image_module

  def execute_python_code(code: str) -> str:
-     try:
          exec_locals = {}
          exec(code, safe_globals, exec_locals)
-
          if 'result' in exec_locals:
              return str(exec_locals['result'])
          else:
              return "Code executed successfully"
-
      except Exception as e:
          return f"Code execution failed: {str(e)}"

@@ -490,21 +621,20 @@ def clean_response(response: str) -> str:
      """Clean response by removing common prefixes"""
      response_clean = response.strip()
      prefixes_to_remove = [
-         "FINAL ANSWER:", "Answer:", "The answer is:",
-         "Based on my analysis,", "After reviewing,",
          "The result is:", "Final result:", "According to",
          "In conclusion,", "Therefore,", "Thus,"
      ]
-
      for prefix in prefixes_to_remove:
          if response_clean.startswith(prefix):
              response_clean = response_clean[len(prefix):].strip()
-
      return response_clean

  def llm_reformat(response: str, question: str) -> str:
      """Use LLM to reformat the response according to GAIA requirements"""
-
      format_prompt = f"""Extract the exact answer from the response below. Follow GAIA formatting rules strictly.

  GAIA Format Rules:
@@ -532,17 +662,18 @@ Answer: C++, Java, Python
  Now extract the exact answer:
  Question: {question}
  Response: {response}
  Answer:"""

      try:
          # Use the global LLM instance
          formatting_response = proj_llm.complete(format_prompt)
          answer = str(formatting_response).strip()
-
          # Extract just the answer after "Answer:"
          if "Answer:" in answer:
              answer = answer.split("Answer:")[-1].strip()
-
          return answer
      except Exception as e:
          print(f"LLM reformatting failed: {e}")
@@ -551,46 +682,56 @@ Answer:"""
  def final_answer_tool(agent_response: str, question: str) -> str:
      """
      Simplified final answer tool using only LLM reformatting.
-
      Args:
          agent_response: The raw response from agent reasoning
          question: The original question for context
-
      Returns:
          Exact answer in GAIA format
      """
-
      # Step 1: Clean the response
      cleaned_response = clean_response(agent_response)
-
      # Step 2: Use LLM reformatting
      formatted_answer = llm_reformat(cleaned_response, question)
-
      print(f"Original response cleaned: {cleaned_response[:100]}...")
      print(f"LLM formatted answer: {formatted_answer}")
-
-     return formatted_answer

  class EnhancedGAIAAgent:
      def __init__(self):
          print("Initializing Enhanced GAIA Agent...")
-
          # Check for the HuggingFace token
-         hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
          if not hf_token:
              print("Warning: HUGGINGFACEHUB_API_TOKEN not found, some features may not work")
-
          # Initialize the dynamic query engine manager
          self.dynamic_qe_manager = DynamicQueryEngineManager()
-
          # Create enhanced agents with dynamic tools
          self.external_knowledge_agent = ReActAgent(
-             name="external_knowledge_agent",
              description="Advanced information retrieval with dynamic knowledge base",
-             system_prompt="""You are an advanced information specialist with a sophisticated RAG system.
-             Your knowledge base uses hybrid reranking and grows dynamically with each web search and document addition.
-             Always add relevant content to your knowledge base, then query it for answers.""",
              tools=[
                  enhanced_web_search_tool,
                  self.dynamic_qe_manager.get_tool(),
@@ -598,8 +739,9 @@ class EnhancedGAIAAgent:
              ],
              llm=proj_llm,
              max_steps=8,
-             verbose=True)
          self.code_agent = ReActAgent(
              name="code_agent",
              description="Handles Python code for calculations and data processing",
@@ -607,28 +749,30 @@ class EnhancedGAIAAgent:
              tools=[code_execution_tool],
              llm=code_llm,
              max_steps=6,
-             verbose=True)
-
          # Fixed indentation: coordinator initialization inside __init__
          self.coordinator = AgentWorkflow(
              agents=[self.external_knowledge_agent, self.code_agent],
              root_agent="external_knowledge_agent"
          )
-
      def download_gaia_file(self, task_id: str, api_url: str = "https://agents-course-unit4-scoring.hf.space") -> str:
          """Download file associated with task_id"""
          try:
              response = requests.get(f"{api_url}/files/{task_id}", timeout=30)
              response.raise_for_status()
-
              filename = f"task_{task_id}_file"
              with open(filename, 'wb') as f:
                  f.write(response.content)
              return filename
          except Exception as e:
              print(f"Failed to download file for task {task_id}: {e}")
              return None
-
      def add_documents_to_knowledge_base(self, file_path: str):
          """Add downloaded GAIA documents to the dynamic knowledge base"""
          try:
@@ -636,25 +780,26 @@ class EnhancedGAIAAgent:
              if documents:
                  self.dynamic_qe_manager.add_documents(documents)
                  print(f"Added {len(documents)} documents from {file_path} to dynamic knowledge base")
-
                  # Update the agent's tools with the refreshed query engine
                  self.external_knowledge_agent.tools = [
                      enhanced_web_search_tool,
                      self.dynamic_qe_manager.get_tool(),  # Get the updated tool
                      code_execution_tool
                  ]
              return True
          except Exception as e:
              print(f"Failed to add documents from {file_path}: {e}")
              return False
-
      async def solve_gaia_question(self, question_data: Dict[str, Any]) -> str:
          """
          Solve GAIA question with dynamic knowledge base integration
          """
          question = question_data.get("Question", "")
          task_id = question_data.get("task_id", "")
-
          # Try to download and add file to knowledge base if task_id provided
          file_path = None
          if task_id:
@@ -666,44 +811,54 @@ class EnhancedGAIAAgent:
                      print(f"Successfully integrated GAIA file into dynamic knowledge base")
              except Exception as e:
                  print(f"Failed to download/process file for task {task_id}: {e}")
-
-         # Enhanced context prompt with dynamic knowledge base awareness
          context_prompt = f"""
  GAIA Task ID: {task_id}
  Question: {question}
  {f'File processed and added to knowledge base: {file_path}' if file_path else 'No additional files'}

- You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
-
          try:
              ctx = Context(self.coordinator)
              print("=== AGENT REASONING STEPS ===")
              print(f"Dynamic knowledge base contains {len(self.dynamic_qe_manager.documents)} documents")
-
              handler = self.coordinator.run(ctx=ctx, user_msg=context_prompt)
-
              full_response = ""
              async for event in handler.stream_events():
                  if isinstance(event, AgentStream):
                      print(event.delta, end="", flush=True)
                      full_response += event.delta
-
              final_response = await handler
              print("\n=== END REASONING ===")
-
              # Extract the final formatted answer
-             final_answer = str(final_response).strip()
-
              print(f"Final GAIA formatted answer: {final_answer}")
              print(f"Knowledge base now contains {len(self.dynamic_qe_manager.documents)} documents")
-
              return final_answer
-
          except Exception as e:
              error_msg = f"Error processing question: {str(e)}"
              print(error_msg)
              return error_msg
-
      def get_knowledge_base_stats(self):
          """Get statistics about the current knowledge base"""
          return {
@@ -711,14 +866,14 @@ You are a general AI assistant. I will ask you a question. Report your thoughts,
              "document_sources": [doc.metadata.get("source", "Unknown") for doc in self.dynamic_qe_manager.documents]
          }

- import asyncio
-
  async def main():
      agent = EnhancedGAIAAgent()
      question_data = {
          "Question": "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.",
          "task_id": ""
      }
      print(question_data)
      answer = await agent.solve_gaia_question(question_data)
      print(f"Answer: {answer}")
 
agent.py (new version, additions marked +)

  import logging
  import os
  import re
+ from typing import Dict, Any, List, Optional, Union
  from urllib.parse import urlparse
  import torch
+ import asyncio

  # Third-party imports
  import requests
 
  from llama_index.postprocessor.colpali_rerank import ColPaliRerank
  from llama_index.core.schema import ImageNode, TextNode

  # LlamaIndex specialized imports
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
  from llama_index.llms.huggingface import HuggingFaceLLM
 
  from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
  from llama_index.core.agent.workflow import AgentWorkflow

+ # Import all required official LlamaIndex Readers
  from llama_index.readers.file import (
      PDFReader,
      DocxReader,
      CSVReader,
+     PandasExcelReader,
+     VideoAudioReader  # Adding VideoAudioReader for handling audio/video without API
+ )
+
+ # Optional API-based imports (conditionally loaded)
+ try:
+     # Gemini (for API mode); the vLLM import was dropped from this block so a
+     # missing vllm package cannot wrongly disable Gemini - Vllm is imported
+     # where it is actually used, in initialize_models below
+     from llama_index.llms.gemini import Gemini
+     from llama_index.embeddings.gemini import GeminiEmbedding
+     GEMINI_AVAILABLE = True
+ except ImportError:
+     GEMINI_AVAILABLE = False
+
+ try:
+     # LlamaParse for document parsing (API mode)
+     from llama_cloud_services import LlamaParse
+     LLAMAPARSE_AVAILABLE = True
+ except ImportError:
+     LLAMAPARSE_AVAILABLE = False
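Review note: these flags only record that the optional packages import cleanly; the API-mode branches still need their keys at runtime. A minimal sketch of the intended gate (the helper name is hypothetical, the env var name is taken from initialize_models below):

    def api_mode_ready() -> bool:
        # Hypothetical convenience: Gemini package present AND its key configured.
        return GEMINI_AVAILABLE and bool(os.environ.get("GOOGLE_API_KEY"))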

  import importlib.util
  import sys
  import weave
+
  weave.init("gaia-llamaindex-agents")
+
  def get_max_memory_config(max_memory_per_gpu):
      """Generate max_memory config for available GPUs"""
      if torch.cuda.is_available():
 
          return max_memory
      return None

+ # Initialize models based on API availability
+ def initialize_models(use_api_mode=False):
+     """Initialize LLM, Code LLM, and Embed models based on mode"""
+     if use_api_mode and GEMINI_AVAILABLE:
+         # API Mode - Using Google's Gemini models
+         try:
+             print("Initializing models in API mode with Gemini...")
+             google_api_key = os.environ.get("GOOGLE_API_KEY")
+             if not google_api_key:
+                 print("WARNING: GOOGLE_API_KEY not found. Falling back to non-API mode.")
+                 return initialize_models(use_api_mode=False)
+
+             # Main LLM - Gemini 2.0 Flash
+             proj_llm = Gemini(
+                 model="models/gemini-2.0-flash",
+                 api_key=google_api_key,
+                 max_tokens=16000,
+                 temperature=0.6,
+                 top_p=0.95,
+                 top_k=20
+             )
+
+             # Same model for code since Gemini is good at code
+             code_llm = proj_llm
+
+             # Gemini embedding model
+             embed_model = GeminiEmbedding(
+                 model_name="models/embedding-001",
+                 api_key=google_api_key,
+                 task_type="retrieval_document"
+             )
+
+             return proj_llm, code_llm, embed_model
+         except Exception as e:
+             print(f"Error initializing API mode: {e}")
+             print("Falling back to non-API mode...")
+             return initialize_models(use_api_mode=False)
+     else:
+         # Non-API Mode - Using HuggingFace models
+         print("Initializing models in non-API mode with local models...")
+         try:
+             # Try to use Pixtral 12B with vLLM if available
+             pixtral_model = "Qwen/Qwen3-8B"  # Fallback model
+             try:
+                 if importlib.util.find_spec("llama_index.llms.vllm") is not None:
+                     from llama_index.llms.vllm import Vllm
+                     # Check if Pixtral 12B is accessible
+                     if os.path.exists("/path/to/pixtral-12b") or True:  # Placeholder check
+                         pixtral_model = "mistralai/pixtral-12b"
+                         print("Using Pixtral 12B with vLLM")
+
+                         # Custom prompt template for Pixtral model
+                         def messages_to_prompt(messages):
+                             prompt = "\n".join([str(x) for x in messages])
+                             return f"<s>[INST] {prompt} [/INST] </s>\n"
+
+                         def completion_to_prompt(completion):
+                             return f"<s>[INST] {completion} [/INST] </s>\n"
+
+                         proj_llm = Vllm(
+                             model=pixtral_model,
+                             tensor_parallel_size=1,  # Adjust based on available GPUs
+                             max_new_tokens=16000,
+                             messages_to_prompt=messages_to_prompt,
+                             completion_to_prompt=completion_to_prompt,
+                             temperature=0.6,
+                             top_p=0.95,
+                             top_k=20
+                         )
+                     else:
+                         # Use regular Qwen model if Pixtral not found
+                         raise ImportError("Pixtral 12B not found")
+                 else:
+                     raise ImportError("vLLM not available")
+             except (ImportError, Exception) as e:
+                 print(f"Error loading Pixtral with vLLM: {e}")
+                 print(f"Falling back to {pixtral_model} with HuggingFace...")
+
+                 # Fallback to regular HuggingFace LLM
+                 proj_llm = HuggingFaceLLM(
+                     model_name=pixtral_model,
+                     tokenizer_name=pixtral_model,
+                     device_map="auto",
+                     max_new_tokens=16000,
+                     model_kwargs={"torch_dtype": "auto"},
+                     generate_kwargs={
+                         "temperature": 0.6,
+                         "top_p": 0.95,
+                         "top_k": 20
+                     }
+                 )
+
+             # Code LLM
+             code_llm = HuggingFaceLLM(
+                 model_name="Qwen/Qwen2.5-Coder-3B-Instruct",
+                 tokenizer_name="Qwen/Qwen2.5-Coder-3B-Instruct",
+                 device_map="auto",
+                 model_kwargs={"torch_dtype": "auto"},
+                 generate_kwargs={"do_sample": False}
+             )
+
+             # Embedding model
+             embed_model = HuggingFaceEmbedding(
+                 model_name="llamaindex/vdr-2b-multi-v1",
+                 device="cpu",
+                 trust_remote_code=True,
+                 model_kwargs={
+                     "torch_dtype": "auto",
+                     "low_cpu_mem_usage": True
+                 }
+             )
+
+             return proj_llm, code_llm, embed_model
+         except Exception as e:
+             print(f"Error initializing models: {e}")
+             raise
+
+ # Setup logging
  logging.basicConfig(level=logging.INFO)
  logging.getLogger("llama_index.core.agent").setLevel(logging.DEBUG)
  logging.getLogger("llama_index.llms").setLevel(logging.DEBUG)

+ # Use environment variable to determine API mode
+ USE_API_MODE = os.environ.get("USE_API_MODE", "false").lower() == "true"
+
+ # Initialize models based on API mode setting
+ proj_llm, code_llm, embed_model = initialize_models(use_api_mode=USE_API_MODE)
+
+ # Set global settings
  Settings.llm = proj_llm
  Settings.embed_model = embed_model
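Review note: USE_API_MODE is read once at import time, so the mode has to be set in the environment before this module loads. A minimal usage sketch, assuming the module is importable as agent:

    import os
    os.environ["USE_API_MODE"] = "true"    # must be set before the import below
    # os.environ["GOOGLE_API_KEY"] = "..." # required for the Gemini path, elided here

    import agent  # model initialization runs at import and picks the API branch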

      Reads and parses content from a local file path into Document objects.
      URL handling has been moved to search_and_extract_top_url.
      """
+     # Check if API mode and LlamaParse is available for enhanced document parsing
+     if USE_API_MODE and LLAMAPARSE_AVAILABLE:
+         try:
+             llamacloud_api_key = os.environ.get("LLAMA_CLOUD_API_KEY")
+             if llamacloud_api_key:
+                 # Use LlamaParse for enhanced document parsing
+                 print(f"Using LlamaParse to extract content from {input_path}")
+                 parser = LlamaParse(api_key=llamacloud_api_key)
+                 return parser.load_data(input_path)
+         except Exception as e:
+             print(f"Error using LlamaParse: {e}")
+             print("Falling back to standard document parsing...")
+
+     # Standard document parsing (fallback)
      if not os.path.exists(input_path):
          return [Document(text=f"Error: File not found at {input_path}")]

      file_extension = os.path.splitext(input_path)[1].lower()
+
      # Readers map
      readers_map = {
          '.pdf': PDFReader(),

          '.xlsx': PandasExcelReader(),
      }

+     # Audio/Video files using the appropriate reader based on mode
      if file_extension in ['.mp3', '.mp4', '.wav', '.m4a', '.flac']:
          try:
+             if USE_API_MODE:
+                 # Use AssemblyAI with API mode
+                 loader = AssemblyAIAudioTranscriptReader(file_path=input_path)
+                 documents = loader.load_data()
+             else:
+                 # Use VideoAudioReader without API
+                 loader = VideoAudioReader()
+                 documents = loader.load_data(input_path)
              return documents
          except Exception as e:
              return [Document(text=f"Error transcribing audio: {e}")]

+     # Handle image files
      if file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']:
          try:
              with open(input_path, 'rb') as f:
                  image_data = f.read()
              return [Document(
                  text=f"IMAGE_CONTENT_BINARY",
                  metadata={
+                     "source": input_path,
+                     "type": "image",
                      "path": input_path,
+                     "image_data": image_data
                  }
              )]
          except Exception as e:
              return [Document(text=f"Error reading image: {e}")]

+     # Use appropriate reader for supported file types
      if file_extension in readers_map:
          loader = readers_map[file_extension]
          documents = loader.load_data(file=input_path)
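Review note: a short usage sketch of the parsing entry point; the file name is hypothetical, and with USE_API_MODE unset it exercises the local readers_map path:

    docs = read_and_parse_content("report.pdf")  # hypothetical local file
    print(len(docs), docs[0].text[:80])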
 

  class DynamicQueryEngineManager:
      """Single unified manager for all RAG operations - replaces the entire static approach."""
+
      def __init__(self, initial_documents: List[str] = None):
          self.documents = []
          self.query_engine_tool = None
+
          # Load initial documents if provided
          if initial_documents:
              self._load_initial_documents(initial_documents)
+
          self._create_rag_tool()
+
      def _load_initial_documents(self, document_paths: List[str]):
          """Load initial documents using read_and_parse_content."""
          for path in document_paths:
              docs = read_and_parse_content(path)
              self.documents.extend(docs)
          print(f"Loaded {len(self.documents)} initial documents")
+
      def _create_rag_tool(self):
          """Create RAG tool using multimodal-aware parsing."""
          documents = self.documents if self.documents else [
              Document(text="No documents loaded yet. Use web search to add content.")
          ]
+
          # Separate text and image documents for proper processing
          text_documents = []
          image_documents = []
+
          for doc in documents:
              doc_type = doc.metadata.get("type", "")
              source = doc.metadata.get("source", "").lower()
              file_type = doc.metadata.get("file_type", "")
+
              # Identify image documents
+             if (doc_type in ["image", "web_image"] or
                  file_type in ['jpg', 'png', 'jpeg', 'gif', 'bmp', 'webp'] or
                  any(ext in source for ext in ['.jpg', '.png', '.jpeg', '.gif', '.bmp', '.webp'])):
                  image_documents.append(doc)
              else:
                  text_documents.append(doc)
+
          # Use UnstructuredElementNodeParser for text content with multimodal awareness
          element_parser = UnstructuredElementNodeParser()

          nodes = []
+
          # Process text documents with UnstructuredElementNodeParser
          if text_documents:
              try:

                  simple_parser = SimpleNodeParser.from_defaults(chunk_size=1024, chunk_overlap=200)
                  text_nodes = simple_parser.get_nodes_from_documents(text_documents)
                  nodes.extend(text_nodes)
+
          # Process image documents as ImageNodes
          if image_documents:
              for img_doc in image_documents:

                      metadata=img_doc.metadata
                  )
                  nodes.append(text_node)
+
          index = VectorStoreIndex(nodes)
+
          class HybridReranker:
              def __init__(self):
                  self.text_reranker = SentenceTransformerRerank(
+                     model="cross-encoder/ms-marco-MiniLM-L-2-v2",
                      top_n=3
                  )
+
                  self.visual_reranker = ColPaliRerank(
                      top_n=3,
                      model="vidore/colpali-v1.2",
                      keep_retrieval_score=True,
                      device="cpu"
                  )
+
              def postprocess_nodes(self, nodes, query_bundle):
+                 # Separate text and visual nodes
                  text_nodes = []
                  visual_nodes = []
+
                  for node in nodes:
+                     if ((hasattr(node, 'image_path') and node.image_path) or
+                         (hasattr(node, 'metadata') and node.metadata.get('file_type') in ['jpg', 'png', 'jpeg', 'pdf']) or
+                         (hasattr(node, 'metadata') and node.metadata.get('type') in ['image', 'web_image'])):
                          visual_nodes.append(node)
                      else:
                          text_nodes.append(node)
+
+                 # Apply appropriate reranker
                  reranked_text = []
                  reranked_visual = []
+
                  if text_nodes:
                      reranked_text = self.text_reranker.postprocess_nodes(text_nodes, query_bundle)
+
                  if visual_nodes:
                      reranked_visual = self.visual_reranker.postprocess_nodes(visual_nodes, query_bundle)
+
+                 # Interleave results
                  combined_results = []
                  max_len = max(len(reranked_text), len(reranked_visual))
+
                  for i in range(max_len):
                      if i < len(reranked_text):
                          combined_results.append(reranked_text[i])
                      if i < len(reranked_visual):
                          combined_results.append(reranked_visual[i])
+
                  return combined_results[:5]
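Review note: the interleave keeps the head of both modality rankings instead of letting one dominate the top-k; a tiny standalone illustration of the merge (placeholder strings instead of nodes):

    reranked_text = ["t1", "t2", "t3"]  # placeholder reranked text results
    reranked_visual = ["v1", "v2"]      # placeholder reranked visual results
    combined = []
    for i in range(max(len(reranked_text), len(reranked_visual))):
        if i < len(reranked_text):
            combined.append(reranked_text[i])
        if i < len(reranked_visual):
            combined.append(reranked_visual[i])
    print(combined[:5])  # ['t1', 'v1', 't2', 'v2', 't3']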
+
          hybrid_reranker = HybridReranker()
+
          query_engine = index.as_query_engine(
              similarity_top_k=20,
              node_postprocessors=[hybrid_reranker],
              response_mode="tree_summarize"
          )
+
+         # Create QueryEngineTool
+         from llama_index.core.tools import QueryEngineTool
+
          self.query_engine_tool = QueryEngineTool.from_defaults(
              query_engine=query_engine,
              name="dynamic_hybrid_multimodal_rag_tool",

              "Automatically updated with web search content."
              )
          )
+
      def add_documents(self, new_documents: List[Document]):
          """Add documents from web search and recreate tool."""
          self.documents.extend(new_documents)
          self._create_rag_tool()  # Recreate with ALL documents
          print(f"Added {len(new_documents)} documents. Total: {len(self.documents)}")
+
      def get_tool(self):
          return self.query_engine_tool
+
  # Global instance
  dynamic_qe_manager = DynamicQueryEngineManager()
 
      """
      # Get URL from search
      search_results = base_duckduckgo_tool(query, max_results=1)
+     url_match = re.search(r"https?://\S+", str(search_results))
+
      if not url_match:
          return [Document(text="No URL could be extracted from the search results.")]
+
      url = url_match.group(0)[:-2]
      print(url)
+
      documents = []
+
      try:
          # Check if it's a YouTube URL
          if "youtube" in urlparse(url).netloc or "youtu.be" in urlparse(url).netloc:

          else:
              loader = BeautifulSoupWebReader()
              documents = loader.load_data(urls=[url])
+
          for doc in documents:
              doc.metadata["source"] = url
              doc.metadata["type"] = "web_text"
+
          return documents
      except Exception as e:
          # Handle any exceptions that occur during content extraction
          return [Document(text=f"Error extracting content from URL: {str(e)}")]
+
  def enhanced_web_search_and_update(query: str) -> str:
      """
      Performs web search, extracts content, and adds it to the dynamic query engine.
      """
      # Extract content from web search
      documents = search_and_extract_content_from_url(query)
+
      # Add documents to the dynamic query engine
      if documents and not any("Error" in doc.text for doc in documents):
          dynamic_qe_manager.add_documents(documents)
+
          # Return summary of what was added
          text_docs = [doc for doc in documents if doc.metadata.get("type") == "web_text"]
          image_docs = [doc for doc in documents if doc.metadata.get("type") == "web_image"]
+
          summary = f"Successfully added web content to knowledge base:\n"
          summary += f"- {len(text_docs)} text documents\n"
          summary += f"- {len(image_docs)} images\n"
          summary += f"Source: {documents[0].metadata.get('source', 'Unknown')}"
+
          return summary
      else:
          error_msg = documents[0].text if documents else "No content extracted"
 
  # Data science modules (may not be available)
  optional_modules = {
      "numpy": "numpy",
+     "np": "numpy",
      "pandas": "pandas",
      "pd": "pandas",
      "scipy": "scipy",

      safe_globals["Image"] = image_module

  def execute_python_code(code: str) -> str:
+     try:
          exec_locals = {}
          exec(code, safe_globals, exec_locals)

          if 'result' in exec_locals:
              return str(exec_locals['result'])
          else:
              return "Code executed successfully"

      except Exception as e:
          return f"Code execution failed: {str(e)}"

      """Clean response by removing common prefixes"""
      response_clean = response.strip()
      prefixes_to_remove = [
+         "FINAL ANSWER:", "Answer:", "The answer is:",
+         "Based on my analysis,", "After reviewing,",
          "The result is:", "Final result:", "According to",
          "In conclusion,", "Therefore,", "Thus,"
      ]
+
      for prefix in prefixes_to_remove:
          if response_clean.startswith(prefix):
              response_clean = response_clean[len(prefix):].strip()
+
      return response_clean

  def llm_reformat(response: str, question: str) -> str:
      """Use LLM to reformat the response according to GAIA requirements"""

      format_prompt = f"""Extract the exact answer from the response below. Follow GAIA formatting rules strictly.

  GAIA Format Rules:

  Now extract the exact answer:
  Question: {question}
  Response: {response}
+
  Answer:"""

      try:
          # Use the global LLM instance
          formatting_response = proj_llm.complete(format_prompt)
          answer = str(formatting_response).strip()
+
          # Extract just the answer after "Answer:"
          if "Answer:" in answer:
              answer = answer.split("Answer:")[-1].strip()
+
          return answer
      except Exception as e:
          print(f"LLM reformatting failed: {e}")
 
  def final_answer_tool(agent_response: str, question: str) -> str:
      """
      Simplified final answer tool using only LLM reformatting.

      Args:
          agent_response: The raw response from agent reasoning
          question: The original question for context

      Returns:
          Exact answer in GAIA format
      """
      # Step 1: Clean the response
      cleaned_response = clean_response(agent_response)
+
      # Step 2: Use LLM reformatting
      formatted_answer = llm_reformat(cleaned_response, question)
+
      print(f"Original response cleaned: {cleaned_response[:100]}...")
      print(f"LLM formatted answer: {formatted_answer}")

+     return formatted_answer
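Review note: with the return statement restored, this two-stage pipeline (prefix cleanup, then LLM extraction) is what solve_gaia_question now calls on the raw agent output; a usage sketch with a hypothetical input and output:

    answer = final_answer_tool("FINAL ANSWER: 3 studio albums", "How many studio albums ...?")
    print(answer)  # ideally just "3" once the LLM strips the units (hypothetical output)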

  class EnhancedGAIAAgent:
      def __init__(self):
          print("Initializing Enhanced GAIA Agent...")
+
          # Check for the HuggingFace token
+         hf_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
          if not hf_token:
              print("Warning: HUGGINGFACEHUB_API_TOKEN not found, some features may not work")
+
          # Initialize the dynamic query engine manager
          self.dynamic_qe_manager = DynamicQueryEngineManager()
+
          # Create enhanced agents with dynamic tools
          self.external_knowledge_agent = ReActAgent(
+             name="external_knowledge_agent",
              description="Advanced information retrieval with dynamic knowledge base",
+             system_prompt="""You are an advanced information specialist with a sophisticated RAG system.
+ Your knowledge base uses hybrid reranking and grows dynamically with each web search and document addition.
+
+ IMPORTANT INSTRUCTIONS FOR YOUR REASONING PROCESS:
+ 1. Pay careful attention to ALL details in the user's question.
+ 2. Think step by step about what is being asked, breaking down the requirements.
+ 3. Identify specific qualifiers (e.g., "studio albums" vs just "albums", "between 2000-2010" vs "all time").
+ 4. If searching for information, include ALL important details in your search query.
+ 5. Double-check that your final answer addresses the EXACT question asked, not a simplified version.
+
+ For example:
+ - If asked "How many studio albums did Taylor Swift release between 2006-2010?", don't just search for
+   "Taylor Swift albums" - include "studio albums" AND the specific date range in your search.
+ - If asked about "Fortune 500 companies headquartered in California", don't just search for
+   "Fortune 500 companies" - include the location qualifier.
+
+ Always add relevant content to your knowledge base, then query it for answers.""",
              tools=[
                  enhanced_web_search_tool,
                  self.dynamic_qe_manager.get_tool(),

              ],
              llm=proj_llm,
              max_steps=8,
+             verbose=True
+         )
+
          self.code_agent = ReActAgent(
              name="code_agent",
              description="Handles Python code for calculations and data processing",

              tools=[code_execution_tool],
              llm=code_llm,
              max_steps=6,
+             verbose=True
+         )
+
          # Fixed indentation: coordinator initialization inside __init__
          self.coordinator = AgentWorkflow(
              agents=[self.external_knowledge_agent, self.code_agent],
              root_agent="external_knowledge_agent"
          )
+
761
  def download_gaia_file(self, task_id: str, api_url: str = "https://agents-course-unit4-scoring.hf.space") -> str:
762
  """Download file associated with task_id"""
763
  try:
764
  response = requests.get(f"{api_url}/files/{task_id}", timeout=30)
765
  response.raise_for_status()
 
766
  filename = f"task_{task_id}_file"
767
+
768
  with open(filename, 'wb') as f:
769
  f.write(response.content)
770
+
771
  return filename
772
  except Exception as e:
773
  print(f"Failed to download file for task {task_id}: {e}")
774
  return None
775
+
776
  def add_documents_to_knowledge_base(self, file_path: str):
777
  """Add downloaded GAIA documents to the dynamic knowledge base"""
778
  try:
 
780
  if documents:
781
  self.dynamic_qe_manager.add_documents(documents)
782
  print(f"Added {len(documents)} documents from {file_path} to dynamic knowledge base")
783
+
784
  # Update the agent's tools with the refreshed query engine
785
  self.external_knowledge_agent.tools = [
786
  enhanced_web_search_tool,
787
  self.dynamic_qe_manager.get_tool(), # Get the updated tool
788
  code_execution_tool
789
  ]
790
+
791
  return True
792
  except Exception as e:
793
  print(f"Failed to add documents from {file_path}: {e}")
794
  return False
795
+
796
  async def solve_gaia_question(self, question_data: Dict[str, Any]) -> str:
797
  """
798
  Solve GAIA question with dynamic knowledge base integration
799
  """
800
  question = question_data.get("Question", "")
801
  task_id = question_data.get("task_id", "")
802
+
803
  # Try to download and add file to knowledge base if task_id provided
804
  file_path = None
805
  if task_id:
 
811
  print(f"Successfully integrated GAIA file into dynamic knowledge base")
812
  except Exception as e:
813
  print(f"Failed to download/process file for task {task_id}: {e}")
814
+
815
+ # Enhanced context prompt with dynamic knowledge base awareness and step-by-step reasoning
816
  context_prompt = f"""
817
  GAIA Task ID: {task_id}
818
  Question: {question}
819
  {f'File processed and added to knowledge base: {file_path}' if file_path else 'No additional files'}
820
 
821
+ You are a general AI assistant. I will ask you a question.
822
+
823
+ IMPORTANT INSTRUCTIONS:
824
+ 1. Think through this STEP BY STEP, carefully analyzing all aspects of the question.
825
+ 2. Pay special attention to specific qualifiers like dates, types, categories, or locations.
826
+ 3. Make sure your searches include ALL important details from the question.
827
+ 4. Report your thoughts and reasoning process clearly.
828
+ 5. Finish your answer with: FINAL ANSWER: [YOUR FINAL ANSWER]
829
+
830
+ YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
831
+ If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
832
+ If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
833
+ If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
834
+
835
  try:
836
  ctx = Context(self.coordinator)
837
  print("=== AGENT REASONING STEPS ===")
838
  print(f"Dynamic knowledge base contains {len(self.dynamic_qe_manager.documents)} documents")
839
+
840
  handler = self.coordinator.run(ctx=ctx, user_msg=context_prompt)
 
841
  full_response = ""
842
+
843
  async for event in handler.stream_events():
844
  if isinstance(event, AgentStream):
845
  print(event.delta, end="", flush=True)
846
  full_response += event.delta
847
+
848
  final_response = await handler
849
  print("\n=== END REASONING ===")
850
+
851
  # Extract the final formatted answer
852
+ final_answer = final_answer_tool(str(final_response), question)
 
853
  print(f"Final GAIA formatted answer: {final_answer}")
854
  print(f"Knowledge base now contains {len(self.dynamic_qe_manager.documents)} documents")
855
+
856
  return final_answer
 
857
  except Exception as e:
858
  error_msg = f"Error processing question: {str(e)}"
859
  print(error_msg)
860
  return error_msg
861
+
862
  def get_knowledge_base_stats(self):
863
  """Get statistics about the current knowledge base"""
864
  return {
 
866
  "document_sources": [doc.metadata.get("source", "Unknown") for doc in self.dynamic_qe_manager.documents]
867
  }
868
 
 
 
  async def main():
      agent = EnhancedGAIAAgent()
+
      question_data = {
          "Question": "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.",
          "task_id": ""
      }
+
      print(question_data)
      answer = await agent.solve_gaia_question(question_data)
      print(f"Answer: {answer}")