priyamarwaha committed
Commit a94fa9b · verified · 1 Parent(s): 81917a3

Upload 30 files

Files changed (31)
  1. .gitattributes +2 -0
  2. agent.py +262 -0
  3. answers/1f975693-876d-457b-a649-393859e79bf3.json +5 -0
  4. answers/2d83110e-a098-4ebb-9987-066c06fa42d0.json +5 -0
  5. answers/305ac316-eef6-4446-960a-92d80d542f82.json +5 -0
  6. answers/3cef3a44-215e-4aed-8e3b-b1e3f08063b7.json +5 -0
  7. answers/3f57289b-8c60-48be-bd80-01f8099ca449.json +5 -0
  8. answers/4fc2f1ae-8625-45b5-ab34-ad4433bc21f8.json +5 -0
  9. answers/5a0c1adf-205e-4841-a666-7c3ef95def9d.json +5 -0
  10. answers/6f37996b-2ac7-44b0-8e68-6d28256631b4.json +5 -0
  11. answers/7bd855d8-463d-4ed5-93ca-5fe35145f733.json +5 -0
  12. answers/840bfca7-4f7b-481a-8794-c560c340185d.json +5 -0
  13. answers/8e867cd7-cff9-4e6c-867a-ff5ddc2550be.json +5 -0
  14. answers/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.json +5 -0
  15. answers/9d191bce-651d-4746-be2d-7ef8ecadb9c2.json +5 -0
  16. answers/a0c07678-e491-4bbc-8f0b-07405144218f.json +5 -0
  17. answers/a1e91b78-d3d8-4675-bb8d-62741b4b68a6.json +5 -0
  18. answers/bda648d7-d618-4883-88f4-3466eabd860e.json +5 -0
  19. answers/cabe07ed-9eca-40ea-8ead-410ef5e83f91.json +5 -0
  20. answers/cca530fc-4052-43b2-b130-b30968d8aa44.json +5 -0
  21. answers/cf106601-ab4f-4af9-b045-5295fe67b37d.json +5 -0
  22. answers/f918266a-b3e0-4914-865d-4faa564f1aef.json +5 -0
  23. app.py +180 -105
  24. dataset_helper.py +172 -0
  25. downloads/1f975693-876d-457b-a649-393859e79bf3.mp3 +3 -0
  26. downloads/7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx +0 -0
  27. downloads/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3 +3 -0
  28. downloads/cca530fc-4052-43b2-b130-b30968d8aa44.png +0 -0
  29. downloads/f918266a-b3e0-4914-865d-4faa564f1aef.py +35 -0
  30. requirements.txt +16 -2
  31. tools.py +314 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ downloads/1f975693-876d-457b-a649-393859e79bf3.mp3 filter=lfs diff=lfs merge=lfs -text
+ downloads/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3 filter=lfs diff=lfs merge=lfs -text
agent.py ADDED
@@ -0,0 +1,262 @@
+ # agent.py
+ import logging  # Import logging
+ import os  # For file/directory operations
+ import json  # For reading/writing JSON answer files
+ # import base64  # No longer needed here
+ from typing import TypedDict, Annotated, Optional, List
+
+ from dotenv import load_dotenv  # Import load_dotenv
+
+ from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage
+ from langchain_openai import ChatOpenAI
+ from langgraph.graph import StateGraph, START, END
+ from langgraph.graph.message import add_messages
+ from langgraph.prebuilt import ToolNode, tools_condition
+
+ from dataset_helper import download_file  # For potential use in file handling
+
+
+ # Get the logger instance configured in app.py
+ logger = logging.getLogger("eval_logger")
+
+ # Load environment variables from a .env file at the beginning.
+ # This will load OPENAI_API_KEY if it's set in a .env file in the root directory.
+ if load_dotenv():
+     logger.info(".env file loaded successfully by agent.py.")
+ else:
+     logger.info(".env file not found or empty in agent.py, relying on system environment variables.")
+
+ # Import tools AFTER .env might have been loaded
+ from tools import TOOLS
+
+ # --- Agent State Definition ---
+ class AgentState(TypedDict):
+     task_id: str
+     original_question: str
+     input_file_path: Optional[str]  # Path to the locally downloaded file, if any
+     messages: Annotated[list[AnyMessage], add_messages]
+     # Potentially add other fields like 'scratchpad' or 'intermediate_steps' if needed
+
+ # --- Tool Definitions --- MOVED TO tools.py ---
+ # vision_llm, extract_text_from_image, search_tool, TOOLS list are now in tools.py
+
+ # --- LangGraph Agent Class ---
+ class LangGraphAgent:
+     def __init__(self, api_url: str, answers_dir: str = "answers"):
+         logger.info("LangGraphAgent initializing...")
+         self.api_url = api_url  # Needed for download_file, though not directly by the graph
+         self.answers_dir = answers_dir
+         os.makedirs(self.answers_dir, exist_ok=True)
+         logger.info(f"Answers will be stored in: {os.path.abspath(self.answers_dir)}")
+
+         # Initialize LLM for the agent.
+         # Ensure OPENAI_API_KEY is set in your environment.
+         try:
+             self.llm = ChatOpenAI(model="gpt-4o", temperature=0)
+             # Bind tools imported from tools.py
+             self.agent_llm = self.llm.bind_tools(TOOLS, parallel_tool_calls=False)  # parallel_tool_calls=False as per example
+         except Exception as e:
+             logger.error(f"Failed to initialize agent LLM (ChatOpenAI with gpt-4o) or bind tools: {e}. Ensure OPENAI_API_KEY is set.", exc_info=True)
+             self.llm = None
+             self.agent_llm = None
+
+         # Build the graph
+         self.graph = self._build_graph()
+         logger.info("LangGraphAgent initialized successfully.")
+
+     def _save_answer(self, task_id: str, question: str, answer: str):
+         """Saves the generated answer to a JSON file."""
+         answer_payload = {"task_id": task_id, "question": question, "answer": answer}
+         answer_file_path = os.path.join(self.answers_dir, f"{task_id}.json")
+         try:
+             with open(answer_file_path, 'w') as f:
+                 json.dump(answer_payload, f, indent=4)
+             logger.info(f"Answer for task_id {task_id} saved to {answer_file_path}")
+         except IOError as e:
+             logger.error(f"Error saving answer for task_id {task_id} to {answer_file_path}: {e}", exc_info=True)
+
+     def _load_answer(self, task_id: str) -> str | None:
+         """Loads an answer from a JSON file if it exists."""
+         answer_file_path = os.path.join(self.answers_dir, f"{task_id}.json")
+         if os.path.exists(answer_file_path):
+             try:
+                 with open(answer_file_path, 'r') as f:
+                     answer_data = json.load(f)
+                 logger.info(f"Loaded existing answer for task_id {task_id} from {answer_file_path}")
+                 return answer_data.get("answer")
+             except (IOError, json.JSONDecodeError) as e:
+                 logger.error(f"Error loading answer for task_id {task_id} from {answer_file_path}: {e}", exc_info=True)
+         return None
+
+     # --- Graph Node Definitions ---
+     def _assistant_node(self, state: AgentState):
+         logger.info(f"_assistant_node called for task_id: {state['task_id']}. Current messages count: {len(state['messages'])}")
+         if not self.agent_llm:
+             logger.error("Agent LLM not initialized. Cannot proceed with assistant node.")
+             # Return a message indicating the error, which will be added to state by add_messages.
+             # This helps in debugging and ensures the flow continues to an extent.
+             error_message = SystemMessage(content="Error: Agent LLM not initialized. Cannot generate response.")
+             return {"messages": [error_message]}
+
+         system_prompt_parts = [
+             f"You are a helpful AI assistant for the GAIA benchmark. Your goal is to answer the user's question accurately and concisely. ",
+             f"The user's question is about task_id: {state['task_id']}.\n",
+             f"The original question is: {state['original_question']}\n"
+         ]
+
+         input_file_path = state.get('input_file_path')
+         original_question_text = state['original_question']
+
+         if input_file_path:
+             system_prompt_parts.append(f"A local file is available at path: {input_file_path}. ")
+             file_extension = os.path.splitext(input_file_path)[1].lower()
+             if file_extension in ['.png', '.jpg', '.jpeg', '.gif', '.webp']:
+                 system_prompt_parts.append(f"This file appears to be an image. You can use the 'analyse_image' tool to analyse it. This tool requires the 'img_path' (which is '{input_file_path}') and the 'question' (which is '{original_question_text}') to be passed as arguments. This tool works only for local image files. ")
+             elif file_extension in ['.mp3', '.wav', '.aac', '.flac', '.ogg', '.opus']:  # Common audio types for AssemblyAI
+                 system_prompt_parts.append(f"This file appears to be an audio file. You can use the 'analyse_audio' tool to analyse its content. This tool requires the 'audio_path' (which is '{input_file_path}') and the 'question' (which is '{original_question_text}') to be passed as arguments. This tool works only for local audio files and cannot process web URLs. ")
+             elif file_extension == '.py':
+                 system_prompt_parts.append(f"This file appears to be a Python script. You can use the 'execute_python_code_from_file' tool to understand its content and answer questions about it (e.g., predict its output or describe its functionality). This tool requires the 'file_path' (which is '{input_file_path}') and the 'question' (which is '{original_question_text}') as arguments. This tool analyses the code textually; it does not execute it. ")
+             elif file_extension in ['.xls', '.xlsx']:
+                 system_prompt_parts.append(f"This file appears to be an Excel file. To answer questions requiring calculations, data manipulation, or specific lookups: "
+                                            f"1. You should generate a Python script using the pandas library. "
+                                            f"2. Use the 'execute_pandas_script_for_excel' tool to run this script. "
+                                            f"3. The script will have access to a variable 'excel_file_path' which holds the path: '{input_file_path}'. Use this variable in your script to load the Excel file (e.g., pd.read_excel(excel_file_path)). "
+                                            f"4. Your generated Python script MUST end with a print() statement that outputs ONLY the final answer, precisely formatted. "
+                                            f"5. If you first need to understand the structure of the Excel file (sheet names, columns), you can use the 'analyse_excel_file' tool which provides a textual (CSV) representation of the data. But for computation, use 'execute_pandas_script_for_excel'. "
+                                            f"Pass the '{input_file_path}' as 'excel_file_path' and your generated script as 'python_code' to the 'execute_pandas_script_for_excel' tool. ")
+             else:
+                 system_prompt_parts.append(f"The provided file '{input_file_path}' is not a supported image, audio, Python, or Excel type for direct analysis with available tools. Do not attempt to use 'analyse_image', 'analyse_audio', 'execute_python_code_from_file', or 'analyse_excel_file'/'execute_pandas_script_for_excel' for this file. You may need to rely on web search or the question text itself. ")
+         else:
+             system_prompt_parts.append("No local file was provided with this question. ")
+
+         system_prompt_parts.append("If the question text itself contains a URL (e.g., a link to a YouTube video or other website), you should primarily use the 'web_search' tool to find information related to that URL and the question. For YouTube URLs, specifically rely on 'web_search' as direct transcript access is not available. ")
+         system_prompt_parts.append("You also have access to a 'web_search' tool for general information or if the question implies online content (e.g., a URL mentioned in the question text). ")
+         system_prompt_parts.append("If a tool fails or a file type is unsupported, do not try the same tool repeatedly on it. Use web_search or state you cannot answer if appropriate. ")
+         system_prompt_parts.append("Prioritize answering the question. If after about 5-7 tool execution cycles you cannot find a definitive answer, you MUST provide the best possible answer based on the information you have gathered or state CLEARLY that you cannot answer the question. DO NOT get stuck in overly long loops of tool use. Be decisive and conclude your reasoning.")
+         system_prompt_parts.append("When providing your final answer, it is crucial that it is ONLY the answer itself, with absolutely no additional conversation, explanations, or formatting like 'The answer is...' or 'Based on my findings...'. Be direct. ")
+         system_prompt_parts.append("The final answer format must be one of the following: ")
+         system_prompt_parts.append("1. A number (e.g., 42, 1000, 3.14). Do not use commas for thousands separators (e.g., write 1000 not 1,000). Do not use units like '$' or '%' unless the question explicitly asks for it in the answer format. ")
+         system_prompt_parts.append("2. As few words as possible (e.g., 'Paris', 'Mount Everest'). Do not use articles (a, an, the) unless part of a proper name. Avoid abbreviations (e.g., use 'Los Angeles' not 'LA') unless the question implies it. Write digits in plain text (e.g., 'two' instead of '2') unless the question asks for a numerical digit. ")
+         system_prompt_parts.append("3. A comma-separated list of numbers and/or strings (e.g., 'red,blue,green', '1,2,three', 'Tokyo,London,New York'). Apply the rules from 1 and 2 to each element in the list. Ensure there are no spaces after commas unless a list element itself naturally contains a space (e.g. a multi-word city name). ")
+         system_prompt_parts.append("Adhere to these formatting rules strictly for the final output.")
+         system_prompt_parts.append("You also have access to a 'wikipedia_tool' to get information from Wikipedia. It's good for general knowledge questions, facts, definitions, and summaries on a wide range of topics.")
+         system_prompt_parts.append("For questions specifically about the visual content of a YouTube video, use the 'analyse_youtube' tool. Provide the 'youtube_url' and the 'question'. This tool uses a Gemini multimodal model. If this tool fails or cannot answer, you can fall back to 'web_search' for general information about the video.")
+         system_prompt_parts.append("If you encounter a particularly complex question (e.g., historical queries with multiple constraints, or questions requiring deep, multi-step reasoning) and you are struggling to find a definitive answer after attempting with standard tools (like web_search, wikipedia_tool) for a few cycles (e.g., 2-3 attempts), you can use the 'deep_analysis_with_gemini' tool. Pass the original, full question to this tool. Use this as a strategic escalation for very challenging textual questions.")
+         system_prompt_parts.append("If a tool fails or a file type is unsupported, do not try the same tool repeatedly on it. Use web_search or state you cannot answer if appropriate. ")
+
+         system_prompt = "".join(system_prompt_parts)
+
+         messages_for_llm = [SystemMessage(content=system_prompt)] + state["messages"]
+
+         logger.debug(f"Messages being sent to LLM for task {state['task_id']}: {messages_for_llm}")
+         response_message = self.agent_llm.invoke(messages_for_llm)
+         logger.debug(f"LLM response for task {state['task_id']}: {response_message}")
+         return {"messages": [response_message]}  # LangGraph's add_messages will append this
+
+     def _build_graph(self) -> StateGraph:
+         logger.info("Building LangGraph...")
+         builder = StateGraph(AgentState)
+         builder.add_node("assistant", self._assistant_node)
+         tool_node = ToolNode(TOOLS)  # Create a ToolNode with all our tools
+         builder.add_node("tools", tool_node)
+
+         builder.add_edge(START, "assistant")
+         builder.add_conditional_edges(
+             "assistant",
+             tools_condition,  # LangGraph's prebuilt tools_condition
+             # END  # If no tool call, end. (Modified below to ensure final processing)
+         )
+         # builder.add_edge("tools", "assistant")  # Loop back from tools to assistant
+
+         # Modified flow: tools execute, then always go back to the assistant for summarization/final answer.
+         # If the assistant decided on no tool, tools_condition might route to END if not handled.
+         # We want the assistant to make the final decision to END.
+
+         # If the assistant calls a tool, route to tools.
+         # If the assistant does not call a tool, it should be the final answer.
+         # tools_condition will route to END if no tool calls are present in the AI message.
+         # So, if tools_condition routes to END, it means the assistant provided the final answer.
+
+         builder.add_edge("tools", "assistant")  # Always go back to assistant after a tool run
+
+         # graph = builder.compile(checkpointer=None, recursion_limit=35)  # Incorrect parameter
+         graph = builder.compile(checkpointer=None)  # Corrected: recursion_limit is passed at invoke time
+         logger.info("LangGraph built successfully.")
+         # try:
+         #     # For debugging: display graph structure if possible (requires graphviz)
+         #     # from IPython.display import Image, display
+         #     # display(Image(graph.get_graph(xray=True).draw_mermaid_png()))
+         #     logger.info("Graph visualization (mermaid PNG) can be generated if IPython and graphviz are available.")
+         # except Exception as e:
+         #     logger.warning(f"Could not generate graph visualization: {e}")
+         return graph
+
+     def __call__(self, task_id: str, question: str, file_name: str | None) -> tuple[str, bool]:
+         logger.info(f"LangGraphAgent __call__ for task_id: {task_id}")
+
+         # 1. Check for a cached answer first
+         cached_answer = self._load_answer(task_id)
+         if cached_answer is not None:
+             logger.info(f"Returning cached answer for {task_id}.")
+             return cached_answer, True
+
+         if not self.graph or not self.agent_llm:
+             logger.error("Agent graph or LLM not initialized. Cannot process question.")
+             return "Error: Agent not properly initialized.", False
+
+         # 2. Download the file if provided
+         local_file_path = None
+         if file_name:
+             logger.info(f"Associated file '{file_name}' for task {task_id}. Attempting download.")
+             local_file_path = download_file(self.api_url, task_id, file_name, download_dir="downloads")  # Ensure 'downloads' dir
+             if local_file_path:
+                 logger.info(f"File '{file_name}' available at {local_file_path} for task {task_id}.")
+             else:
+                 logger.error(f"Failed to download file '{file_name}' for task {task_id}.")
+                 # The agent might still try to answer, or this could be a hard failure depending on the question.
+
+         # 3. Invoke the graph
+         initial_state: AgentState = {
+             "task_id": task_id,
+             "original_question": question,
+             "input_file_path": local_file_path,
+             "messages": [HumanMessage(content=question)]
+         }
+
+         final_answer_content = f"Error: Agent did not produce a final answer for task {task_id}."  # Default error
+         try:
+             logger.info(f"Invoking graph for task_id: {task_id} with initial state.")
+             # Stream events for debugging if needed:
+             # for event in self.graph.stream(initial_state, stream_mode="values"):
+             #     logger.debug(f"Graph event for {task_id}: {event}")
+             #     final_state = event
+
+             final_state = self.graph.invoke(initial_state, config={'recursion_limit': 50})  # Increased to 50
+             logger.info(f"Graph invocation complete for task_id: {task_id}.")
+
+             if final_state and final_state.get("messages"):
+                 # The final answer should be the content of the last AI message that is not a tool call
+                 for msg in reversed(final_state["messages"]):
+                     if msg.type == "ai" and not msg.tool_calls:  # AI message without tool calls
+                         final_answer_content = msg.content
+                         logger.info(f"Extracted final answer for {task_id}: '{final_answer_content[:100]}...'")
+                         break
+                     elif msg.type == "system" and "Error: Agent LLM not initialized" in msg.content:  # Check for our specific error
+                         final_answer_content = msg.content
+                         break
+                 else:  # The loop finished without a break (no suitable AI message found)
+                     logger.warning(f"No suitable final AI message found for task {task_id}. Last messages: {final_state.get('messages')}")
+                     # Fallback or specific error message.
+                     # For now, use the last message content if any, or keep the default error.
+                     if final_state.get("messages"):
+                         final_answer_content = final_state["messages"][-1].content  # Best guess
+             else:
+                 logger.error(f"Graph did not return messages in final_state for task {task_id}. Final state: {final_state}")
+
+         except Exception as e:
+             logger.error(f"Error during LangGraph agent execution for task_id {task_id}: {e}", exc_info=True)
+             final_answer_content = f"Error during agent execution: {str(e)}"
+
+         # 4. Save and return the final answer
+         self._save_answer(task_id, question, final_answer_content)
+         return final_answer_content, False  # False because it's newly generated/processed by the graph
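
For orientation, here is a minimal usage sketch of the class added above, mirroring how app.py (changed later in this commit) drives it. The task_id and question values are illustrative placeholders, and OPENAI_API_KEY plus the tools.py module must be available:

# Illustrative usage sketch (not part of the committed files).
from agent import LangGraphAgent

agent = LangGraphAgent(api_url="https://agents-course-unit4-scoring.hf.space", answers_dir="answers")

# __call__ returns (answer, from_cache): answers are cached as answers/{task_id}.json,
# and any file attached to the task is downloaded into downloads/ before the graph runs.
answer, from_cache = agent(
    task_id="00000000-0000-0000-0000-000000000000",  # placeholder task id
    question="Example question text",                # placeholder question
    file_name=None,                                  # or the attached file's name
)
print(answer, from_cache)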
answers/1f975693-876d-457b-a649-393859e79bf3.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "task_id": "1f975693-876d-457b-a649-393859e79bf3",
+     "question": "Hi, I was out sick from my classes on Friday, so I'm trying to figure out what I need to study for my Calculus mid-term next week. My friend from class sent me an audio recording of Professor Willowbrook giving out the recommended reading for the test, but my headphones are broken :(\n\nCould you please listen to the recording for me and tell me the page numbers I'm supposed to go over? I've attached a file called Homework.mp3 that has the recording. Please provide just the page numbers as a comma-delimited list. And please provide the list in ascending order.",
+     "answer": "132,133,134,197,245"
+ }
answers/2d83110e-a098-4ebb-9987-066c06fa42d0.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "task_id": "2d83110e-a098-4ebb-9987-066c06fa42d0",
+     "question": ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI",
+     "answer": "right"
+ }
answers/305ac316-eef6-4446-960a-92d80d542f82.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "task_id": "305ac316-eef6-4446-960a-92d80d542f82",
+     "question": "Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name.",
+     "answer": "Piotr"
+ }
answers/3cef3a44-215e-4aed-8e3b-b1e3f08063b7.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "task_id": "3cef3a44-215e-4aed-8e3b-b1e3f08063b7",
+     "question": "I'm making a grocery list for my mom, but she's a professor of botany and she's a real stickler when it comes to categorizing things. I need to add different foods to different categories on the grocery list, but if I make a mistake, she won't buy anything inserted in the wrong category. Here's the list I have so far:\n\nmilk, eggs, flour, whole bean coffee, Oreos, sweet potatoes, fresh basil, plums, green beans, rice, corn, bell pepper, whole allspice, acorns, broccoli, celery, zucchini, lettuce, peanuts\n\nI need to make headings for the fruits and vegetables. Could you please create a list of just the vegetables from my list? If you could do that, then I can figure out how to categorize the rest of the list into the appropriate categories. But remember that my mom is a real stickler, so make sure that no botanical fruits end up on the vegetable list, or she won't get them when she's at the store. Please alphabetize the list of vegetables, and place each item in a comma separated list.",
+     "answer": "broccoli,celery,lettuce,sweet potatoes,zucchini"
+ }
answers/3f57289b-8c60-48be-bd80-01f8099ca449.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "task_id": "3f57289b-8c60-48be-bd80-01f8099ca449",
+     "question": "How many at bats did the Yankee with the most walks in the 1977 regular season have that same season?",
+     "answer": "582"
+ }
answers/4fc2f1ae-8625-45b5-ab34-ad4433bc21f8.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "task_id": "4fc2f1ae-8625-45b5-ab34-ad4433bc21f8",
+     "question": "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?",
+     "answer": "FunkMonk"
+ }
answers/5a0c1adf-205e-4841-a666-7c3ef95def9d.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "task_id": "5a0c1adf-205e-4841-a666-7c3ef95def9d",
+     "question": "What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?",
+     "answer": "Claus"
+ }
answers/6f37996b-2ac7-44b0-8e68-6d28256631b4.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "task_id": "6f37996b-2ac7-44b0-8e68-6d28256631b4",
+     "question": "Given this table defining * on the set S = {a, b, c, d, e}\n\n|*|a|b|c|d|e|\n|---|---|---|---|---|---|\n|a|a|b|c|b|d|\n|b|b|c|a|e|c|\n|c|c|a|b|b|a|\n|d|b|e|b|e|d|\n|e|d|b|a|d|c|\n\nprovide the subset of S involved in any possible counter-examples that prove * is not commutative. Provide your answer as a comma separated list of the elements in the set in alphabetical order.",
+     "answer": "a,b,c,d,e"
+ }
answers/7bd855d8-463d-4ed5-93ca-5fe35145f733.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "task_id": "7bd855d8-463d-4ed5-93ca-5fe35145f733",
+     "question": "The attached Excel file contains the sales of menu items for a local fast-food chain. What were the total sales that the chain made from food (not including drinks)? Express your answer in USD with two decimal places.",
+     "answer": "89706.00"
+ }
answers/840bfca7-4f7b-481a-8794-c560c340185d.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "task_id": "840bfca7-4f7b-481a-8794-c560c340185d",
+     "question": "On June 6, 2023, an article by Carolyn Collins Petersen was published in Universe Today. This article mentions a team that produced a paper about their observations, linked at the bottom of the article. Find this paper. Under what NASA award number was the work performed by R. G. Arendt supported by?",
+     "answer": "NNX17AF26G"
+ }
answers/8e867cd7-cff9-4e6c-867a-ff5ddc2550be.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "task_id": "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
+     "question": "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.",
+     "answer": "5"
+ }
answers/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "task_id": "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
+     "question": "Hi, I'm making a pie but I could use some help with my shopping list. I have everything I need for the crust, but I'm not sure about the filling. I got the recipe from my friend Aditi, but she left it as a voice memo and the speaker on my phone is buzzing so I can't quite make out what she's saying. Could you please listen to the recipe and list all of the ingredients that my friend described? I only want the ingredients for the filling, as I have everything I need to make my favorite pie crust. I've attached the recipe as Strawberry pie.mp3.\n\nIn your response, please only list the ingredients, not any measurements. So if the recipe calls for \"a pinch of salt\" or \"two cups of ripe strawberries\" the ingredients on the list would be \"salt\" and \"ripe strawberries\".\n\nPlease format your response as a comma separated list of ingredients. Also, please alphabetize the ingredients.",
+     "answer": "cornstarch, granulated sugar, lemon juice, ripe strawberries, vanilla extract"
+ }
answers/9d191bce-651d-4746-be2d-7ef8ecadb9c2.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "task_id": "9d191bce-651d-4746-be2d-7ef8ecadb9c2",
+     "question": "Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.\n\nWhat does Teal'c say in response to the question \"Isn't that hot?\"",
+     "answer": "Extremely."
+ }
answers/a0c07678-e491-4bbc-8f0b-07405144218f.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "task_id": "a0c07678-e491-4bbc-8f0b-07405144218f",
+     "question": "Who are the pitchers with the number before and after Taish\u014d Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.",
+     "answer": "Hasegawa, VerHagen"
+ }
answers/a1e91b78-d3d8-4675-bb8d-62741b4b68a6.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "task_id": "a1e91b78-d3d8-4675-bb8d-62741b4b68a6",
+     "question": "In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?",
+     "answer": "I cannot determine the highest number of bird species on camera simultaneously in the video."
+ }
answers/bda648d7-d618-4883-88f4-3466eabd860e.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "task_id": "bda648d7-d618-4883-88f4-3466eabd860e",
+     "question": "Where were the Vietnamese specimens described by Kuznetzov in Nedoshivina's 2010 paper eventually deposited? Just give me the city name without abbreviations.",
+     "answer": "Saint Petersburg"
+ }
answers/cabe07ed-9eca-40ea-8ead-410ef5e83f91.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "task_id": "cabe07ed-9eca-40ea-8ead-410ef5e83f91",
+     "question": "What is the surname of the equine veterinarian mentioned in 1.E Exercises from the chemistry materials licensed by Marisa Alviar-Agnew & Henry Agnew under the CK-12 license in LibreText's Introductory Chemistry materials as compiled 08/21/2023?",
+     "answer": "Franco"
+ }
answers/cca530fc-4052-43b2-b130-b30968d8aa44.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "task_id": "cca530fc-4052-43b2-b130-b30968d8aa44",
+     "question": "Review the chess position provided in the image. It is black's turn. Provide the correct next move for black which guarantees a win. Please provide your response in algebraic notation.",
+     "answer": "Qb1+"
+ }
answers/cf106601-ab4f-4af9-b045-5295fe67b37d.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "task_id": "cf106601-ab4f-4af9-b045-5295fe67b37d",
+     "question": "What country had the least number of athletes at the 1928 Summer Olympics? If there's a tie for a number of athletes, return the first in alphabetical order. Give the IOC country code as your answer.",
+     "answer": "PAN"
+ }
answers/f918266a-b3e0-4914-865d-4faa564f1aef.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "task_id": "f918266a-b3e0-4914-865d-4faa564f1aef",
+     "question": "What is the final numeric output from the attached Python code?",
+     "answer": "0"
+ }
app.py CHANGED
@@ -1,103 +1,82 @@
  import os
  import gradio as gr
- import requests
  import inspect
  import pandas as pd

- # (Keep Constants as is)
  # --- Constants ---
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

  # --- Basic Agent Definition ---
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
- class BasicAgent:
-     def __init__(self):
-         print("BasicAgent initialized.")
-     def __call__(self, question: str) -> str:
-         print(f"Agent received question (first 50 chars): {question[:50]}...")
-         fixed_answer = "This is a default answer."
-         print(f"Agent returning fixed answer: {fixed_answer}")
-         return fixed_answer
-
- def run_and_submit_all(profile: gr.OAuthProfile | None):
-     """
-     Fetches all questions, runs the BasicAgent on them, submits all answers,
-     and displays the results.
-     """
-     # --- Determine HF Space Runtime URL and Repo URL ---
-     space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending a link to the code
-
-     if profile:
-         username = f"{profile.username}"
-         print(f"User logged in: {username}")
-     else:
-         print("User not logged in.")
-         return "Please Login to Hugging Face with the button.", None
-
-     api_url = DEFAULT_API_URL
-     questions_url = f"{api_url}/questions"
-     submit_url = f"{api_url}/submit"

-     # 1. Instantiate Agent (modify this part to create your agent)
      try:
-         agent = BasicAgent()
-     except Exception as e:
-         print(f"Error instantiating agent: {e}")
-         return f"Error initializing agent: {e}", None
-     # In the case of an app running as a Hugging Face space, this link points toward your codebase (useful for others, so please keep it public)
-     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-     print(agent_code)

-     # 2. Fetch Questions
-     print(f"Fetching questions from: {questions_url}")
-     try:
-         response = requests.get(questions_url, timeout=15)
-         response.raise_for_status()
-         questions_data = response.json()
-         if not questions_data:
-             print("Fetched questions list is empty.")
-             return "Fetched questions list is empty or invalid format.", None
-         print(f"Fetched {len(questions_data)} questions.")
-     except requests.exceptions.RequestException as e:
-         print(f"Error fetching questions: {e}")
-         return f"Error fetching questions: {e}", None
-     except requests.exceptions.JSONDecodeError as e:
-         print(f"Error decoding JSON response from questions endpoint: {e}")
-         print(f"Response text: {response.text[:500]}")
-         return f"Error decoding server response for questions: {e}", None
-     except Exception as e:
-         print(f"An unexpected error occurred fetching questions: {e}")
-         return f"An unexpected error occurred fetching questions: {e}", None
-
-     # 3. Run your Agent
-     results_log = []
-     answers_payload = []
-     print(f"Running agent on {len(questions_data)} questions...")
-     for item in questions_data:
-         task_id = item.get("task_id")
-         question_text = item.get("question")
-         if not task_id or question_text is None:
-             print(f"Skipping item with missing task_id or question: {item}")
-             continue
-         try:
-             submitted_answer = agent(question_text)
-             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
-         except Exception as e:
-             print(f"Error running agent on task {task_id}: {e}")
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
-
-     if not answers_payload:
-         print("Agent did not produce any answers to submit.")
-         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

-     # 4. Prepare Submission
-     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
-     print(status_update)

-     # 5. Submit
-     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
      try:
          response = requests.post(submit_url, json=submission_data, timeout=60)
          response.raise_for_status()
@@ -109,7 +88,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
              f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
              f"Message: {result_data.get('message', 'No message received.')}"
          )
-         print("Submission successful.")
          results_df = pd.DataFrame(results_log)
          return final_status, results_df
      except requests.exceptions.HTTPError as e:
@@ -120,27 +99,125 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
          except requests.exceptions.JSONDecodeError:
              error_detail += f" Response: {e.response.text[:500]}"
          status_message = f"Submission Failed: {error_detail}"
-         print(status_message)
          results_df = pd.DataFrame(results_log)
          return status_message, results_df
      except requests.exceptions.Timeout:
          status_message = "Submission Failed: The request timed out."
-         print(status_message)
          results_df = pd.DataFrame(results_log)
          return status_message, results_df
      except requests.exceptions.RequestException as e:
          status_message = f"Submission Failed: Network error - {e}"
-         print(status_message)
          results_df = pd.DataFrame(results_log)
          return status_message, results_df
      except Exception as e:
          status_message = f"An unexpected error occurred during submission: {e}"
-         print(status_message)
          results_df = pd.DataFrame(results_log)
          return status_message, results_df


- # --- Build Gradio Interface using Blocks ---
  with gr.Blocks() as demo:
      gr.Markdown("# Basic Agent Evaluation Runner")
      gr.Markdown(
@@ -163,7 +240,6 @@ with gr.Blocks() as demo:
      run_button = gr.Button("Run Evaluation & Submit All Answers")

      status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-     # Removed max_rows=10 from DataFrame constructor
      results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

      run_button.click(
@@ -172,25 +248,24 @@ with gr.Blocks() as demo:
      )

  if __name__ == "__main__":
-     print("\n" + "-"*30 + " App Starting " + "-"*30)
-     # Check for SPACE_HOST and SPACE_ID at startup for information
      space_host_startup = os.getenv("SPACE_HOST")
-     space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup

      if space_host_startup:
-         print(f"SPACE_HOST found: {space_host_startup}")
-         print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
      else:
-         print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

-     if space_id_startup:  # Print repo URLs if SPACE_ID is found
-         print(f"SPACE_ID found: {space_id_startup}")
-         print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-         print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
      else:
-         print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

-     print("-"*(60 + len(" App Starting ")) + "\n")

-     print("Launching Gradio Interface for Basic Agent Evaluation...")
      demo.launch(debug=True, share=False)
 
  import os
  import gradio as gr
  import inspect
  import pandas as pd
+ import requests
+ import logging
+ import datetime
+ import json  # Added for saving submission data
+
+
+ log_timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+ log_file_name = f"evaluation_run_{log_timestamp}.log"
+
+
+ logger = logging.getLogger("eval_logger")
+ logger.setLevel(logging.INFO)
+ file_handler = logging.FileHandler(log_file_name)
+ formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(name)s - %(module)s - %(funcName)s - %(lineno)d - %(message)s')
+ file_handler.setFormatter(formatter)
+ logger.addHandler(file_handler)
+
+
+ logger.info("Logging setup complete. Log file: %s", log_file_name)
+
+
+ from dataset_helper import fetch_all_questions, download_file  # fetch_random_question is also available if needed
+
+
+ from agent import LangGraphAgent

  # --- Constants ---
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

  # --- Basic Agent Definition ---
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
+ # class BasicAgent:  # Moved to agent.py
+ #     def __init__(self, api_url: str):
+ #         print("BasicAgent initialized.")
+ #         self.api_url = api_url  # Store api_url for potential use in downloading files
+ #
+ #     def __call__(self, task_id: str, question: str, file_name: str | None) -> str:
+ #         print(f"Agent received task_id: {task_id}, question (first 50 chars): {question[:50]}...")
+ #         if file_name:
+ #             print(f"Question has an associated file: {file_name}")
+ #             # Example: Download the file if needed by the agent's logic
+ #             # local_file_path = download_file(self.api_url, task_id, file_name)
+ #             # if local_file_path:
+ #             #     print(f"File {file_name} downloaded to {local_file_path}")
+ #             #     # Agent would then process this file
+ #             # else:
+ #             #     print(f"Failed to download {file_name} for task {task_id}")
+ #             #     return "Error: Could not download associated file."
+ #
+ #         # Current placeholder answer
+ #         fixed_answer = "This is a default answer from BasicAgent."
+ #         print(f"Agent returning fixed answer: {fixed_answer}")
+ #         return fixed_answer

+ def _submit_answers_to_api(submit_url: str, submission_data: dict, results_log: list, logger_instance: logging.Logger) -> tuple[str, pd.DataFrame]:
+     """Handles the submission of answers to the API and processes the response."""
      try:
+         submission_log_timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")
+         submission_file_name = f"submission_payload_{submission_log_timestamp}.json"
+
+         # Create a 'submissions' directory if it doesn't exist
+         submissions_dir = "submissions"
+         if not os.path.exists(submissions_dir):
+             os.makedirs(submissions_dir)
+             logger_instance.info(f"Created directory: {submissions_dir}")

+         submission_file_path = os.path.join(submissions_dir, submission_file_name)

+         with open(submission_file_path, 'w') as f:
+             json.dump(submission_data, f, indent=4)
+         logger_instance.info(f"Submission payload saved to: {submission_file_path}")
+     except Exception as e:
+         logger_instance.error(f"Failed to save submission payload: {e}", exc_info=True)

+     logger_instance.info(f"Submitting {len(submission_data.get('answers', []))} answers to: {submit_url}")
      try:
          response = requests.post(submit_url, json=submission_data, timeout=60)
          response.raise_for_status()

              f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
              f"Message: {result_data.get('message', 'No message received.')}"
          )
+         logger_instance.info(f"Submission successful: {final_status}")
          results_df = pd.DataFrame(results_log)
          return final_status, results_df
      except requests.exceptions.HTTPError as e:

          except requests.exceptions.JSONDecodeError:
              error_detail += f" Response: {e.response.text[:500]}"
          status_message = f"Submission Failed: {error_detail}"
+         logger_instance.error(status_message, exc_info=True)
          results_df = pd.DataFrame(results_log)
          return status_message, results_df
      except requests.exceptions.Timeout:
          status_message = "Submission Failed: The request timed out."
+         logger_instance.error(status_message, exc_info=True)
          results_df = pd.DataFrame(results_log)
          return status_message, results_df
      except requests.exceptions.RequestException as e:
          status_message = f"Submission Failed: Network error - {e}"
+         logger_instance.error(status_message, exc_info=True)
          results_df = pd.DataFrame(results_log)
          return status_message, results_df
      except Exception as e:
          status_message = f"An unexpected error occurred during submission: {e}"
+         logger_instance.error(status_message, exc_info=True)
          results_df = pd.DataFrame(results_log)
          return status_message, results_df

+ def run_and_submit_all(profile: gr.OAuthProfile | None):
+     """
+     Fetches all questions, runs the agent on them, submits all answers,
+     and displays the results.
+     """
+     logger.info("run_and_submit_all started.")
+     space_id = os.getenv("SPACE_ID")
+
+     if profile:
+         username = f"{profile.username}"
+         logger.info(f"User logged in: {username}")
+     else:
+         logger.warning("User not logged in.")
+         return "Please Login to Hugging Face with the button.", None
+
+     api_url = DEFAULT_API_URL
+     submit_url = f"{api_url}/submit"
+
+     try:
+         logger.info("Initializing agent...")
+         global agent
+         agent = LangGraphAgent(api_url=DEFAULT_API_URL, answers_dir="answers")
+         logger.info("Agent initialized.")
+     except Exception as e:
+         logger.error(f"Error instantiating agent: {e}", exc_info=True)
+         return f"Error initializing agent: {e}", None
+
+     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+     logger.info(f"Agent code URL: {agent_code}")
+
+     logger.info(f"Fetching questions using dataset_helper from: {api_url}")
+     questions_data = fetch_all_questions(api_url)
+
+     if questions_data is None:
+         logger.error("Failed to fetch questions via dataset_helper. questions_data is None.")
+         return "Error fetching questions. Please check the logs.", None
+
+     total_questions_fetched = len(questions_data)
+     logger.info(f"Fetched {total_questions_fetched} questions via dataset_helper.")
+     if not questions_data:
+         logger.warning("Fetched questions list is empty (0 questions).")
+         return "Fetched questions list is empty. Nothing to process.", pd.DataFrame(results_log if 'results_log' in locals() else [])
+
+     results_log = []
+     answers_payload = []
+     successful_answers_count = 0
+     answers_from_cache_count = 0
+     logger.info(f"Running agent on {total_questions_fetched} questions...")
+     for item_index, item in enumerate(questions_data):
+         task_id = item.get("task_id")
+         question_text = item.get("question")
+         file_name = item.get("file_name")
+         logger.info(f"Processing question {item_index + 1}/{total_questions_fetched}, task_id: {task_id}")
+
+         if not task_id or question_text is None:
+             logger.warning(f"Skipping item {item_index + 1} with missing task_id or question: {item}")
+             results_log.append({"Task ID": task_id if task_id else "MISSING_ID", "Question": question_text if question_text else "MISSING_QUESTION", "Associated File": file_name if file_name else "None", "Submitted Answer": "SKIPPED - Missing data", "From Cache": "N/A"})
+             continue
+         try:
+             submitted_answer_tuple = agent(task_id, question_text, file_name)  # Returns (answer, from_cache)
+             submitted_answer, from_cache = submitted_answer_tuple
+
+             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+             results_log.append({"Task ID": task_id, "Question": question_text, "Associated File": file_name if file_name else "None", "Submitted Answer": submitted_answer, "From Cache": from_cache})
+             successful_answers_count += 1
+             if from_cache:
+                 answers_from_cache_count += 1
+                 logger.info(f"Agent successfully processed task_id: {task_id} (from cache)")
+             else:
+                 logger.info(f"Agent successfully processed task_id: {task_id} (newly generated)")
+         except Exception as e:
+             logger.error(f"Error running agent on task {task_id}: {e}", exc_info=True)
+             results_log.append({"Task ID": task_id, "Question": question_text, "Associated File": file_name if file_name else "None", "Submitted Answer": f"AGENT ERROR: {e}", "From Cache": False})
+
+     logger.info(f"Agent finished processing. Successfully generated/retrieved answers for {successful_answers_count}/{total_questions_fetched} questions. {answers_from_cache_count} answers were from cache.")
+
+     if not answers_payload:
+         logger.warning("Agent did not produce any answers to submit (all attempts might have failed or been skipped).")
+         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+
+     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+
+     summary_line = f"Agent processed {total_questions_fetched} questions. Successfully generated/retrieved {successful_answers_count} answers ({answers_from_cache_count} from cache)."
+     logger.info(summary_line)
+
+     # --- TEMPORARILY BYPASS SUBMISSION FOR TESTING ---
+     # logger.warning("SUBMISSION TO API IS CURRENTLY BYPASSED FOR TESTING.")
+     # bypassed_status_message = (
+     #     f"SUBMISSION BYPASSED. {summary_line}\\n"
+     #     f"User: '{username}'. Results log is available. Submission data prepared but not sent."
+     # )
+     # results_df = pd.DataFrame(results_log)
+     # return bypassed_status_message, results_df
+     # --- END OF TEMPORARY BYPASS ---
+
+     # Call the refactored submission method, passing the global logger instance.
+     # Note: if re-enabling the bypass above, ensure summary_line is incorporated into _submit_answers_to_api or its return message.
+     return _submit_answers_to_api(submit_url, submission_data, results_log, logger)
+

  with gr.Blocks() as demo:
      gr.Markdown("# Basic Agent Evaluation Runner")
      gr.Markdown(

      run_button = gr.Button("Run Evaluation & Submit All Answers")

      status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
      results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

      run_button.click(

      )

  if __name__ == "__main__":
+     logger.info("App Starting...")
      space_host_startup = os.getenv("SPACE_HOST")
+     space_id_startup = os.getenv("SPACE_ID")

      if space_host_startup:
+         logger.info(f"SPACE_HOST found: {space_host_startup}")
+         logger.info(f" Runtime URL should be: https://{space_host_startup}.hf.space")
      else:
+         logger.info("ℹ️ SPACE_HOST environment variable not found (running locally?).")

+     if space_id_startup:
+         logger.info(f"SPACE_ID found: {space_id_startup}")
+         logger.info(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+         logger.info(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
      else:
+         logger.info("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

+     logger.info("-"*(60 + len(" App Starting ")) + "\n")

+     logger.info("Launching Gradio Interface for Basic Agent Evaluation...")
      demo.launch(debug=True, share=False)
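
For reference, the payload that run_and_submit_all builds and _submit_answers_to_api saves under submissions/ before POSTing it to {api_url}/submit has the shape sketched below; the concrete values here are illustrative placeholders:

# Illustrative payload shape (not part of the committed files).
submission_data = {
    "username": "hf-username",                                         # from the Gradio OAuth profile
    "agent_code": "https://huggingface.co/spaces/SPACE_ID/tree/main",  # link to the Space's code
    "answers": [
        {"task_id": "00000000-0000-0000-0000-000000000000", "submitted_answer": "42"},
    ],
}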
dataset_helper.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import shutil
4
+ import logging
5
+
6
+ logger = logging.getLogger("eval_logger")
7
+
8
+
9
+ def fetch_all_questions(api_url: str) -> list[dict] | None:
10
+ """
11
+ Fetches all questions from the API.
12
+
13
+ Args:
14
+ api_url: The base URL of the scoring API.
15
+
16
+ Returns:
17
+ A list of question dictionaries, or None if an error occurs.
18
+ """
19
+ questions_url = f"{api_url}/questions"
20
+ logger.info(f"Fetching all questions from: {questions_url}")
21
+ try:
22
+ response = requests.get(questions_url, timeout=15)
23
+ response.raise_for_status()
24
+ questions_data = response.json()
25
+ if not questions_data:
26
+ logger.warning("Fetched questions list is empty.")
27
+ return None
28
+ logger.info(f"Fetched {len(questions_data)} questions successfully.")
29
+ return questions_data
30
+ except requests.exceptions.RequestException as e:
31
+ logger.error(f"Error fetching all questions: {e}", exc_info=True)
32
+ return None
33
+ except requests.exceptions.JSONDecodeError as e:
34
+ logger.error(f"Error decoding JSON response from questions endpoint: {e}", exc_info=True)
35
+ logger.error(f"Response text: {response.text[:500] if response else 'No response'}")
36
+ return None
37
+ except Exception as e:
38
+ logger.error(f"An unexpected error occurred fetching all questions: {e}", exc_info=True)
39
+ return None
40
+
41
+ def fetch_random_question(api_url: str) -> dict | None:
42
+ """
43
+ Fetches a single random question from the API.
44
+
45
+ Args:
46
+ api_url: The base URL of the scoring API.
47
+
48
+ Returns:
49
+ A dictionary representing a single question, or None if an error occurs.
50
+ """
51
+ random_question_url = f"{api_url}/random-question"
52
+ logger.info(f"Fetching random question from: {random_question_url}")
53
+ try:
54
+ response = requests.get(random_question_url, timeout=15)
55
+ response.raise_for_status()
56
+ question_data = response.json()
57
+ if not question_data:
58
+ logger.warning("Fetched random question is empty.")
59
+ return None
60
+ logger.info(f"Fetched random question successfully: {question_data.get('task_id')}")
61
+ return question_data
62
+ except requests.exceptions.RequestException as e:
63
+ logger.error(f"Error fetching random question: {e}", exc_info=True)
64
+ return None
65
+ except requests.exceptions.JSONDecodeError as e:
66
+ logger.error(f"Error decoding JSON response from random question endpoint: {e}", exc_info=True)
67
+ logger.error(f"Response text: {response.text[:500] if response else 'No response'}")
68
+ return None
69
+ except Exception as e:
70
+ logger.error(f"An unexpected error occurred fetching random question: {e}", exc_info=True)
71
+ return None
72
+
73
+ def download_file(api_url: str, task_id: str, file_name: str, download_dir: str = "downloads") -> str | None:
74
+ """
75
+ Downloads a specific file associated with a given task ID.
76
+
77
+ Args:
78
+ api_url: The base URL of the scoring API.
79
+ task_id: The ID of the task for which to download the file.
80
+ file_name: The name of the file to be saved.
81
+ download_dir: The directory where the file should be saved. Defaults to "downloads".
82
+
83
+ Returns:
84
+ The local path to the downloaded file, or None if an error occurs.
85
+ """
86
+ if not file_name:
87
+ logger.info(f"No file_name provided for task_id {task_id}. Skipping download.")
88
+ return None
89
+
90
+ file_url = f"{api_url}/files/{task_id}"
91
+
92
+ os.makedirs(download_dir, exist_ok=True)
93
+
94
+ local_file_path = os.path.join(download_dir, file_name)
95
+
96
+ if os.path.exists(local_file_path):
97
+ logger.info(f"File already exists at {local_file_path}. Skipping download.")
98
+ return local_file_path
99
+
100
+ logger.info(f"Downloading file for task_id {task_id} from: {file_url} to {local_file_path}")
101
+ try:
102
+ with requests.get(file_url, stream=True, timeout=30) as r:
103
+ r.raise_for_status()
104
+ with open(local_file_path, 'wb') as f:
105
+ shutil.copyfileobj(r.raw, f)
106
+ logger.info(f"File downloaded successfully: {local_file_path}")
107
+ return local_file_path
108
+ except requests.exceptions.RequestException as e:
109
+ logger.error(f"Error downloading file for task_id {task_id}: {e}", exc_info=True)
110
+ if os.path.exists(local_file_path):
111
+ os.remove(local_file_path)
112
+ return None
113
+ except Exception as e:
114
+ logger.error(f"An unexpected error occurred downloading file for task_id {task_id}: {e}", exc_info=True)
115
+ if os.path.exists(local_file_path):
116
+ os.remove(local_file_path)
117
+ return None
118
+
119
+ if __name__ == '__main__':
120
+ print("--- Testing dataset_helper.py directly ---")
121
+ print("NOTE: For full logging, run through app.py. This direct test uses print statements.")
122
+
123
+ test_api_url = "https://agents-course-unit4-scoring.hf.space"
124
+
125
+ print("\n--- Testing fetch_all_questions ---")
126
+ questions = fetch_all_questions(test_api_url)
127
+ if questions:
128
+ print(f"Successfully fetched {len(questions)} questions. First question task_id: {questions[0].get('task_id')}")
129
+ else:
130
+ print("Failed to fetch all questions.")
131
+
132
+ print("\n--- Testing fetch_random_question ---")
133
+ random_q = fetch_random_question(test_api_url)
134
+ if random_q:
135
+ print(f"Successfully fetched random question: {random_q.get('question')[:50]}...")
136
+ else:
137
+ print("Failed to fetch random question.")
138
+
139
+ print("\n--- Testing download_file (example with a known task_id and file_name if available) ---")
140
+ if questions:
141
+ test_task_with_file = None
142
+ test_file_name = None
143
+ for q_item in questions:
144
+ if q_item.get("file_name"):
145
+ test_task_with_file = q_item.get("task_id")
146
+ test_file_name = q_item.get("file_name")
147
+ break
148
+
149
+ if test_task_with_file and test_file_name:
150
+ print(f"Attempting to download file for task_id: {test_task_with_file}, file_name: {test_file_name}")
151
+ downloaded_path = download_file(test_api_url, test_task_with_file, test_file_name)
152
+ if downloaded_path:
153
+ print(f"File downloaded to: {downloaded_path}")
154
+ else:
155
+ print(f"Failed to download file for task_id: {test_task_with_file}")
156
+ else:
157
+ print("No question with an associated file found in the first batch of questions to test download.")
158
+ else:
159
+ print("Skipping download_file test as fetching questions failed.")
160
+
161
+ print("\n--- Testing download_file (with a task_id that might not have a file or invalid file_name) ---")
162
+ if questions and questions[0].get("file_name") == "":
163
+ task_id_no_file = questions[0].get("task_id")
164
+ file_name_empty = questions[0].get("file_name")
165
+ print(f"Attempting to download file for task_id: {task_id_no_file} (expected to skip due to empty file_name)")
166
+ path_no_file = download_file(test_api_url, task_id_no_file, file_name_empty)
167
+ if path_no_file is None:
168
+ print("Correctly skipped download or failed as expected for task with no file_name.")
169
+ else:
170
+ print(f"Unexpectedly downloaded something to {path_no_file} for a task with no file_name.")
171
+ else:
172
+ print("Skipping test for task with no file_name (either no questions or first question has a file).")
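A minimal usage sketch of these helpers (illustrative only, not part of this commit): it assumes the same scoring endpoint as the test block above and a hypothetical driver loop that fetches every question and downloads any attachment before handing both to an agent.

    from dataset_helper import fetch_all_questions, download_file

    API_URL = "https://agents-course-unit4-scoring.hf.space"  # same endpoint as the test block above

    for q in fetch_all_questions(API_URL) or []:
        task_id = q.get("task_id")
        file_name = q.get("file_name")
        # download_file returns None when file_name is empty or the download fails
        local_path = download_file(API_URL, task_id, file_name) if file_name else None
        print(task_id, (q.get("question") or "")[:50], local_path)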
downloads/1f975693-876d-457b-a649-393859e79bf3.mp3 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:200f767e732b49efef5c05d128903ee4d2c34e66fdce7f5593ac123b2e637673
3
+ size 280868
downloads/7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx ADDED
Binary file (5.29 kB).
 
downloads/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b218c951c1f888f0bbe6f46c080f57afc7c9348fffc7ba4da35749ff1e2ac40f
3
+ size 179304
downloads/cca530fc-4052-43b2-b130-b30968d8aa44.png ADDED
downloads/f918266a-b3e0-4914-865d-4faa564f1aef.py ADDED
@@ -0,0 +1,35 @@
1
+ from random import randint
2
+ import time
3
+
4
+ class UhOh(Exception):
5
+ pass
6
+
7
+ class Hmm:
8
+ def __init__(self):
9
+ self.value = randint(-100, 100)
10
+
11
+ def Yeah(self):
12
+ if self.value == 0:
13
+ return True
14
+ else:
15
+ raise UhOh()
16
+
17
+ def Okay():
18
+ while True:
19
+ yield Hmm()
20
+
21
+ def keep_trying(go, first_try=True):
22
+ maybe = next(go)
23
+ try:
24
+ if maybe.Yeah():
25
+ return maybe.value
26
+ except UhOh:
27
+ if first_try:
28
+ print("Working...")
29
+ print("Please wait patiently...")
30
+ time.sleep(0.1)
31
+ return keep_trying(go, first_try=False)
32
+
33
+ if __name__ == "__main__":
34
+ go = Okay()
35
+ print(f"{keep_trying(go)}")
requirements.txt CHANGED
@@ -1,2 +1,16 @@
1
- gradio
2
- requests
1
+ gradio[oauth]>=4.44.1
2
+ requests
3
+ pandas
4
+ langchain
5
+ langgraph
6
+ langchain_openai
7
+ langchain_core
8
+ langchain_community
9
+ duckduckgo-search
10
+ python-dotenv
11
+ assemblyai
12
+ wikipedia
13
+ openpyxl
14
+ tabulate
15
+ youtube-transcript-api
16
+ langchain-google-genai
tools.py ADDED
@@ -0,0 +1,314 @@
1
+ import logging
2
+ import os
3
+ import base64
4
+ import pandas as pd
5
+ import io
6
+ import contextlib
7
+
8
+ from langchain_core.messages import HumanMessage
9
+ from langchain_openai import ChatOpenAI
10
+ from langchain_community.tools import DuckDuckGoSearchRun
11
+ from langchain_community.document_loaders import AssemblyAIAudioTranscriptLoader
12
+ from langchain_community.tools import WikipediaQueryRun
13
+ from langchain_community.utilities import WikipediaAPIWrapper
14
+ from langchain_core.tools import tool
15
+ from langchain_google_genai import ChatGoogleGenerativeAI
16
+
17
+ logger = logging.getLogger("eval_logger")
18
+
19
+
20
+ try:
21
+ tools_llm = ChatOpenAI(model="gpt-4o", temperature=0)
22
+ except Exception as e:
23
+ logger.error(f"Failed to initialize tools_llm (OpenAI gpt-4o) in tools.py: {e}. Ensure OPENAI_API_KEY is set.", exc_info=True)
24
+ tools_llm = None
25
+
26
+
27
+ GEMINI_SHARED_MODEL_NAME = "gemini-2.5-pro-preview-05-06"
28
+ try:
29
+ gemini_llm = ChatGoogleGenerativeAI(
30
+ model=GEMINI_SHARED_MODEL_NAME,
31
+ temperature=0,
32
+ timeout=360 # 6-minute timeout
33
+ )
34
+ logger.info(f"Successfully initialized shared Gemini model: {GEMINI_SHARED_MODEL_NAME} with a 360s timeout.")
35
+ except Exception as e:
36
+ logger.error(f"Failed to initialize shared_gemini_llm in tools.py (model: {GEMINI_SHARED_MODEL_NAME}): {e}. Ensure GOOGLE_API_KEY is set and valid, and the model name is correct/available.", exc_info=True)
37
+ gemini_llm = None
38
+
39
+
46
+ @tool
47
+ def analyse_image(img_path: str, question: str) -> str:
48
+ """
49
+ Analyses a **locally stored** image file to answer a specific question using a multimodal model.
50
+ IMPORTANT: This tool expects a local file path for 'img_path' and cannot process web URLs directly.
51
+ Args:
52
+ img_path: Local path to the image file (e.g., /path/to/your/image.png).
53
+ question: The question the user is trying to answer by analysing this image.
54
+ Returns:
55
+ A string containing the relevant information extracted from the image to answer the question,
56
+ or an error message if analysis fails.
57
+ """
58
+ if not tools_llm:
59
+ return "Error: Vision LLM (gpt-4o) not initialized in tools.py. Cannot analyse image."
60
+ if not os.path.exists(img_path):
61
+ # This check is more critical now that we emphasize local paths.
62
+ return f"Error: Image file not found at local path: {img_path}. This tool requires a local file path."
63
+
64
+ logger.info(f"Attempting to analyse image: {img_path} for question: '{question}'")
65
+ try:
66
+ with open(img_path, "rb") as image_file:
67
+ image_bytes = image_file.read()
68
+ image_base64 = base64.b64encode(image_bytes).decode("utf-8")
69
+
70
+ image_type = os.path.splitext(img_path)[1].lower()
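+ # image_type keeps its leading dot here (e.g. '.png'); the dot is stripped below when building the data: URL MIME type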
71
+ if image_type == '.jpg':
72
+ image_type = '.jpeg'
73
+ if image_type not in ['.png', '.jpeg', '.gif', '.webp']:
74
+ return f"Error: Unsupported image type '{image_type}' for gpt-4o vision. Supported: PNG, JPEG, GIF, WEBP."
75
+
76
+ prompt_text = f"Analyse this image to answer the following question: '{question}'. Focus on extracting only the information directly relevant to this question. Return only the extracted information, with no additional explanations or commentary."
77
+ message = HumanMessage(
78
+ content=[
79
+ {"type": "text", "text": prompt_text},
80
+ {"type": "image_url", "image_url": {"url": f"data:image/{image_type[1:]};base64,{image_base64}"}},
81
+ ]
82
+ )
83
+ response = tools_llm.invoke([message])
84
+ extracted_text = response.content
85
+ logger.info(f"Successfully analysed {img_path} for question '{question}'. Response length: {len(extracted_text)}")
86
+ return extracted_text.strip()
87
+ except Exception as e:
88
+ logger.error(f"Error analysing image {img_path} for question '{question}': {e}", exc_info=True)
89
+ return f"Error during image analysis for question '{question}': {str(e)}"
90
+
91
+ @tool
92
+ def analyse_audio(audio_path: str, question: str) -> str:
93
+ """
94
+ Transcribes a **locally stored** audio file using AssemblyAI and then analyses the transcript
95
+ with a multimodal model (gpt-4o) to answer a specific question.
96
+ IMPORTANT: This tool expects a local file path for 'audio_path' (e.g., /path/to/your/audio.mp3)
97
+ and **cannot process web URLs (like YouTube links) directly.**
98
+ Args:
99
+ audio_path: Local path to the audio file (e.g., /path/to/your/audio.mp3).
100
+ question: The question the user is trying to answer by analysing this audio.
101
+ Returns:
102
+ A string containing the relevant information extracted from the audio to answer the question,
103
+ or an error message if analysis fails.
104
+ """
105
+ logger.info(f"Attempting to analyse audio from local path: {audio_path} for question: '{question}'")
106
+ if not tools_llm: # tools_llm (gpt-4o) handles the Q&A step on the transcript
107
+ return "Error: LLM (gpt-4o) for Q&A not initialized in tools.py. Cannot analyse audio transcript."
108
+ if not audio_path:
109
+ return "Error: Audio file path not provided."
110
+ if not os.path.exists(audio_path):
111
+ return f"Error: Audio file not found at local path: {audio_path}. This tool requires a local file path."
112
+
113
+ try:
114
+ logger.info(f"Loading/transcribing audio from local file: {audio_path} using AssemblyAI.")
115
+ loader = AssemblyAIAudioTranscriptLoader(file_path=audio_path) # AssemblyAI loader primarily works with local paths for reliability.
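+ # No api_key is passed explicitly, so this relies on ASSEMBLYAI_API_KEY being set in the environment (see the error handling below)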
116
+ docs = loader.load()
117
+
118
+ if not docs or not docs[0].page_content:
119
+ logger.error(f"AssemblyAI transcription failed or returned empty for {audio_path}.")
120
+ return f"Error: Transcription failed or returned empty content for {audio_path}."
121
+
122
+ transcript = docs[0].page_content
123
+ logger.info(f"Successfully transcribed audio from {audio_path}. Transcript length: {len(transcript)}")
124
+
125
+ qa_prompt_text = (
126
+ f"The following is a transcript of an audio file: \n\nTranscript:\n{transcript}\n\n---\n\n"
127
+ f"Based SOLELY on the information in the transcript above, answer the following question: '{question}'. "
128
+ f"Provide only the direct answer as extracted or inferred from the transcript, with no additional commentary."
129
+ )
130
+
131
+ message = HumanMessage(content=qa_prompt_text)
132
+ response = tools_llm.invoke([message])
133
+ answer = response.content
134
+ logger.info(f"Successfully analysed transcript from {audio_path} for question '{question}'. Answer length: {len(answer)}")
135
+ return answer.strip()
136
+
137
+ except Exception as e:
138
+ logger.error(f"Error analysing audio {audio_path} for question '{question}': {e}", exc_info=True)
139
+ if "api key" in str(e).lower() or "authenticate" in str(e).lower():
140
+ return f"Error during audio analysis: AssemblyAI authentication failed. Please check your ASSEMBLYAI_API_KEY. Original error: {str(e)}"
141
+ return f"Error during audio analysis for question '{question}': {str(e)}"
142
+
143
+ @tool
144
+ def execute_python_code_from_file(file_path: str, question: str) -> str:
145
+ """
146
+ Reads the content of a **locally stored** Python file and uses a powerful LLM (gpt-4o)
147
+ to answer a specific question about the Python code (e.g., its output, functionality, or errors).
148
+ IMPORTANT: This tool expects a local file path for 'file_path' and cannot process web URLs directly.
149
+ It does NOT actually execute the code, but rather analyses it textually.
150
+ Args:
151
+ file_path: Local path to the Python file (e.g., /path/to/your/script.py).
152
+ question: The question the user is trying to answer about this Python code.
153
+ Returns:
154
+ A string containing the LLM's analysis or answer about the Python code, or an error message.
155
+ """
156
+ logger.info(f"Attempting to analyse Python file: {file_path} for question: '{question}'")
157
+ if not tools_llm: # tools_llm (gpt-4o) performs the code analysis
158
+ return "Error: LLM (gpt-4o) for code analysis not initialized in tools.py."
159
+ if not file_path:
160
+ return "Error: Python file path not provided."
161
+ if not os.path.exists(file_path):
162
+ return f"Error: Python file not found at local path: {file_path}. This tool requires a local file path."
163
+ if not file_path.lower().endswith('.py'):
164
+ return f"Error: File at {file_path} is not a Python (.py) file."
165
+
166
+ try:
167
+ with open(file_path, 'r', encoding='utf-8') as f:
168
+ python_code_content = f.read()
169
+
170
+ logger.info(f"Successfully read Python file {file_path}. Content length: {len(python_code_content)}")
171
+
172
+ analysis_prompt_text = (
173
+ f"The following is the content of a Python file: \n\nPython Code:\n```python\n{python_code_content}\n```\n\n---\n\n"
174
+ f"Based SOLELY on the Python code provided above, answer the following question: '{question}'. "
175
+ f"If the question asks for the output, predict the output. If it asks about functionality, describe it. "
176
+ f"Provide only the direct answer or analysis, with no additional commentary or explanations unless the question asks for it."
177
+ )
178
+
179
+ message = HumanMessage(content=analysis_prompt_text)
180
+ response = tools_llm.invoke([message]) # tools_llm (gpt-4o) performs this analysis
181
+ answer = response.content
182
+ logger.info(f"Successfully analysed Python code from {file_path} for question '{question}'. Answer length: {len(answer)}")
183
+ return answer.strip()
184
+
185
+ except Exception as e:
186
+ logger.error(f"Error analysing Python file {file_path} for question '{question}': {e}", exc_info=True)
187
+ return f"Error during Python file analysis for question '{question}': {str(e)}"
188
+
189
+ @tool
190
+ def execute_pandas_script_for_excel(excel_file_path: str, python_code: str) -> str:
191
+ """
192
+ Executes a given Python script (which should use pandas) to perform analysis on an Excel file.
193
+ The script MUST load the Excel file using the provided 'excel_file_path' variable.
194
+ The script MUST print its final answer to standard output. The print output will be returned as the result.
195
+ This tool is for calculations, data manipulation, and specific lookups within the Excel file.
196
+
197
+ Args:
198
+ excel_file_path: The path to the Excel file that the script will process.
199
+ python_code: A string containing the Python script to execute.
200
+ Example:
201
+ '''
202
+ import pandas as pd
203
+ df = pd.read_excel(excel_file_path, sheet_name=0)
204
+ # Perform analysis ...
205
+ final_answer = df["SomeColumn"].sum() # Example operation
206
+ print(final_answer)
207
+ '''
208
+ Returns:
209
+ The standard output from the executed script (which should be the answer), or an error message if execution fails.
210
+ """
211
+ logger.info(f"Attempting to execute pandas script for Excel file: {excel_file_path}")
212
+ logger.debug(f"Python code to execute:\n{python_code}")
213
+
214
+ if not os.path.exists(excel_file_path):
215
+ return f"Error: Excel file not found at {excel_file_path}"
216
+
217
+ # Prepare the execution namespace for exec, exposing pandas and the Excel file path to the script
218
+ local_namespace = {
219
+ "pd": pd,
220
+ "excel_file_path": excel_file_path,
221
+ "__builtins__": __builtins__ # Ensure basic builtins are available
222
+ }
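+ # Note: the script receives the full __builtins__, i.e. it runs unsandboxed; only code generated by the agent itself should be passed in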
223
+
224
+ # Capture stdout
225
+ stdout_capture = io.StringIO()
226
+ try:
227
+ with contextlib.redirect_stdout(stdout_capture):
228
+ exec(python_code, local_namespace) # pass one namespace as globals so pd and excel_file_path remain visible inside any functions the script defines
229
+ output = stdout_capture.getvalue().strip()
230
+ logger.info(f"Successfully executed pandas script. Output: '{output}'")
231
+ if not output: # If the script printed nothing, it might indicate an issue or missing print().
232
+ return "Script executed successfully but produced no output. Ensure the script prints the final answer."
233
+ return output
234
+ except Exception as e:
235
+ logger.error(f"Error executing pandas script: {e}", exc_info=True)
236
+ # Provide a more detailed error message back to the LLM
237
+ import traceback
238
+ tb_str = traceback.format_exc()
239
+ return f"Error during script execution: {str(e)}\nTraceback:\n{tb_str}"
240
+
241
+ @tool
242
+ def analyse_youtube(youtube_url: str, question: str) -> str:
243
+ """
244
+ Analyzes a YouTube video to answer a specific question.
245
+ This tool is intended for questions that require understanding the visual content of the video.
246
+ It embeds the YouTube URL in a text prompt for the shared Gemini model, so the answer depends on that model being able to access the video at the URL.
247
+
248
+ Args:
249
+ youtube_url: The full URL of the YouTube video (e.g., https://www.youtube.com/watch?v=...).
250
+ question: The question to answer based on the video's content.
251
+ Returns:
252
+ A string containing the answer from the shared Gemini model, or an error message if analysis fails.
253
+ """
254
+ logger.info(f"Attempting to analyse YouTube video: {youtube_url} with shared Gemini model ({GEMINI_SHARED_MODEL_NAME}) for question: '{question}'")
255
+ if not gemini_llm:
256
+ return f"Error: Shared Gemini LLM ({GEMINI_SHARED_MODEL_NAME}) not initialized in tools.py. Cannot analyse YouTube video."
257
+
258
+ try:
259
+
260
+ prompt = f"Video URL: {youtube_url}\n\nQuestion: {question}\n\nBased on the video at the URL, please provide the answer."
261
+ message = HumanMessage(content=prompt)
262
+
263
+ response = gemini_llm.invoke([message])
264
+ answer = response.content
265
+ logger.info(f"Successfully analysed YouTube video {youtube_url} with shared Gemini. Answer: {answer[:200]}...")
266
+ return answer.strip()
267
+ except Exception as e:
268
+ logger.error(f"Error analysing YouTube video {youtube_url} with shared Gemini ({GEMINI_SHARED_MODEL_NAME}): {e}", exc_info=True)
269
+ return f"Error during YouTube video analysis with shared Gemini: {str(e)}"
270
+
271
+ @tool
272
+ def deep_analysis_with_gemini(question: str) -> str:
273
+ """
274
+ Performs a deep analysis of a complex question using a powerful shared Gemini model.
275
+ Use this tool for questions that are multifaceted, require deep reasoning,
276
+ or for historical queries where standard search tools might be insufficient after initial attempts.
277
+ This tool directly passes the question to a shared Gemini model for a comprehensive answer.
278
+
279
+ Args:
280
+ question: The complex question to be analyzed.
281
+ Returns:
282
+ A string containing the detailed answer from the shared Gemini model, or an error message.
283
+ """
284
+ logger.info(f"Attempting deep analysis with shared Gemini model ({GEMINI_SHARED_MODEL_NAME}) for question: '{question}'")
285
+ if not gemini_llm:
286
+ return f"Error: Shared Gemini LLM ({GEMINI_SHARED_MODEL_NAME}) not initialized in tools.py."
287
+
288
+ try:
289
+ message = HumanMessage(content=question)
290
+ response = gemini_llm.invoke([message])
291
+ answer = response.content
292
+ logger.info(f"Successfully performed deep analysis with shared Gemini. Answer length: {len(answer)}")
293
+ return answer.strip()
294
+ except Exception as e:
295
+ logger.error(f"Error during deep analysis with shared Gemini ({GEMINI_SHARED_MODEL_NAME}): {e}", exc_info=True)
296
+ return f"Error during deep analysis with shared Gemini: {str(e)}"
297
+
298
+ # Initialize other tools
299
+ search_tool = DuckDuckGoSearchRun()
300
+
301
+ wikipedia_tool = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
302
+
303
+ TOOLS = [
304
+ analyse_image,
305
+ search_tool,
306
+ analyse_audio,
307
+ execute_python_code_from_file,
308
+ wikipedia_tool,
309
+ execute_pandas_script_for_excel,
310
+ analyse_youtube,
311
+ deep_analysis_with_gemini,
312
+ ]
313
+
314
+ logger.info(f"Tools initialized in tools.py: {[tool.name if hasattr(tool, 'name') else tool.__name__ for tool in TOOLS]}")
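A minimal sketch of how the exported TOOLS list could be wired into an agent (illustrative assumptions: a gpt-4o chat model and LangGraph's prebuilt ReAct helper; the project's actual agent graph may be wired differently):

    from langchain_openai import ChatOpenAI
    from langgraph.prebuilt import create_react_agent

    from tools import TOOLS

    llm = ChatOpenAI(model="gpt-4o", temperature=0)
    agent = create_react_agent(llm, TOOLS)  # bind the tool belt to the model

    result = agent.invoke({"messages": [("user", "Summarise the attached spreadsheet.")]})
    print(result["messages"][-1].content)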