raaec committed on
Commit
7ee22fe
·
verified ·
1 Parent(s): 24dfa55

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +292 -131
app.py CHANGED
@@ -1,119 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
- import inspect
 
 
3
  import gradio as gr
4
  import requests
5
  import pandas as pd
6
  from langchain_core.messages import HumanMessage
7
- from agent import build_graph
8
 
 
9
 
 
 
 
 
 
 
 
10
 
11
- # (Keep Constants as is)
12
  # --- Constants ---
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
-
15
- # --- Basic Agent Definition ---
16
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
17
 
18
 
19
  class BasicAgent:
20
- """A langgraph agent."""
 
 
 
 
 
 
 
 
 
21
  def __init__(self):
22
- print("BasicAgent initialized.")
 
23
  self.graph = build_graph()
24
-
25
  def __call__(self, question: str) -> str:
26
- print(f"Agent received question (first 50 chars): {question[:50]}...")
 
 
 
 
 
 
 
 
 
 
27
  # Wrap the question in a HumanMessage from langchain_core
28
  messages = [HumanMessage(content=question)]
 
 
29
  messages = self.graph.invoke({"messages": messages})
 
 
30
  answer = messages['messages'][-1].content
31
- return answer[14:]
 
 
32
 
33
 
34
- def run_and_submit_all( profile: gr.OAuthProfile | None):
35
  """
36
- Fetches all questions, runs the BasicAgent on them, submits all answers,
37
- and displays the results.
 
 
 
 
 
 
 
 
38
  """
39
- # --- Determine HF Space Runtime URL and Repo URL ---
40
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
41
-
42
- if profile:
43
- username= f"{profile.username}"
44
- print(f"User logged in: {username}")
45
- else:
46
- print("User not logged in.")
47
- return "Please Login to Hugging Face with the button.", None
48
-
49
- api_url = DEFAULT_API_URL
50
  questions_url = f"{api_url}/questions"
51
- submit_url = f"{api_url}/submit"
 
 
 
 
 
 
 
 
 
 
52
 
53
- # 1. Instantiate Agent ( modify this part to create your agent)
54
- try:
55
- agent = BasicAgent()
56
- except Exception as e:
57
- print(f"Error instantiating agent: {e}")
58
- return f"Error initializing agent: {e}", None
59
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
60
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
61
- print(agent_code)
62
 
63
- # 2. Fetch Questions
64
- print(f"Fetching questions from: {questions_url}")
65
- try:
66
- response = requests.get(questions_url, timeout=15)
67
- response.raise_for_status()
68
- questions_data = response.json()
69
- if not questions_data:
70
- print("Fetched questions list is empty.")
71
- return "Fetched questions list is empty or invalid format.", None
72
- print(f"Fetched {len(questions_data)} questions.")
73
- except requests.exceptions.RequestException as e:
74
- print(f"Error fetching questions: {e}")
75
- return f"Error fetching questions: {e}", None
76
- except requests.exceptions.JSONDecodeError as e:
77
- print(f"Error decoding JSON response from questions endpoint: {e}")
78
- print(f"Response text: {response.text[:500]}")
79
- return f"Error decoding server response for questions: {e}", None
80
- except Exception as e:
81
- print(f"An unexpected error occurred fetching questions: {e}")
82
- return f"An unexpected error occurred fetching questions: {e}", None
83
-
84
- # 3. Run your Agent
85
  results_log = []
86
  answers_payload = []
87
- print(f"Running agent on {len(questions_data)} questions...")
 
 
88
  for item in questions_data:
89
  task_id = item.get("task_id")
90
  question_text = item.get("question")
 
91
  if not task_id or question_text is None:
92
- print(f"Skipping item with missing task_id or question: {item}")
93
  continue
 
94
  try:
95
  submitted_answer = agent(question_text)
96
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
97
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
 
 
 
 
 
 
98
  except Exception as e:
99
- print(f"Error running agent on task {task_id}: {e}")
100
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
 
 
101
 
102
- if not answers_payload:
103
- print("Agent did not produce any answers to submit.")
104
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
105
 
106
- # 4. Prepare Submission
107
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
108
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
109
- print(status_update)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
- # 5. Submit
112
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  try:
114
- response = requests.post(submit_url, json=submission_data, timeout=60)
115
- response.raise_for_status()
116
- result_data = response.json()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  final_status = (
118
  f"Submission Successful!\n"
119
  f"User: {result_data.get('username')}\n"
@@ -121,86 +249,119 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
121
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
122
  f"Message: {result_data.get('message', 'No message received.')}"
123
  )
124
- print("Submission successful.")
125
  results_df = pd.DataFrame(results_log)
126
  return final_status, results_df
 
127
  except requests.exceptions.HTTPError as e:
 
128
  error_detail = f"Server responded with status {e.response.status_code}."
129
  try:
130
  error_json = e.response.json()
131
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
132
  except requests.exceptions.JSONDecodeError:
133
  error_detail += f" Response: {e.response.text[:500]}"
 
134
  status_message = f"Submission Failed: {error_detail}"
135
- print(status_message)
136
- results_df = pd.DataFrame(results_log)
 
137
  return status_message, results_df
 
138
  except requests.exceptions.Timeout:
139
- status_message = "Submission Failed: The request timed out."
140
- print(status_message)
141
- results_df = pd.DataFrame(results_log)
142
- return status_message, results_df
143
- except requests.exceptions.RequestException as e:
144
- status_message = f"Submission Failed: Network error - {e}"
145
- print(status_message)
146
- results_df = pd.DataFrame(results_log)
147
  return status_message, results_df
 
148
  except Exception as e:
149
- status_message = f"An unexpected error occurred during submission: {e}"
150
- print(status_message)
151
- results_df = pd.DataFrame(results_log)
 
152
  return status_message, results_df
153
 
154
 
155
- # --- Build Gradio Interface using Blocks ---
156
- with gr.Blocks() as demo:
157
- gr.Markdown("# Basic Agent Evaluation Runner")
158
- gr.Markdown(
159
- """
160
- **Instructions:**
161
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
162
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
163
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
164
- ---
165
- **Disclaimers:**
166
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
167
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
168
- """
169
- )
 
 
 
 
 
 
 
 
 
 
 
170
 
171
- gr.LoginButton()
172
 
173
- run_button = gr.Button("Run Evaluation & Submit All Answers")
 
 
 
 
 
 
 
 
 
174
 
175
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
176
- # Removed max_rows=10 from DataFrame constructor
177
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
 
 
178
 
179
- run_button.click(
180
- fn=run_and_submit_all,
181
- outputs=[status_output, results_table]
182
- )
183
 
184
- if __name__ == "__main__":
185
- print("\n" + "-"*30 + " App Starting " + "-"*30)
186
- # Check for SPACE_HOST and SPACE_ID at startup for information
187
- space_host_startup = os.getenv("SPACE_HOST")
188
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
189
-
190
- if space_host_startup:
191
- print(f"SPACE_HOST found: {space_host_startup}")
192
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
 
 
193
  else:
194
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
195
-
196
- if space_id_startup: # Print repo URLs if SPACE_ID is found
197
- print(f"SPACE_ID found: {space_id_startup}")
198
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
199
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
 
 
200
  else:
201
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
 
 
202
 
203
- print("-"*(60 + len(" App Starting ")) + "\n")
204
 
205
- print("Launching Gradio Interface for Basic Agent Evaluation...")
 
 
 
 
 
 
206
  demo.launch(debug=True, share=False)
 
1
+ """
2
+ Agent Evaluation Runner
3
+ ======================
4
+ This module implements a framework for evaluating LLM agents against a set of questions
5
+ and submitting the results to a scoring server.
6
+
7
+ Main components:
8
+ - BasicAgent: The agent implementation that processes questions
9
+ - Evaluation functions: For running and submitting results
10
+ - Gradio interface: For user interaction
11
+ """
12
+
13
  import os
14
+ import logging
15
+ from typing import Tuple, List, Dict, Any, Optional
16
+
17
  import gradio as gr
18
  import requests
19
  import pandas as pd
20
  from langchain_core.messages import HumanMessage
 
21
 
22
+ from agent import build_graph
23
 
24
# Root logging setup: INFO and above, formatted as
# "<date> <time> - <LEVEL> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
)

# Module-scoped logger used by the functions in this file.
logger = logging.getLogger(__name__)


# --- Constants ---
# Base URL of the scoring service; endpoint paths are appended to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Timeout, in seconds, handed to the HTTP requests made by this module.
REQUEST_TIMEOUT = 60
 
 
35
 
36
 
class BasicAgent:
    """
    A LangGraph-based agent that answers questions using a graph-based workflow.

    A question is wrapped in a ``HumanMessage``, handed to the graph returned
    by ``build_graph()``, and the content of the last message in the resulting
    state is returned with a leading "FINAL ANSWER:" marker removed.

    Attributes:
        graph: The workflow object returned by ``build_graph()``
    """

    def __init__(self):
        """Initialize the agent with a graph-based workflow."""
        logger.info("Initializing BasicAgent")
        self.graph = build_graph()

    @staticmethod
    def _extract_final_answer(answer: str) -> str:
        """
        Remove a leading "FINAL ANSWER:" marker from the agent output.

        Args:
            answer: Raw text produced by the graph

        Returns:
            The text after the marker with surrounding whitespace stripped,
            or ``answer`` unchanged when it does not start with the marker
        """
        prefix = "FINAL ANSWER:"
        if not answer.startswith(prefix):
            return answer
        # Slice by the real prefix length instead of a hard-coded 14: the
        # marker is 13 characters long, so a fixed 14 silently dropped the
        # first character of the answer when no space followed the colon.
        return answer[len(prefix):].strip()

    def __call__(self, question: str) -> str:
        """
        Process a question and return an answer.

        Args:
            question: The natural language question to process

        Returns:
            The agent's answer to the question
        """
        logger.info("Processing question (first 50 chars): %s...", question[:50])

        # The graph takes a state dict whose "messages" entry holds the
        # conversation so far; here that is the single user question.
        state = self.graph.invoke({"messages": [HumanMessage(content=question)]})

        # The answer is the content of the last message in the returned state.
        return self._extract_final_answer(state["messages"][-1].content)
76
 
77
 
78
def fetch_questions(api_url: str) -> List[Dict[str, Any]]:
    """
    Fetch questions from the evaluation server.

    Args:
        api_url: Base URL of the evaluation API

    Returns:
        Non-empty list of question data dictionaries

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            server answers with an error status
        ValueError: If the decoded response is empty or is not a list
    """
    questions_url = f"{api_url}/questions"
    logger.info("Fetching questions from: %s", questions_url)

    response = requests.get(questions_url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    questions_data = response.json()
    # The error text promises a format check, so reject any payload that is
    # not a list (for example a JSON object) as well as an empty one.
    if not isinstance(questions_data, list) or not questions_data:
        raise ValueError("Fetched questions list is empty or invalid format")

    logger.info("Successfully fetched %d questions", len(questions_data))
    return questions_data
103
 
 
 
 
 
 
 
 
 
 
104
 
105
def run_agent_on_questions(
    agent: BasicAgent,
    questions_data: List[Dict[str, Any]]
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """
    Run the agent on a list of questions.

    Items that are not dictionaries, or that lack a ``task_id`` or a
    ``question``, are skipped with a warning. An exception raised by the
    agent for one question is logged and recorded in the results log only;
    the remaining questions are still processed.

    Args:
        agent: The agent to run
        questions_data: List of question data dictionaries

    Returns:
        Tuple of (answers_payload, results_log). ``answers_payload`` holds one
        ``{"task_id", "submitted_answer"}`` dict per answered question;
        ``results_log`` holds one display row per attempted question,
        including failed ones.
    """
    results_log = []
    answers_payload = []

    logger.info("Running agent on %d questions...", len(questions_data))

    for item in questions_data:
        # A malformed entry must not abort the whole batch: calling .get() on
        # a non-dict would raise before the per-question try block is reached.
        if not isinstance(item, dict):
            logger.warning("Skipping non-dict question item: %r", item)
            continue

        task_id = item.get("task_id")
        question_text = item.get("question")

        if not task_id or question_text is None:
            logger.warning("Skipping item with missing task_id or question: %s", item)
            continue

        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            # Record the failure for display but keep it out of the payload.
            logger.exception("Error running agent on task %s: %s", task_id, e)
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": f"AGENT ERROR: {e}",
            })
            continue

        # Entry sent to the scoring server
        answers_payload.append({
            "task_id": task_id,
            "submitted_answer": submitted_answer,
        })

        # Row shown in the results table
        results_log.append({
            "Task ID": task_id,
            "Question": question_text,
            "Submitted Answer": submitted_answer,
        })

    return answers_payload, results_log
159
 
 
 
 
160
 
161
def submit_answers(
    api_url: str,
    username: str,
    agent_code: str,
    answers_payload: List[Dict[str, Any]]
) -> Dict[str, Any]:
    """
    POST the collected answers to the evaluation server.

    Args:
        api_url: Base URL of the evaluation API
        username: Hugging Face username (surrounding whitespace is stripped)
        agent_code: URL to the agent code repository
        answers_payload: List of answer dictionaries

    Returns:
        The decoded JSON body of the server response

    Raises:
        requests.exceptions.RequestException: If there's an error during submission
    """
    endpoint = f"{api_url}/submit"

    # Request body sent as JSON
    body = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    logger.info(f"Submitting {len(answers_payload)} answers to: {endpoint}")

    reply = requests.post(endpoint, json=body, timeout=REQUEST_TIMEOUT)
    # Turn 4xx/5xx responses into an exception before decoding the body.
    reply.raise_for_status()

    decoded = reply.json()
    logger.info("Submission successful")
    return decoded
201
+
202
 
203
+ def run_and_submit_all(profile: Optional[gr.OAuthProfile] = None) -> Tuple[str, pd.DataFrame]:
204
+ """
205
+ Fetches all questions, runs the BasicAgent on them, submits all answers,
206
+ and displays the results.
207
+
208
+ Args:
209
+ profile: Gradio OAuth profile containing user information
210
+
211
+ Returns:
212
+ Tuple of (status_message, results_dataframe)
213
+ """
214
+ # Check if user is logged in
215
+ if not profile:
216
+ logger.warning("User not logged in")
217
+ return "Please Login to Hugging Face with the button.", None
218
+
219
+ username = profile.username
220
+ logger.info(f"User logged in: {username}")
221
+
222
+ # Get the space ID for linking to code
223
+ space_id = os.getenv("SPACE_ID")
224
+ api_url = DEFAULT_API_URL
225
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
226
+
227
  try:
228
+ # 1. Instantiate Agent
229
+ agent = BasicAgent()
230
+
231
+ # 2. Fetch Questions
232
+ questions_data = fetch_questions(api_url)
233
+
234
+ # 3. Run Agent on Questions
235
+ answers_payload, results_log = run_agent_on_questions(agent, questions_data)
236
+
237
+ if not answers_payload:
238
+ logger.warning("Agent did not produce any answers to submit")
239
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
240
+
241
+ # 4. Submit Answers
242
+ result_data = submit_answers(api_url, username, agent_code, answers_payload)
243
+
244
+ # 5. Format and Return Results
245
  final_status = (
246
  f"Submission Successful!\n"
247
  f"User: {result_data.get('username')}\n"
 
249
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
250
  f"Message: {result_data.get('message', 'No message received.')}"
251
  )
252
+
253
  results_df = pd.DataFrame(results_log)
254
  return final_status, results_df
255
+
256
  except requests.exceptions.HTTPError as e:
257
+ # Handle HTTP errors with detailed error information
258
  error_detail = f"Server responded with status {e.response.status_code}."
259
  try:
260
  error_json = e.response.json()
261
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
262
  except requests.exceptions.JSONDecodeError:
263
  error_detail += f" Response: {e.response.text[:500]}"
264
+
265
  status_message = f"Submission Failed: {error_detail}"
266
+ logger.error(status_message)
267
+
268
+ results_df = pd.DataFrame(results_log if 'results_log' in locals() else [])
269
  return status_message, results_df
270
+
271
  except requests.exceptions.Timeout:
272
+ status_message = f"Submission Failed: The request timed out after {REQUEST_TIMEOUT} seconds"
273
+ logger.error(status_message)
274
+
275
+ results_df = pd.DataFrame(results_log if 'results_log' in locals() else [])
 
 
 
 
276
  return status_message, results_df
277
+
278
  except Exception as e:
279
+ status_message = f"An unexpected error occurred: {str(e)}"
280
+ logger.error(status_message, exc_info=True)
281
+
282
+ results_df = pd.DataFrame(results_log if 'results_log' in locals() else [])
283
  return status_message, results_df
284
 
285
 
286
def create_gradio_interface() -> gr.Blocks:
    """
    Build the Gradio Blocks UI for the evaluation runner.

    The layout is a title, an instructions panel, a login button, a run
    button, a read-only status box and a results table. Clicking the run
    button calls ``run_and_submit_all`` and routes its two return values to
    the status box and the table.

    Returns:
        Configured Gradio Blocks interface
    """
    with gr.Blocks() as interface:
        # Header and usage notes
        gr.Markdown("# Agent Evaluation Runner")
        gr.Markdown(
            """
            ## Instructions

            1. **Clone this space** and modify the code to define your agent's logic, tools, and dependencies
            2. **Log in to your Hugging Face account** using the button below (required for submission)
            3. **Run Evaluation** to fetch questions, run your agent, and submit answers

            ## Important Notes

            - The evaluation process may take several minutes to complete
            - This agent framework is intentionally minimal to allow for your own improvements
            - Consider implementing caching or async processing for better performance
            """
        )

        gr.LoginButton()

        # Controls and outputs
        trigger = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
        status_box = gr.Textbox(
            label="Run Status / Submission Result",
            lines=5,
            interactive=False,
        )
        answers_grid = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

        # No explicit inputs are wired to the click handler.
        trigger.click(fn=run_and_submit_all, outputs=[status_box, answers_grid])

    return interface
332
 
 
 
 
 
333
 
334
def check_environment() -> None:
    """
    Check and log environment variables at startup.

    Reads ``SPACE_HOST`` and ``SPACE_ID`` from the environment and logs the
    URLs derived from them, or an informational note when a variable is not
    set. Nothing is returned and nothing is raised for missing variables.
    """
    logger.info("-" * 30 + " App Starting " + "-" * 30)

    # Check for SPACE_HOST
    space_host = os.getenv("SPACE_HOST")
    if space_host:
        # Append the domain only when the value does not already carry it;
        # otherwise the logged URL would end in ".hf.space.hf.space".
        domain = ".hf.space"
        runtime_host = space_host if space_host.endswith(domain) else f"{space_host}{domain}"
        logger.info(f"✅ SPACE_HOST found: {space_host}")
        logger.info(f" Runtime URL should be: https://{runtime_host}")
    else:
        logger.info("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    # Check for SPACE_ID
    space_id = os.getenv("SPACE_ID")
    if space_id:
        logger.info(f" SPACE_ID found: {space_id}")
        logger.info(f" Repo URL: https://huggingface.co/spaces/{space_id}")
        logger.info(f" Repo Tree URL: https://huggingface.co/spaces/{space_id}/tree/main")
    else:
        logger.info("ℹ️ SPACE_ID environment variable not found (running locally?).")

    # Closing rule, as wide as the opening banner line
    logger.info("-" * (60 + len(" App Starting ")) + "\n")
358
 
 
359
 
360
if __name__ == "__main__":
    # Log the Space-related environment variables before building the UI.
    check_environment()

    logger.info("Launching Gradio Interface for Agent Evaluation...")

    # Build the Blocks app, then serve it with debug output and no share link.
    demo = create_gradio_interface()
    demo.launch(debug=True, share=False)