Sonu313131 committed on
Commit
d646f48
·
verified ·
1 Parent(s): 98f0864

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -107
app.py CHANGED
@@ -1,183 +1,160 @@
1
  import os
2
  import gradio as gr
3
  import requests
4
- import inspect
5
  import pandas as pd
6
  import asyncio
7
- from smolagents import ToolCallingAgent, InferenceClientModel, HfApiModel
8
- from smolagents import DuckDuckGoSearchTool, Tool, CodeAgent
9
  from huggingface_hub import login
 
10
 
 
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
12
 
 
13
  login(token=os.environ["HUGGINGFACEHUB_API_TOKEN"])
14
 
 
15
  search_tool = DuckDuckGoSearchTool()
16
 
 
 
17
  async def run_and_submit_all(profile: gr.OAuthProfile | None):
 
18
  try:
19
  agent = CodeAgent(
20
  tools=[search_tool],
21
  model=InferenceClientModel(model="mistralai/Magistral-Small-2506"),
22
  max_steps=5,
23
- verbosity_level=2
24
  )
25
  except Exception as e:
26
  return f"Error initializing agent: {e}", None
27
 
28
- space_id = os.getenv("SPACE_ID")
 
29
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
30
 
31
- questions_url = f"{DEFAULT_API_URL}/questions"
32
  try:
33
- response = requests.get(questions_url, timeout=15)
34
  response.raise_for_status()
35
  questions_data = response.json()
36
  if not questions_data:
37
- return "Fetched questions list is empty or invalid format.", None
38
  except Exception as e:
39
  return f"Error fetching questions: {e}", None
40
 
41
- results_log = []
42
  answers_payload = []
 
43
  loop = asyncio.get_event_loop()
44
 
45
  for item in questions_data:
46
  task_id = item.get("task_id")
47
- question_text = item.get("question")
48
- if not task_id or question_text is None:
49
  continue
50
 
 
 
 
 
 
 
 
 
 
 
 
51
  try:
52
- system_prompt = (
53
- "You are a general AI assistant. I will ask you a question. "
54
- "Report your thoughts, and finish your answer with the following template: "
55
- "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. "
56
- "If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. "
57
- "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. "
58
- "If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.\n\n"
59
- )
60
- full_prompt = system_prompt + f"Question: {question_text.strip()}"
61
-
62
- agent_result = await loop.run_in_executor(None, agent, full_prompt)
63
-
64
- # Try to extract final answer depending on type of result
65
- # ✅ CLEAN AND PARSE AGENT OUTPUT
66
- # ✅ CLEAN AND PARSE AGENT OUTPUT
67
  if isinstance(agent_result, dict) and "final_answer" in agent_result:
68
  final_answer = str(agent_result["final_answer"]).strip()
69
-
70
  elif isinstance(agent_result, str):
71
- output = agent_result.strip()
72
-
73
- # Remove any boilerplate text like: "Here is the final answer from your managed agent ..."
74
- if "Here is the final answer from your managed agent" in output:
75
- output = output.split(":", 1)[-1].strip()
76
-
77
- # Extract only the text after "FINAL ANSWER:"
78
- if "FINAL ANSWER:" in output:
79
- _, final_answer = output.rsplit("FINAL ANSWER:", 1)
80
  final_answer = final_answer.strip()
81
  else:
82
- final_answer = output # fallback
83
-
84
  else:
85
  final_answer = str(agent_result).strip()
86
 
 
 
 
87
 
 
 
88
 
89
- answers_payload.append({
90
- "task_id": task_id,
91
- "model_answer": final_answer
92
- })
93
-
94
- results_log.append({
95
- "Task ID": task_id,
96
- "Question": question_text,
97
- "Submitted Answer": final_answer
98
- })
99
 
100
- except Exception as e:
101
- print(f"Error running agent on task {task_id}: {e}")
102
- results_log.append({
103
- "Task ID": task_id,
104
- "Question": question_text,
105
- "Submitted Answer": f"AGENT ERROR: {e}"
106
- })
107
-
108
- if not answers_payload:
109
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
110
 
 
111
  username = profile.username if profile else "unknown"
112
- submit_url = f"{DEFAULT_API_URL}/submit"
113
- cleaned_answers = []
114
- for entry in answers_payload:
115
- if isinstance(entry.get("task_id"), str) and isinstance(entry.get("model_answer"), str):
116
- cleaned_answers.append(entry)
117
- else:
118
- print(f"[WARNING] Skipping invalid answer: {entry}")
119
-
120
- # ✅ PREPARE SUBMISSION DATA
121
  submission_data = {
122
  "username": username.strip(),
123
  "agent_code": agent_code,
124
- "answers": cleaned_answers
125
  }
126
-
127
- # DEBUG PRINT
128
- import json
129
- print("[DEBUG] Submission Payload:")
130
- print(json.dumps(submission_data, indent=2))
131
-
132
- # ✅ SUBMIT
133
  try:
134
- response = requests.post(submit_url, json=submission_data, timeout=60)
135
  response.raise_for_status()
136
  result_data = response.json()
 
137
  final_status = (
138
- f"Submission Successful!\n"
139
  f"User: {result_data.get('username')}\n"
140
- f"Overall Score: {result_data.get('score', 'N/A')}% "
141
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
142
- f"Message: {result_data.get('message', 'No message received.')}"
143
  )
144
- results_df = pd.DataFrame(results_log)
145
- return final_status, results_df
146
-
147
  except Exception as e:
148
- status_message = f"Submission Failed: {e}"
149
- results_df = pd.DataFrame(results_log)
150
- return status_message, results_df
151
 
 
 
152
  with gr.Blocks() as demo:
153
- gr.Markdown("# Basic Agent Evaluation Runner")
154
  gr.Markdown("""
155
  **Instructions:**
156
- 1. Clone this space and define your agent logic.
157
- 2. Log in to your Hugging Face account.
158
- 3. Click 'Run Evaluation & Submit All Answers'.
159
- ---
160
- **Note:**
161
- The run may take time. Async is now used to improve responsiveness.
162
  """)
163
 
164
  gr.LoginButton()
165
-
166
  run_button = gr.Button("Run Evaluation & Submit All Answers")
167
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
168
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
169
 
170
  run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
171
 
172
- if __name__ == "__main__":
173
- print("\n" + "-"*30 + " App Starting " + "-"*30)
174
- space_host_startup = os.getenv("SPACE_HOST")
175
- space_id_startup = os.getenv("SPACE_ID")
176
 
177
- if space_host_startup:
178
- print(f"✅ SPACE_HOST: https://{space_host_startup}.hf.space")
179
- if space_id_startup:
180
- print(f"✅ SPACE_ID: https://huggingface.co/spaces/{space_id_startup}")
181
-
182
- print("Launching Gradio Interface...")
183
- demo.launch(debug=True, share=False)
 
1
  import os
2
  import gradio as gr
3
  import requests
 
4
  import pandas as pd
5
  import asyncio
6
+ import json
7
+ import concurrent.futures
8
  from huggingface_hub import login
9
+ from smolagents import CodeAgent, InferenceClientModel, DuckDuckGoSearchTool
10
 
# --- Constants ---
# Root of the course scoring service; both endpoints below are derived from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
QUESTIONS_URL = DEFAULT_API_URL + "/questions"
SUBMIT_URL = DEFAULT_API_URL + "/submit"

# --- Hugging Face Login ---
# The token is read with os.environ[...] rather than os.getenv(...), so a
# missing HUGGINGFACEHUB_API_TOKEN raises KeyError while the module is loading.
login(token=os.environ["HUGGINGFACEHUB_API_TOKEN"])

# --- Define Tools ---
# One search tool instance, created at import time and handed to the agent.
search_tool = DuckDuckGoSearchTool()
21
 
22
+
# --- Main Function ---
# Answer-format instructions prepended to every question. Built once at module
# level because the text is identical for every question.
_SYSTEM_PROMPT = (
    "You are a general AI assistant. I will ask you a question. "
    "Report your thoughts, and finish your answer with the following template: "
    "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. "
    "If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. "
    "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. "
    "If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.\n\n"
)

# Maximum time, in seconds, the caller waits for the agent on one question.
_AGENT_TIMEOUT_S = 60


def _build_agent():
    """Create the search-enabled CodeAgent used to answer questions."""
    return CodeAgent(
        tools=[search_tool],
        # NOTE(review): smolagents documents this keyword as `model_id`; the
        # previous `model=` spelling is not the documented parameter name.
        model=InferenceClientModel(model_id="mistralai/Magistral-Small-2506"),
        max_steps=5,
        verbosity_level=2,
    )


def _extract_final_answer(agent_result):
    """Reduce a raw agent result to the bare answer string that is submitted."""
    if isinstance(agent_result, dict) and "final_answer" in agent_result:
        return str(agent_result["final_answer"]).strip()
    if not isinstance(agent_result, str):
        return str(agent_result).strip()

    text = agent_result.strip()
    # Drop the managed-agent boilerplate: keep what follows the first colon.
    if "Here is the final answer from your managed agent" in text:
        text = text.split(":", 1)[-1].strip()
    # Keep only what follows the last "FINAL ANSWER:" marker, if there is one.
    if "FINAL ANSWER:" in text:
        return text.rsplit("FINAL ANSWER:", 1)[1].strip()
    return text


async def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer each with the agent, and submit the answers.

    Args:
        profile: OAuth profile of the logged-in user, or None. Only its
            ``username`` is read; "unknown" is submitted when it is None.

    Returns:
        A ``(status, table)`` pair: ``status`` is a human-readable message and
        ``table`` is a DataFrame with one row per attempted question, or None
        when the run stopped before any question was attempted.

    All failures are reported through the status message; nothing is raised
    for agent, network or HTTP errors.
    """
    try:
        agent = _build_agent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # Link to this Space's code, sent along with the answers.
    space_id = os.getenv("SPACE_ID", "unknown")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Fetch questions. requests is blocking, so it runs in a worker thread to
    # keep the event loop free.
    try:
        response = await asyncio.to_thread(requests.get, QUESTIONS_URL, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None
    if not questions_data or not isinstance(questions_data, list):
        return "No questions received.", None

    answers_payload = []
    results_log = []

    for item in questions_data:
        # Skip malformed entries instead of letting one abort the whole run.
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or not question or not isinstance(question, str):
            continue

        prompt = f"{_SYSTEM_PROMPT}Question: {question.strip()}"

        try:
            # wait_for gives up after the timeout without blocking the event
            # loop. The previous `with ThreadPoolExecutor()` form waited for
            # the worker thread on exit, which cancelled out the timeout.
            agent_result = await asyncio.wait_for(
                asyncio.to_thread(agent, prompt), timeout=_AGENT_TIMEOUT_S
            )
            final_answer = _extract_final_answer(agent_result)
        except asyncio.TimeoutError:
            print(f"[ERROR] Task {task_id} failed: timed out after {_AGENT_TIMEOUT_S}s")
            results_log.append({
                "Task ID": task_id,
                "Question": question,
                "Submitted Answer": f"AGENT ERROR: timed out after {_AGENT_TIMEOUT_S}s",
            })
            # The abandoned worker thread cannot be stopped and still uses the
            # old agent object, so later questions get a fresh instance.
            try:
                agent = _build_agent()
            except Exception as e:
                print(f"[ERROR] Could not rebuild agent after timeout: {e}")
                break
            continue
        except Exception as e:
            print(f"[ERROR] Task {task_id} failed: {e}")
            # Failures are shown in the table but not submitted as answers.
            results_log.append({
                "Task ID": task_id,
                "Question": question,
                "Submitted Answer": f"AGENT ERROR: {e}",
            })
            continue

        answers_payload.append({"task_id": task_id, "model_answer": final_answer})
        results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": final_answer})

    # Keep only entries where both fields are strings.
    valid_answers = [
        a for a in answers_payload
        if isinstance(a["task_id"], str) and isinstance(a["model_answer"], str)
    ]
    results_df = pd.DataFrame(results_log)

    if not valid_answers:
        return "Agent produced no valid answers.", results_df

    # Prepare submission
    username = profile.username if profile else "unknown"
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": valid_answers,
    }

    print("[DEBUG] Submission Payload:\n", json.dumps(submission_data, indent=2))

    try:
        response = await asyncio.to_thread(
            requests.post, SUBMIT_URL, json=submission_data, timeout=60
        )
        response.raise_for_status()
        result_data = response.json()
    except Exception as e:
        return f"Submission Failed: {e}", results_df

    final_status = (
        f"Submission Successful\n"
        f"User: {result_data.get('username')}\n"
        f"Score: {result_data.get('score', 'N/A')}% "
        f"({result_data.get('correct_count')}/{result_data.get('total_attempted')})\n"
        f"Message: {result_data.get('message', 'No message.')}"
    )
    return final_status, results_df
 
 
135
 
136
+
# --- Gradio UI ---
# Layout, top to bottom: title, instructions, login button, run button,
# then the two outputs that run_and_submit_all fills in.
with gr.Blocks() as demo:
    gr.Markdown(value="# Agent Evaluation Interface")
    gr.Markdown(value="""
    **Instructions:**
    1. Clone and customize the agent logic.
    2. Log in to Hugging Face.
    3. Click "Run Evaluation" to test and submit your answers.
    """)

    gr.LoginButton()
    run_button = gr.Button(value="Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Status", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Agent Answers", wrap=True)

    # No inputs are wired; the handler's two return values map onto the two
    # output components in order.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )
153
 
 
 
 
 
154
 
# --- App Launch ---
if __name__ == "__main__":
    print("\n--- Launching Gradio Space ---")
    # Report each Space variable only when it is set, so an unset one is not
    # printed as "None" next to a success tick.
    for env_name in ("SPACE_HOST", "SPACE_ID"):
        env_value = os.getenv(env_name)
        if env_value:
            print(f"✅ {env_name}: {env_value}")
        else:
            print(f"ℹ️ {env_name} is not set")
    demo.launch(debug=True, share=False)