Final_Assignment_Agents_Course

Running

App Files Community

Sonu313131 committed 5 days ago

Commit

d646f48

verified ·

1 Parent(s): 98f0864

Update app.py

Browse files

Files changed (1) hide show

app.py +84 -107

app.py CHANGED Viewed

@@ -1,183 +1,160 @@
 import os
 import gradio as gr
 import requests
-import inspect
 import pandas as pd
 import asyncio
-from smolagents import ToolCallingAgent, InferenceClientModel, HfApiModel
-from smolagents import DuckDuckGoSearchTool, Tool, CodeAgent
 from huggingface_hub import login
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 login(token=os.environ["HUGGINGFACEHUB_API_TOKEN"])
 search_tool = DuckDuckGoSearchTool()
 async def run_and_submit_all(profile: gr.OAuthProfile | None):
     try:
         agent = CodeAgent(
             tools=[search_tool],
             model=InferenceClientModel(model="mistralai/Magistral-Small-2506"),
             max_steps=5,
-            verbosity_level=2
         )
     except Exception as e:
         return f"Error initializing agent: {e}", None
-    space_id = os.getenv("SPACE_ID")
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    questions_url = f"{DEFAULT_API_URL}/questions"
     try:
-        response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
-            return "Fetched questions list is empty or invalid format.", None
     except Exception as e:
         return f"Error fetching questions: {e}", None
-    results_log = []
     answers_payload = []
     loop = asyncio.get_event_loop()
     for item in questions_data:
         task_id = item.get("task_id")
-        question_text = item.get("question")
-        if not task_id or question_text is None:
             continue
         try:
-            system_prompt = (
-                "You are a general AI assistant. I will ask you a question. "
-                "Report your thoughts, and finish your answer with the following template: "
-                "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. "
-                "If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. "
-                "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. "
-                "If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.\n\n"
-            )
-            full_prompt = system_prompt + f"Question: {question_text.strip()}"
-            agent_result = await loop.run_in_executor(None, agent, full_prompt)
-            # Try to extract final answer depending on type of result
-            # ✅ CLEAN AND PARSE AGENT OUTPUT
-            # ✅ CLEAN AND PARSE AGENT OUTPUT
             if isinstance(agent_result, dict) and "final_answer" in agent_result:
                 final_answer = str(agent_result["final_answer"]).strip()
             elif isinstance(agent_result, str):
-                output = agent_result.strip()
-                # ✅ Remove any boilerplate text like: "Here is the final answer from your managed agent ..."
-                if "Here is the final answer from your managed agent" in output:
-                    output = output.split(":", 1)[-1].strip()
-                # ✅ Extract only the text after "FINAL ANSWER:"
-                if "FINAL ANSWER:" in output:
-                    _, final_answer = output.rsplit("FINAL ANSWER:", 1)
                     final_answer = final_answer.strip()
                 else:
-                    final_answer = output  # fallback
             else:
                 final_answer = str(agent_result).strip()
-            answers_payload.append({
-                "task_id": task_id,
-                "model_answer": final_answer
-            })
-            results_log.append({
-                "Task ID": task_id,
-                "Question": question_text,
-                "Submitted Answer": final_answer
-            })
-        except Exception as e:
-            print(f"Error running agent on task {task_id}: {e}")
-            results_log.append({
-                "Task ID": task_id,
-                "Question": question_text,
-                "Submitted Answer": f"AGENT ERROR: {e}"
-            })
-    if not answers_payload:
-        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
     username = profile.username if profile else "unknown"
-    submit_url = f"{DEFAULT_API_URL}/submit"
-    cleaned_answers = []
-    for entry in answers_payload:
-        if isinstance(entry.get("task_id"), str) and isinstance(entry.get("model_answer"), str):
-            cleaned_answers.append(entry)
-        else:
-            print(f"[WARNING] Skipping invalid answer: {entry}")
-    # ✅ PREPARE SUBMISSION DATA
     submission_data = {
         "username": username.strip(),
         "agent_code": agent_code,
-        "answers": cleaned_answers
     }
-    # ✅ DEBUG PRINT
-    import json
-    print("[DEBUG] Submission Payload:")
-    print(json.dumps(submission_data, indent=2))
-    # ✅ SUBMIT
     try:
-        response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
-            f"Submission Successful!\n"
             f"User: {result_data.get('username')}\n"
-            f"Overall Score: {result_data.get('score', 'N/A')}% "
-            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-            f"Message: {result_data.get('message', 'No message received.')}"
         )
-        results_df = pd.DataFrame(results_log)
-        return final_status, results_df
     except Exception as e:
-        status_message = f"Submission Failed: {e}"
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
 with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown("""
     **Instructions:**
-    1. Clone this space and define your agent logic.
-    2. Log in to your Hugging Face account.
-    3. Click 'Run Evaluation & Submit All Answers'.
-    ---
-    **Note:**
-    The run may take time. Async is now used to improve responsiveness.
     """)
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
     run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
-if __name__ == "__main__":
-    print("\n" + "-"*30 + " App Starting " + "-"*30)
-    space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID")
-    if space_host_startup:
-        print(f"✅ SPACE_HOST: https://{space_host_startup}.hf.space")
-    if space_id_startup:
-        print(f"✅ SPACE_ID: https://huggingface.co/spaces/{space_id_startup}")
-    print("Launching Gradio Interface...")
-    demo.launch(debug=True, share=False)

 import os
 import gradio as gr
 import requests
 import pandas as pd
 import asyncio
+import json
+import concurrent.futures
 from huggingface_hub import login
+from smolagents import CodeAgent, InferenceClientModel, DuckDuckGoSearchTool
+# --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+QUESTIONS_URL = f"{DEFAULT_API_URL}/questions"
+SUBMIT_URL = f"{DEFAULT_API_URL}/submit"
+# --- Hugging Face Login ---
 login(token=os.environ["HUGGINGFACEHUB_API_TOKEN"])
+# --- Define Tools ---
 search_tool = DuckDuckGoSearchTool()
+# --- Main Function ---
 async def run_and_submit_all(profile: gr.OAuthProfile | None):
+    # Initialize Agent
     try:
         agent = CodeAgent(
             tools=[search_tool],
             model=InferenceClientModel(model="mistralai/Magistral-Small-2506"),
             max_steps=5,
+            verbosity_level=2
         )
     except Exception as e:
         return f"Error initializing agent: {e}", None
+    # Get Space ID for agent_code link
+    space_id = os.getenv("SPACE_ID", "unknown")
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    # Fetch questions
     try:
+        response = requests.get(QUESTIONS_URL, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
+            return "No questions received.", None
     except Exception as e:
         return f"Error fetching questions: {e}", None
+    # Prepare results
     answers_payload = []
+    results_log = []
     loop = asyncio.get_event_loop()
     for item in questions_data:
         task_id = item.get("task_id")
+        question = item.get("question")
+        if not task_id or not question:
             continue
+        system_prompt = (
+            "You are a general AI assistant. I will ask you a question. "
+            "Report your thoughts, and finish your answer with the following template: "
+            "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. "
+            "If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. "
+            "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. "
+            "If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.\n\n"
+        )
+        prompt = system_prompt + f"Question: {question.strip()}"
+        # Run agent with timeout
         try:
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                future = executor.submit(agent, prompt)
+                agent_result = await loop.run_in_executor(None, future.result, 60)  # timeout=60s
+            # Clean model output
             if isinstance(agent_result, dict) and "final_answer" in agent_result:
                 final_answer = str(agent_result["final_answer"]).strip()
             elif isinstance(agent_result, str):
+                response_text = agent_result.strip()
+                # Remove known boilerplate
+                if "Here is the final answer from your managed agent" in response_text:
+                    response_text = response_text.split(":", 1)[-1].strip()
+                # Extract final answer
+                if "FINAL ANSWER:" in response_text:
+                    _, final_answer = response_text.rsplit("FINAL ANSWER:", 1)
                     final_answer = final_answer.strip()
                 else:
+                    final_answer = response_text
             else:
                 final_answer = str(agent_result).strip()
+        except Exception as e:
+            print(f"[ERROR] Task {task_id} failed: {e}")
+            final_answer = f"AGENT ERROR: {e}"
+        answers_payload.append({"task_id": task_id, "model_answer": final_answer})
+        results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": final_answer})
+    # Clean invalid entries
+    valid_answers = [a for a in answers_payload if isinstance(a["task_id"], str) and isinstance(a["model_answer"], str)]
+    if not valid_answers:
+        return "Agent produced no valid answers.", pd.DataFrame(results_log)
+    # Prepare submission
     username = profile.username if profile else "unknown"
     submission_data = {
         "username": username.strip(),
         "agent_code": agent_code,
+        "answers": valid_answers
     }
+    print("[DEBUG] Submission Payload:\n", json.dumps(submission_data, indent=2))
     try:
+        response = requests.post(SUBMIT_URL, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
+            f"✅ Submission Successful\n"
             f"User: {result_data.get('username')}\n"
+            f"Score: {result_data.get('score', 'N/A')}% "
+            f"({result_data.get('correct_count')}/{result_data.get('total_attempted')})\n"
+            f"Message: {result_data.get('message', 'No message.')}"
         )
+        return final_status, pd.DataFrame(results_log)
     except Exception as e:
+        return f"Submission Failed: {e}", pd.DataFrame(results_log)
+# --- Gradio UI ---
 with gr.Blocks() as demo:
+    gr.Markdown("# Agent Evaluation Interface")
     gr.Markdown("""
     **Instructions:**
+    1. Clone and customize the agent logic.
+    2. Log in to Hugging Face.
+    3. Click "Run Evaluation" to test and submit your answers.
     """)
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
+    status_output = gr.Textbox(label="Status", lines=5, interactive=False)
+    results_table = gr.DataFrame(label="Agent Answers", wrap=True)
     run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
+# --- App Launch ---
+if __name__ == "__main__":
+    print("\n--- Launching Gradio Space ---")
+    print(f"✅ SPACE_HOST: {os.getenv('SPACE_HOST')}")
+    print(f"✅ SPACE_ID: {os.getenv('SPACE_ID')}")
+    demo.launch(debug=True, share=False)