import os

import gradio as gr
import pandas as pd
import requests
from transformers import pipeline


# Base URL of the scoring service; "/questions" and "/submit" are appended to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Checkpoint handed to transformers.pipeline() by BasicAgent.
# NOTE(review): this checkpoint is published as an NLI / zero-shot classification
# model, while BasicAgent loads it under the "text-generation" task -- confirm
# this is the intended model before relying on its answers.
HF_MODEL_NAME = "facebook/bart-large-mnli"


class BasicAgent:
    """Callable agent that answers a question with a text-generation pipeline.

    The pipeline is built once in ``__init__``. If it cannot be built, the
    agent stays usable and returns a fixed placeholder answer instead.
    """

    def __init__(self):
        """Load the pipeline for ``HF_MODEL_NAME``; set ``self.llm`` to None on failure."""
        print("Initializing Agent...")
        try:
            self.llm = pipeline(
                "text-generation",
                model=HF_MODEL_NAME,
                device_map="auto",
            )
        except Exception as e:
            # Deliberate best-effort: loading can fail for many unrelated
            # reasons, and the app should still start and report a default.
            print(f"LLM initialization failed: {e}")
            self.llm = None

    def __call__(self, question: str) -> str:
        """Return the generated answer for ``question``.

        Args:
            question: Prompt text passed to the pipeline unchanged.

        Returns:
            The generated continuation with surrounding whitespace removed,
            the placeholder ``"Default answer (LLM not available)"`` when no
            pipeline was loaded, or ``"Error: <message>"`` if generation fails.
        """
        if self.llm is None:
            return "Default answer (LLM not available)"

        try:
            response = self.llm(
                question,
                # Budget applies to generated tokens only, so a long question
                # cannot use up the whole allowance (max_length counts the prompt).
                max_new_tokens=100,
                # Without this the pipeline echoes the prompt in front of the answer.
                return_full_text=False,
            )
            return response[0]["generated_text"].strip()
        except Exception as e:
            # Generation errors are reported as the answer rather than raised,
            # so one bad question does not abort a whole evaluation run.
            return f"Error: {str(e)}"


def run_and_submit_all(profile: "gr.OAuthProfile | None" = None):
    """Fetch the questions, answer each with ``BasicAgent``, and submit the answers.

    Args:
        profile: OAuth profile of the signed-in user. Gradio fills in
            parameters annotated with ``gr.OAuthProfile | None`` on its own
            (no ``inputs=`` entry needed) and passes None when nobody is
            logged in. The annotation is a string so it is only resolved
            when Gradio inspects the function.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` with the columns "Task ID", "Question" and
        "Answer", or None when the run stopped before any question was
        answered.
    """
    # The login button authenticates through OAuth, which does not populate
    # environment variables; the env var is kept only as a fallback.
    username = getattr(profile, "username", None) or os.getenv("GRADIO_AUTH_USERNAME")
    if not username:
        return "Please login first", None

    space_id = os.getenv("SPACE_ID")
    api_url = DEFAULT_API_URL

    try:
        response = requests.get(f"{api_url}/questions", timeout=15)
        # Surface HTTP errors instead of trying to parse an error page.
        response.raise_for_status()
        questions = response.json()
    except (requests.RequestException, ValueError) as e:
        return f"Failed to get questions: {str(e)}", None

    if not isinstance(questions, list) or not questions:
        return "Failed to get questions: server returned no questions", None

    # Built only after the questions arrived, so a failed fetch does not pay
    # for loading the model.
    agent = BasicAgent()

    results = []
    answers = []
    for q in questions:
        if not isinstance(q, dict):
            # Skip malformed entries rather than crash on .get().
            continue
        task_id = q.get("task_id")
        question_text = q.get("question")
        try:
            answer = agent(question_text or "")
        except Exception as e:
            # Shown in the results table but not submitted.
            results.append({
                "Task ID": task_id,
                "Question": question_text,
                "Answer": f"Error: {str(e)}",
            })
            continue
        answers.append({
            "task_id": task_id,
            "submitted_answer": answer,
        })
        results.append({
            "Task ID": task_id,
            "Question": question_text,
            "Answer": answer,
        })

    if not answers:
        return "Submission failed: no answers were generated", pd.DataFrame(results)

    try:
        response = requests.post(
            f"{api_url}/submit",
            json={
                "username": username,
                "agent_code": f"https://huggingface.co/spaces/{space_id}",
                "answers": answers,
            },
            timeout=60,
        )
        # Without this an HTTP error body would be reported as a success.
        response.raise_for_status()
        result = response.json()
    except (requests.RequestException, ValueError) as e:
        return f"Submission failed: {str(e)}", pd.DataFrame(results)

    if not isinstance(result, dict):
        return "Submission failed: unexpected response format", pd.DataFrame(results)

    return (
        f"Success! Score: {result.get('score', 'N/A')}%\n"
        f"Correct: {result.get('correct_count', 0)}/{result.get('total_attempted', 0)}",
        pd.DataFrame(results),
    )


# Gradio UI: a login button, a run button, and two outputs (status text and a
# results table) that are filled from run_and_submit_all's return tuple.
with gr.Blocks() as demo:
    gr.Markdown("# LLM Agent Evaluation")

    with gr.Accordion("Instructions", open=False):
        # Written as joined literals so the rendered list does not depend on
        # how this source file is indented.
        gr.Markdown(
            "1. Click the login button\n"
            "2. Authorize with your Hugging Face account\n"
            "3. Click 'Run Evaluation'\n"
        )

    gr.LoginButton()

    run_btn = gr.Button("Run Evaluation", variant="primary")
    status = gr.Textbox(label="Status")
    results = gr.DataFrame(label="Results", wrap=True)

    # No inputs are wired: the callback's return tuple maps onto the outputs
    # in order.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status, results],
    )


if __name__ == "__main__":
    # NOTE(review): no `auth=` argument is passed here, so this message
    # presumably never appears (it belongs to Gradio's own login page) --
    # confirm whether it can be dropped.
    demo.launch(auth_message="Please login with your Hugging Face account")