Spaces:

schoolkithub
/

choko

Running

App Files Files Community

choko / app.py

schoolkithub

Update app.py

d6e0d11 verified 12 days ago

raw

history blame contribute delete

10.3 kB

	import os
	import re
	import gradio as gr
	import requests
	import pandas as pd
	from huggingface_hub import InferenceClient
	from duckduckgo_search import DDGS
	import wikipediaapi
	from bs4 import BeautifulSoup
	import pdfplumber
	import pytube

	# === CONFIG ===
	# Base URL of the scoring service; the "/questions" and "/submit" endpoints are built from it.
	DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
	# Hugging Face access token taken from the environment; None when HF_TOKEN is not set.
	HF_TOKEN = os.environ.get("HF_TOKEN")

	# Model ids that llm_conversational tries one after another, in this order.
	ADVANCED_MODELS = [
	"deepseek-ai/DeepSeek-R1",
	"deepseek-ai/DeepSeek-V2-Chat",
	"Qwen/Qwen2-72B-Instruct",
	"mistralai/Mixtral-8x22B-Instruct-v0.1",
	"meta-llama/Meta-Llama-3-70B-Instruct"
	]

	# Module-level English Wikipedia client, created once at import time and used by wikipedia_search.
	wiki_api = wikipediaapi.Wikipedia(language="en", user_agent="SmartAgent/1.0 (chockqoteewy@gmail.com)")

	# === UTILS ===
	def extract_links(text):
	    """Return every http(s) URL found in *text*, in order of appearance.

	    Args:
	        text: Free-form text (for example a question). May be empty or None.

	    Returns:
	        A list of URL strings; an empty list when *text* is falsy or
	        contains no URL.
	    """
	    if not text:
	        return []
	    url_pattern = re.compile(r'(https?://[^\s\)\],]+)')
	    # The pattern cannot tell a URL's last character from the punctuation
	    # that ends the surrounding sentence ("... see https://x.org/a.pdf."),
	    # so trailing sentence punctuation and quotes are trimmed afterwards.
	    # Without this, suffix checks such as ".pdf" fail on the caller side.
	    return [raw.rstrip(".;:!?'\"") for raw in url_pattern.findall(text)]

	def download_file(url, out_dir="tmp_files"):
	    """Download *url* into *out_dir* and return the local file path.

	    The local file name is the last component of the URL path, with
	    percent-encoding decoded and any query string or fragment ignored.
	    When the URL has no usable last component (for example it ends in
	    "/"), the name "download" is used instead.

	    Args:
	        url: Address of the file to fetch.
	        out_dir: Directory to store the file in; created when missing.

	    Returns:
	        The path of the written file, or None when anything fails
	        (this helper is deliberately best-effort).
	    """
	    # Local import keeps this block self-contained without touching the
	    # file-level import list.
	    from urllib.parse import unquote, urlparse

	    try:
	        os.makedirs(out_dir, exist_ok=True)
	        # basename() runs after unquote() so an encoded "/" (%2F) cannot
	        # smuggle a directory component into the local path.
	        filename = os.path.basename(unquote(urlparse(url).path))
	        if filename in ("", ".", ".."):
	            filename = "download"
	        local_path = os.path.join(out_dir, filename)
	        r = requests.get(url, timeout=30)
	        r.raise_for_status()
	        with open(local_path, "wb") as f:
	            f.write(r.content)
	        return local_path
	    except Exception:
	        # Callers treat None as "could not download" and fall back to
	        # other strategies, so no exception is propagated.
	        return None

	def _sum_food_sales(df, label):
	    """Sum the sales column of *df* over all rows whose item is not a drink.

	    Column names are lower-cased first. The item column is the first one
	    whose name contains "item" or "menu"; the sales column is the first
	    one whose name contains "total", "sales" or "amount".

	    Args:
	        df: Table to analyse; its column labels are replaced in place by
	            their lower-cased string form.
	        label: Prefix ("Excel" / "CSV") used in the not-found message.

	    Returns:
	        The sum formatted with two decimals, or a message listing the
	        available columns when the item/sales columns cannot be found.

	    Raises:
	        Exception: Whatever pandas raises (for example a sales column
	            that cannot be converted to float); callers turn this into
	            an error string.
	    """
	    # str() guards against non-string headers such as integer labels.
	    df.columns = [str(col).lower() for col in df.columns]
	    item_col = next((col for col in df.columns if "item" in col or "menu" in col), None)
	    total_col = next(
	        (col for col in df.columns if "total" in col or "sales" in col or "amount" in col),
	        None,
	    )
	    if not item_col or not total_col:
	        return f"{label} columns: {', '.join(df.columns)}. Could not find item/total columns."
	    # Plain "|" is regex alternation. An escaped "\|" would match a literal
	    # pipe character and therefore never exclude any drink row.
	    is_drink = df[item_col].astype(str).str.contains(
	        r"drink|beverage|soda|juice", case=False, regex=True, na=False
	    )
	    total = df.loc[~is_drink, total_col].astype(float).sum()
	    return f"{total:.2f}"


	def summarize_excel(file_path):
	    """Return the non-drink sales total of an Excel workbook as a string.

	    Args:
	        file_path: Path of the .xlsx/.xls file; read with pandas.read_excel.

	    Returns:
	        The total formatted with two decimals, a message listing the
	        columns when no item/sales column is found, or "Excel error: ..."
	        when reading or summing fails.
	    """
	    try:
	        return _sum_food_sales(pd.read_excel(file_path), "Excel")
	    except Exception as e:
	        return f"Excel error: {e}"


	def summarize_csv(file_path):
	    """Return the non-drink sales total of a CSV file as a string.

	    Args:
	        file_path: Path of the .csv file; read with pandas.read_csv.

	    Returns:
	        The total formatted with two decimals, a message listing the
	        columns when no item/sales column is found, or "CSV error: ..."
	        when reading or summing fails.
	    """
	    try:
	        return _sum_food_sales(pd.read_csv(file_path), "CSV")
	    except Exception as e:
	        return f"CSV error: {e}"

	def summarize_pdf(file_path):
	    """Return up to the first 1000 characters of text on a PDF's first page.

	    Args:
	        file_path: Path of the PDF file; opened with pdfplumber.

	    Returns:
	        "PDF text sample: ..." on success (the sample is empty when the
	        first page has no extractable text), or "PDF error: ..." when the
	        document has no pages or cannot be read.
	    """
	    try:
	        with pdfplumber.open(file_path) as pdf:
	            if not pdf.pages:
	                return "PDF error: document has no pages"
	            # Guard against a None result (e.g. a page without a text
	            # layer) so the slice below cannot raise.
	            first_page = pdf.pages[0].extract_text() or ""
	        return f"PDF text sample: {first_page[:1000]}"
	    except Exception as e:
	        return f"PDF error: {e}"

	def summarize_txt(file_path):
	    """Return the first 1000 characters of a UTF-8 text file.

	    Args:
	        file_path: Path of the text file to read.

	    Returns:
	        "TXT file sample: ..." on success, or "TXT error: ..." when the
	        file cannot be opened or decoded.
	    """
	    try:
	        with open(file_path, encoding="utf-8") as handle:
	            contents = handle.read()
	    except Exception as err:
	        return f"TXT error: {err}"
	    preview = contents[:1000]
	    return "TXT file sample: " + preview

	def analyze_file(file_path):
	    """Dispatch a local file to the summarizer matching its extension.

	    Args:
	        file_path: Path of the file to analyse.

	    Returns:
	        The summarizer's result string, or "Unsupported file type: ..."
	        for any extension other than .xlsx, .xls, .csv, .pdf or .txt.
	    """
	    # Only the comparison is case-insensitive. The path handed to the
	    # summarizers keeps its original case, because lower-casing it would
	    # point at a different (usually non-existent) file on case-sensitive
	    # filesystems.
	    suffix_probe = file_path.lower()
	    if suffix_probe.endswith((".xlsx", ".xls")):
	        return summarize_excel(file_path)
	    elif suffix_probe.endswith(".csv"):
	        return summarize_csv(file_path)
	    elif suffix_probe.endswith(".pdf"):
	        return summarize_pdf(file_path)
	    elif suffix_probe.endswith(".txt"):
	        return summarize_txt(file_path)
	    else:
	        return f"Unsupported file type: {file_path}"

	def analyze_webpage(url):
	    """Fetch a web page and return its title plus a short text sample.

	    Args:
	        url: Address of the page to fetch.

	    Returns:
	        "Webpage Title: ...\nContent sample:\n..." where the sample is
	        taken from the first five <p> elements and capped at 1000
	        characters, or "Webpage error: ..." when the request fails, the
	        server answers with an HTTP error status, or parsing fails.
	    """
	    try:
	        r = requests.get(url, timeout=20)
	        # Same policy as download_file: an HTTP error page is a failure,
	        # not content to summarize.
	        r.raise_for_status()
	        soup = BeautifulSoup(r.text, "lxml")
	        # get_text() instead of .string: .string is None when <title>
	        # contains nested markup, which used to render as "None".
	        title = soup.title.get_text(strip=True) if soup.title else ""
	        if not title:
	            title = "No title"
	        paragraphs = [p.get_text() for p in soup.find_all("p")]
	        article_sample = "\n".join(paragraphs[:5])
	        return f"Webpage Title: {title}\nContent sample:\n{article_sample[:1000]}"
	    except Exception as e:
	        return f"Webpage error: {e}"

	def analyze_youtube(url):
	    """Return the start of a YouTube video's English caption track.

	    Args:
	        url: Address of the video.

	    Returns:
	        "YouTube Transcript sample: ..." with the first 800 characters of
	        the SRT captions, a "No English captions found" message when the
	        video has none, or "YouTube error: ..." on any failure.
	    """
	    try:
	        video = pytube.YouTube(url)
	        english_track = video.captions.get_by_language_code('en')
	        # Guard clause: nothing to transcribe without an English track.
	        if not english_track:
	            return f"No English captions found for {url}"
	        srt_text = english_track.generate_srt_captions()
	        return f"YouTube Transcript sample: {srt_text[:800]}"
	    except Exception as err:
	        return f"YouTube error: {err}"

	def duckduckgo_search(query):
	    """Return the snippets of the top DuckDuckGo text results for *query*.

	    Args:
	        query: Search string passed to DDGS.text with max_results=3.

	    Returns:
	        The non-empty "body" fields of the results joined by newlines, or
	        None when there are no such fields or the search raises.
	    """
	    try:
	        with DDGS() as session:
	            hits = list(session.text(query, max_results=3))
	        snippets = []
	        for hit in hits:
	            body = hit.get("body")
	            if body:
	                snippets.append(body)
	        if not snippets:
	            return None
	        return "\n".join(snippets)
	    except Exception:
	        return None

	def wikipedia_search(query):
	    """Return the summary of the Wikipedia page titled *query*, if any.

	    Args:
	        query: Text handed to wiki_api.page as the page title.

	    Returns:
	        The page summary when the page exists and the summary is
	        non-empty; otherwise None (also on any exception).
	    """
	    try:
	        article = wiki_api.page(query)
	        if not article.exists():
	            return None
	        summary = article.summary
	        return summary if summary else None
	    except Exception:
	        return None

	def llm_conversational(query):
	    """Ask the configured chat models in turn and return the first answer.

	    Each id in ADVANCED_MODELS is tried in order. A model that raises or
	    returns an empty answer is skipped.

	    Args:
	        query: The user message sent as a single-turn conversation.

	    Returns:
	        The text of the first non-empty reply, or the fixed string
	        "LLM error: No advanced conversational models succeeded." when
	        every model fails.
	    """
	    for model_id in ADVANCED_MODELS:
	        try:
	            hf_client = InferenceClient(model_id, token=HF_TOKEN)
	            # chat_completion is the messages-based endpoint. The former
	            # conversational() call does not accept `messages=` or
	            # `max_new_tokens=`, so it raised for every model and the
	            # error was silently swallowed.
	            completion = hf_client.chat_completion(
	                messages=[{"role": "user", "content": query}],
	                max_tokens=384,
	            )
	            answer = completion.choices[0].message.content
	        except Exception:
	            # Best-effort fallback chain: move on to the next model.
	            continue
	        if answer:
	            return answer
	    return "LLM error: No advanced conversational models succeeded."

	# === TASK-SPECIFIC HANDLERS (expandable) ===
	def handle_grocery_vegetables(question):
	    """Extract the vegetables from a comma-separated list in the question.

	    The list is the first non-blank line of text following the phrase
	    "list I have so far:" (matched case-insensitively; the list may start
	    on the same line or on a later one).

	    Args:
	        question: Full question text.

	    Returns:
	        The recognised vegetables, lower-cased, alphabetically sorted and
	        joined by ", " (empty string when none match), or
	        "Could not parse item list." when the phrase is absent.
	    """
	    # \s* (instead of one literal space) also skips line breaks, so a list
	    # placed on the line(s) below the phrase is still captured.
	    match = re.search(r"list I have so far:\s*(.*)", question, re.IGNORECASE)
	    if not match:
	        return "Could not parse item list."
	    # Trailing sentence punctuation ("..., lettuce.") must not prevent a match.
	    items = [part.strip(" \t.;:!?").lower() for part in match.group(1).split(",")]
	    vegetables = {
	        "broccoli", "celery", "lettuce", "zucchini", "green beans", "sweet potatoes", "bell pepper"
	    }
	    return ", ".join(sorted(item for item in items if item in vegetables))

	# === MAIN AGENT ===
	class SmartAgent:
	    """Rule-based agent that answers a question with the first tool that applies.

	    Order of strategies: the grocery/vegetables handler, analysis of any
	    link in the question (file, YouTube video or web page), a Wikipedia
	    lookup, a DuckDuckGo search, and finally the chat models.
	    """

	    # Extensions that are downloaded and passed to analyze_file.
	    _FILE_SUFFIXES = (".xlsx", ".xls", ".csv", ".pdf", ".txt")

	    def __call__(self, question: str) -> str:
	        """Return the agent's answer for *question*."""
	        lowered = question.lower()
	        # Task: Grocery vegetables
	        if "vegetables" in lowered and "categorize" in lowered:
	            return handle_grocery_vegetables(question)
	        # Download and analyze any file links
	        for url in extract_links(question):
	            # Test the extension on the URL without query string/fragment
	            # and ignoring case, so "report.PDF" and "data.csv?dl=1" are
	            # treated as files instead of web pages.
	            url_path = url.split("#")[0].split("?")[0].lower()
	            if url_path.endswith(self._FILE_SUFFIXES):
	                local = download_file(url)
	                if local:
	                    return analyze_file(local)
	                # Download failed: try the next link, then the fallbacks.
	            elif "youtube.com" in url or "youtu.be" in url:
	                return analyze_youtube(url)
	            else:
	                return analyze_webpage(url)
	        # Wikipedia
	        wiki_result = wikipedia_search(question)
	        if wiki_result:
	            return wiki_result
	        # DuckDuckGo
	        ddg_result = duckduckgo_search(question)
	        if ddg_result:
	            return ddg_result
	        # Top LLMs
	        return llm_conversational(question)

	# === SUBMISSION LOGIC ===
	def run_and_submit_all(profile: gr.OAuthProfile | None):
	    """Fetch all questions, run the agent on each, and submit the answers.

	    Args:
	        profile: Profile of the logged-in Hugging Face user, or None when
	            nobody is logged in.

	    Returns:
	        A ``(status, table)`` pair: a status message and a DataFrame with
	        one row per processed question. The table is None when the user
	        is not logged in or the questions cannot be fetched.
	    """
	    space_id = os.getenv("SPACE_ID")
	    if not profile:
	        return "Please Login to Hugging Face with the button.", None
	    username = profile.username

	    questions_url = f"{DEFAULT_API_URL}/questions"
	    submit_url = f"{DEFAULT_API_URL}/submit"

	    agent = SmartAgent()
	    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

	    try:
	        response = requests.get(questions_url, timeout=20)
	        response.raise_for_status()
	        questions_data = response.json()
	    except Exception as e:
	        return f"Error fetching questions: {e}", None

	    results_log = []
	    answers_payload = []

	    for item in questions_data:
	        task_id = item.get("task_id")
	        question_text = item.get("question")
	        if not task_id or not question_text:
	            continue
	        try:
	            submitted_answer = agent(question_text)
	        except Exception as e:
	            # One failing question must not abort the whole run: record
	            # the failure in the table and move on without submitting it.
	            results_log.append(
	                {"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"}
	            )
	            continue
	        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
	        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})

	    if not answers_payload:
	        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

	    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}

	    try:
	        response = requests.post(submit_url, json=submission_data, timeout=90)
	        response.raise_for_status()
	        result_data = response.json()
	        final_status = (
	            f"Submission Successful!\n"
	            f"User: {result_data.get('username')}\n"
	            f"Overall Score: {result_data.get('score', 'N/A')}% "
	            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
	            f"Message: {result_data.get('message', 'No message received.')}"
	        )
	        results_df = pd.DataFrame(results_log)
	        return final_status, results_df
	    except Exception as e:
	        return f"Submission Failed: {e}", pd.DataFrame(results_log)

	# === GRADIO UI ===
	# Build the single-page UI: instructions, login button, run button, and two outputs.
	with gr.Blocks() as demo:
	gr.Markdown("# Smart Agent Evaluation Runner")
	gr.Markdown("""
	Instructions:
	1. Clone this space, define your agent logic, tools, packages, etc.
	2. Log in to Hugging Face.
	3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
	""")
	gr.LoginButton()
	run_button = gr.Button("Run Evaluation & Submit All Answers")
	# Read-only text box that receives the first value returned by run_and_submit_all.
	status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
	# Table that receives the second value returned by run_and_submit_all.
	results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
	# No `inputs` are wired here; presumably Gradio supplies the `profile` argument
	# from the login state based on the handler's type annotation — TODO confirm.
	run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

	# Start the app only when this module is executed as a script.
	if __name__ == "__main__":
	demo.launch(debug=True, share=False)