Spaces:

ZennyKenny
/

DocsSearch

Build error

App Files Files Community

DocsSearch / app.py

ZennyKenny

Update app.py

0bb000c verified 10 months ago

raw

history blame contribute delete

2.7 kB

	import streamlit as st
	import requests
	from bs4 import BeautifulSoup
	import trafilatura
	from smolagents import create_agent

	# Streamlit UI
	def main():
	    """Render the Streamlit page and answer a question from a documentation site.

	    Draws the page title, the documentation-URL and question inputs, and a
	    search button. When the button is pressed the inputs are validated, the
	    site is searched with ``find_relevant_article`` and, if an article is
	    found, the result of ``generate_answer`` is shown together with a link
	    to that article. Problems are reported in the page, not raised.
	    """
	    st.set_page_config(page_title="AI Documentation Assistant", layout="wide")
	    st.title("📖 AI Documentation Assistant")

	    st.write("Enter the top-level URL of your documentation, and I'll find the most relevant article to answer your question.")

	    # User input
	    doc_url = st.text_input("🔗 Documentation URL (Homepage)", "https://example.com/docs")
	    user_question = st.text_area("❓ Your Question", "How do I reset my password?")

	    if not st.button("🔍 Find Answer"):
	        return

	    # A blank URL would reach requests.get() and raise; a blank question
	    # cannot be searched for meaningfully. Stop early with a hint instead.
	    doc_url = (doc_url or "").strip()
	    user_question = (user_question or "").strip()
	    if not doc_url or not user_question:
	        st.warning("⚠️ Please enter both a documentation URL and a question.")
	        return

	    with st.spinner("Searching for relevant information..."):
	        try:
	            article_url, extracted_text = find_relevant_article(doc_url, user_question)
	        except requests.RequestException as exc:
	            # Malformed URLs, DNS failures and timeouts are shown to the
	            # user rather than surfacing as a traceback.
	            st.error(f"⚠️ Could not reach the documentation site: {exc}")
	            return

	        if article_url:
	            answer = generate_answer(user_question, extracted_text)

	            st.success("✅ Answer Found!")
	            st.write(answer)
	            st.write(f"[🔗 Read Full Article]({article_url})")
	        else:
	            st.error("⚠️ No relevant articles found.")

	# Step 3 & 4: Crawling and Finding the Most Relevant Article
	def find_relevant_article(base_url, question):
	    """Crawl links on the docs homepage and return the first page mentioning the question.

	    Args:
	        base_url: URL of the documentation homepage whose links are crawled.
	        question: The user's question. A page matches when its extracted
	            text contains the whole question, compared case-insensitively.

	    Returns:
	        ``(url, text)`` for the first matching page, ``(None, "")`` when no
	        crawled page matches, or ``(None, None)`` when the homepage cannot
	        be fetched or does not answer with HTTP 200.
	    """
	    from urllib.parse import urldefrag, urljoin

	    timeout = 10  # seconds; requests waits indefinitely when no timeout is given
	    max_links = 10  # limit to the first 10 links for now

	    try:
	        response = requests.get(base_url, timeout=timeout)
	    except requests.RequestException:
	        return None, None
	    if response.status_code != 200:
	        return None, None

	    soup = BeautifulSoup(response.text, "html.parser")

	    # Resolve every href against the homepage so relative links are kept,
	    # and drop "#fragment" parts so one page is not fetched several times.
	    links = []
	    seen = set()
	    for anchor in soup.find_all('a', href=True):
	        try:
	            url = urldefrag(urljoin(base_url, anchor['href'])).url
	        except ValueError:
	            continue  # malformed href; nothing sensible to fetch
	        if base_url in url and url not in seen:
	            seen.add(url)
	            links.append(url)

	    # NOTE(review): a page only matches when the full question appears
	    # verbatim in its text — confirm this strictness is intended.
	    needle = question.lower()

	    for link in links[:max_links]:
	        try:
	            page = requests.get(link, timeout=timeout)
	        except requests.RequestException:
	            continue  # one unreachable page must not abort the whole search
	        if page.status_code != 200:
	            continue  # do not scan error pages as if they were articles
	        page_text = trafilatura.extract(page.text)
	        if page_text and needle in page_text.lower():
	            return link, page_text  # stop at the first good match

	    return None, ""

	# Step 5: Generate Answer using `smolagents`
	def generate_answer(question, context):
	    """Build a QA agent and return its answer for the given documentation text.

	    The agent's processing step ignores the question and replies with a
	    fixed prefix followed by at most the first 500 characters of the
	    context and a trailing ellipsis. The agent is called with a state dict
	    holding ``context`` and ``question``; whatever that call returns is
	    returned unchanged.
	    """

	    def answer_logic(state):
	        """Return the fixed answer prefix plus an excerpt of ``state['context']``."""
	        excerpt = state['context'][:500]  # keep the reply short
	        return f"Based on the documentation, here is my answer: {excerpt}..."

	    # NOTE(review): assumes `create_agent` accepts name/description/process
	    # and returns a callable taking the state dict — TODO confirm against
	    # the installed smolagents version.
	    agent_spec = {
	        "name": "QA_Agent",
	        "description": "Answers questions based on documentation content.",
	        "process": answer_logic,
	    }
	    qa_agent = create_agent(**agent_spec)

	    # Hand the agent both the article text and the original question.
	    state = {"context": context, "question": question}
	    return qa_agent(state)

	# Start the UI only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()