# DocsSearch / app.py
# ZennyKenny's picture
# Update app.py
# 0bb000c verified
import re
from urllib.parse import urldefrag, urljoin

import requests
import streamlit as st
import trafilatura
from bs4 import BeautifulSoup
from smolagents import create_agent
# Streamlit UI
def main():
    """Render the page and answer one question per "Find Answer" click.

    Reads a documentation homepage URL and a question from two input
    widgets. When the button is pressed, the pair is handed to
    ``find_relevant_article``; if that returns a URL, the extracted text
    is passed to ``generate_answer`` and the result is shown together with
    a link to the article. Otherwise an error banner is shown.
    """
    st.set_page_config(page_title="AI Documentation Assistant", layout="wide")
    st.title("📖 AI Documentation Assistant")
    st.write("Enter the top-level URL of your documentation, and I'll find the most relevant article to answer your question.")

    # User input
    doc_url = st.text_input("🔗 Documentation URL (Homepage)", "https://example.com/docs")
    user_question = st.text_area("❓ Your Question", "How do I reset my password?")

    if not st.button("🔍 Find Answer"):
        return

    # Reject blank input up front instead of crawling with an empty URL or
    # matching every page against an empty question.
    doc_url = doc_url.strip()
    user_question = user_question.strip()
    if not doc_url or not user_question:
        st.warning("⚠️ Please enter both a documentation URL and a question.")
        return

    with st.spinner("Searching for relevant information..."):
        try:
            article_url, extracted_text = find_relevant_article(doc_url, user_question)
        except requests.RequestException as exc:
            # Malformed URLs, DNS failures, timeouts, etc. should surface as
            # a readable message rather than a raw traceback in the UI.
            st.error(f"⚠️ Could not fetch the documentation: {exc}")
            return

        if article_url:
            answer = generate_answer(user_question, extracted_text)
            st.success("✅ Answer Found!")
            st.write(answer)
            st.write(f"[🔗 Read Full Article]({article_url})")
        else:
            st.error("⚠️ No relevant articles found.")
# Step 3 & 4: Crawling and Finding the Most Relevant Article
def find_relevant_article(base_url, question, max_links=10, timeout=10):
    """Crawl links on the docs homepage and return the best-matching article.

    The homepage at *base_url* is fetched, its links are resolved to
    absolute URLs, and up to *max_links* of them are downloaded. A page
    whose extracted text contains the whole question (case-insensitive) is
    returned immediately. Otherwise the page sharing the most meaningful
    question words wins, with the earliest page winning ties.

    Args:
        base_url: Top-level documentation URL to start from.
        question: The user's question.
        max_links: Maximum number of linked pages to download.
        timeout: Per-request timeout in seconds.

    Returns:
        ``(url, text)`` for the chosen article; ``(None, "")`` when no
        crawled page matches; ``(None, None)`` when the homepage itself
        cannot be fetched or does not answer with HTTP 200.
    """
    try:
        response = requests.get(base_url, timeout=timeout)
    except requests.RequestException:
        return None, None
    if response.status_code != 200:
        return None, None

    soup = BeautifulSoup(response.text, "html.parser")
    # Resolve against the final URL so relative links survive redirects.
    links = _collect_doc_links(soup, response.url, base_url)

    phrase = question.strip().lower()
    terms = _question_terms(phrase)

    best_match = None
    best_text = ""
    best_score = 0
    for link in links[:max_links]:
        try:
            page = requests.get(link, timeout=timeout)
        except requests.RequestException:
            continue  # one unreachable page must not abort the whole search
        if not page.ok:
            continue

        page_text = trafilatura.extract(page.text)
        if not page_text:
            continue

        lowered = page_text.lower()
        if phrase and phrase in lowered:
            # A verbatim hit is the strongest signal; stop crawling.
            return link, page_text

        score = len(terms & set(re.findall(r"\w+", lowered)))
        if score > best_score:
            best_match, best_text, best_score = link, page_text, score

    return best_match, best_text


# Common question words that carry no topical signal for matching.
_STOPWORDS = frozenset({
    "a", "an", "and", "are", "can", "do", "does", "for", "how", "i", "in",
    "is", "it", "my", "of", "on", "or", "the", "to", "what", "when",
    "where", "which", "who", "why", "with", "you", "your",
})


def _question_terms(text):
    """Return the set of lower-cased words in *text* that are not stop words."""
    return {word for word in re.findall(r"\w+", text.lower()) if word not in _STOPWORDS}


def _collect_doc_links(soup, page_url, base_url):
    """Return unique absolute links from *soup* that contain *base_url*.

    Hrefs are resolved against *page_url* and stripped of ``#fragment``
    parts, so in-page anchors collapse to a single entry. Order of first
    appearance is preserved.
    """
    root = base_url.rstrip("/")
    resolved = (
        urldefrag(urljoin(page_url, anchor["href"])).url
        for anchor in soup.find_all("a", href=True)
    )
    return list(dict.fromkeys(url for url in resolved if root in url))
# Step 5: Generate Answer using `smolagents`
def generate_answer(question, context):
    """Run a one-off QA agent over *context* and return whatever it produces.

    The agent's processing callback ignores the question and replies with
    a fixed prefix followed by the first 500 characters of the context and
    a trailing ellipsis.
    """

    def _reply_from_state(payload):
        """Build the canned reply from the ``context`` entry of *payload*."""
        excerpt = payload['context'][:500]  # keep the reply short
        return f"Based on the documentation, here is my answer: {excerpt}..."

    # NOTE(review): `create_agent` is imported from smolagents at the top of
    # the file; confirm the installed release actually exports it and that
    # the returned object is callable with a state dict.
    agent_settings = {
        "name": "QA_Agent",
        "description": "Answers questions based on documentation content.",
        "process": _reply_from_state,
    }
    qa_agent = create_agent(**agent_settings)

    # Hand both inputs to the agent and pass its result straight through.
    return qa_agent({"context": context, "question": question})
# Start the UI only when this file is run as the main module, not on import.
if __name__ == "__main__":
    main()