# Streamlit app: fetch a web page and display its visible text.
from typing import Optional

import requests
import streamlit as st
from bs4 import BeautifulSoup

# Upper bound (seconds) for connecting to / reading from the remote server.
# Without a timeout, requests waits indefinitely on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 10


def main() -> None:
    """Render the page: a URL input and a button that triggers extraction."""
    st.title("Website Content Extractor")

    # Strip surrounding whitespace so that input such as "   " is treated as
    # empty instead of being sent to the HTTP client.
    url = st.text_input("Enter a URL:", "").strip()

    if st.button("Proceed"):
        if not url:
            st.warning("URL is empty.")
        else:
            extract_text(url)


def extract_text(url: str) -> None:
    """Fetch ``url`` and display its extracted text in the app.

    Request failures are reported by ``get_website_text`` itself, so this
    function stays silent in that case to avoid showing two errors for one
    failure. Any other unexpected exception is shown as an error message
    rather than crashing the script run.

    Args:
        url: Address of the page to fetch.
    """
    try:
        with st.spinner("Loading website data..."):
            html_content = get_website_text(url)

        if html_content is None:
            # Fetch failed; the error was already displayed by the fetcher.
            return

        if not html_content:
            # The request succeeded but the page yielded no text.
            st.error("Error: Could not extract content.")
            return

        st.subheader("Website Content:")
        st.write(html_content)
    except Exception as e:
        # Top-level UI boundary: surface the problem to the user.
        st.error(f"Error: {e}")


def get_website_text(
    url: str, timeout: float = REQUEST_TIMEOUT_SECONDS
) -> Optional[str]:
    """Download ``url`` and return its text content, one fragment per line.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The whitespace-stripped text fragments of the parsed HTML joined by
        newlines (possibly an empty string), or ``None`` if the request
        failed. On failure an error message is displayed via ``st.error``.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx responses into exceptions handled below.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        st.error(f"Error fetching URL: {e}")
        return None

    soup = BeautifulSoup(response.text, "html.parser")
    # stripped_strings yields each text fragment with surrounding whitespace
    # removed and skips fragments that are whitespace only.
    return "\n".join(soup.stripped_strings)


if __name__ == "__main__":
    main()