|
import streamlit as st |
|
import requests |
|
from bs4 import BeautifulSoup |
|
|
|
def main():
    """Render the Streamlit UI: a URL input plus a button that triggers extraction."""
    st.title("Website Content Extractor")

    url = st.text_input("Enter a URL:", "")

    if st.button("Proceed"):
        # Strip so that whitespace-only input is also treated as empty.
        cleaned = url.strip()
        if not cleaned:
            st.warning("URL is empty.")
        else:
            extract_text(cleaned)
|
|
|
|
|
def extract_text(url):
    """Fetch *url*, then render its extracted text in the app (or an error)."""
    try:
        # Show a spinner only while the network fetch is in flight.
        with st.spinner("Loading website data..."):
            page_text = get_website_text(url)

        st.subheader("Website Content:")
        if not page_text:
            st.error("Error: Could not extract content.")
        else:
            st.write(page_text)

    except Exception as e:
        # Top-level UI boundary: surface any unexpected failure to the user.
        st.error(f"Error: {e}")
|
|
|
|
|
def get_website_text(url):
    """Download *url* and return its visible text, one fragment per line.

    Returns None (after surfacing a Streamlit error message) when the
    request fails or the server answers with an HTTP error status.
    """
    try:
        # A timeout prevents the app from hanging forever on a stalled server;
        # without one, requests.get() may block indefinitely.
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # stripped_strings yields every text node with surrounding whitespace removed.
        return '\n'.join(soup.stripped_strings)

    except requests.exceptions.RequestException as e:
        st.error(f"Error fetching URL: {e}")
        return None
|
|
|
|
|
# Run the app only when executed directly (e.g. via `streamlit run`).
if __name__ == "__main__":

    main()