import streamlit as st
import requests
from bs4 import BeautifulSoup
def main():
    """Render the Streamlit UI: prompt for a URL and show its extracted text."""
    st.title("Website Content Extractor")

    # Ask the user which page to scrape.
    target_url = st.text_input("Enter a URL:", "")

    # Nothing to do until the user clicks the button.
    if not st.button("Proceed"):
        return
    if not target_url:
        st.warning("URL is empty.")
        return
    extract_text(target_url)
def extract_text(url):
    """Fetch *url*, render its text content in the app, and surface any error."""
    try:
        # Show a spinner while the network fetch is in flight.
        with st.spinner("Loading website data..."):
            page_text = get_website_text(url)
        st.subheader("Website Content:")
        if page_text:
            st.write(page_text)
        else:
            # get_website_text returns None (and reports its own error) on failure.
            st.error("Error: Could not extract content.")
    except Exception as e:
        st.error(f"Error: {e}")
def get_website_text(url):
    """Download *url* and return its visible text, or None on a request failure.

    Parameters:
        url: The address to fetch via HTTP GET.

    Returns:
        A newline-joined string of every text fragment on the page, or
        None if the request fails (the error is reported via st.error).
    """
    try:
        # A timeout prevents the app from hanging forever on an
        # unresponsive host; the original call had none.
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raises for 4xx/5xx responses.
        # Parse the HTML and pull out the text.
        soup = BeautifulSoup(response.text, 'html.parser')
        # stripped_strings yields each text fragment with surrounding
        # whitespace already removed.
        return '\n'.join(soup.stripped_strings)
    except requests.exceptions.RequestException as e:
        st.error(f"Error fetching URL: {e}")
        return None
# Standard script entry guard (the stray trailing "|" scrape artifact
# on the last line was removed — it was a syntax error).
if __name__ == "__main__":
    main()