|
import streamlit as st |
|
import requests |
|
from bs4 import BeautifulSoup |
|
|
|
def main():
    """Render the Streamlit UI: a URL input plus a button that triggers extraction."""
    st.title("Website Content Extractor")

    url = st.text_input("Enter a URL:", "")

    if st.button("Proceed"):
        # Strip so that whitespace-only input is also treated as empty.
        cleaned = url.strip()
        if not cleaned:
            st.warning("URL is empty.")
        else:
            extract_text(cleaned)
|
|
|
|
|
def extract_text(url):
    """Fetch *url*, then render its extracted text in the app (or an error)."""
    try:
        # Show a spinner only while the network fetch is in flight.
        with st.spinner("Loading website data..."):
            page_text = get_website_text(url)

        st.subheader("Website Content:")
        if not page_text:
            st.error("Error: Could not extract content.")
        else:
            st.write(page_text)

    except Exception as e:
        # Top-level UI boundary: surface any unexpected failure to the user.
        st.error(f"Error: {e}")
|
|
|
|
|
def get_website_text(url):
    """Download *url* and return its visible text, one fragment per line.

    Returns None (after surfacing a Streamlit error message) when the
    request fails or the server answers with an HTTP error status.
    """
    try:
        # A timeout prevents the app from hanging forever on a stalled server;
        # without one, requests.get() may block indefinitely.
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # stripped_strings yields every text node with surrounding whitespace removed.
        return '\n'.join(soup.stripped_strings)

    except requests.exceptions.RequestException as e:
        st.error(f"Error fetching URL: {e}")
        return None
|
|
|
|
|
# Run the app only when executed directly (e.g. via `streamlit run`).
if __name__ == "__main__":

    main()