joko333's picture
Add error handling for model loading and improve analysis feedback
1e4dfde
raw
history blame
3.28 kB
import streamlit as st
import pandas as pd
import re
from utils.prediction import predict_sentence
def split_sentences_regex(text):
    """Split *text* into sentence-like fragments on every period.

    The text is first cleaned: line breaks become spaces, single and double
    quote characters are removed, and runs of whitespace collapse to a single
    space. The cleaned text is then cut at each ``.`` character; only the
    period acts as a delimiter, so ``!`` and ``?`` stay inside a fragment.

    Returns a list of the whitespace-stripped, non-empty fragments, in order.
    Fragments are kept regardless of whether they start with a capital letter.
    """
    # Cleaning pipeline: newlines -> spaces, drop quotes, squeeze whitespace.
    flattened = re.sub(r'[\n\r]', ' ', text)
    unquoted = re.sub(r'["\']', '', flattened)
    normalized = re.sub(r'\s+', ' ', unquoted)

    # Cut on each literal period, then trim every piece.
    trimmed = (piece.strip() for piece in re.split(r'[.]', normalized))

    # Pieces that were empty or whitespace-only are discarded.
    return [piece for piece in trimmed if piece]
def split_sentences_with_abbrev(text):
    """Split *text* into sentences on ``'. '`` without breaking abbreviations.

    The text is cut at every period followed by a space. When the piece before
    a cut ends with a known abbreviation (``Mr.``, ``e.g.``, ``etc.`` ...), the
    cut is undone and the two pieces are joined back together with ``'. '``.

    Returns a list of sentences. The ``'. '`` separator is consumed, so only
    the final sentence keeps a trailing period if it had one. Text without any
    ``'. '`` (including the empty string) is returned as a one-element list.
    """
    # Common abbreviations to ignore (lowercase, including the final period)
    abbreviations = {'mr.', 'mrs.', 'dr.', 'sr.', 'jr.', 'vs.', 'e.g.', 'i.e.', 'etc.'}

    # Split initially by potential sentence endings
    parts = text.split('. ')
    sentences = []
    current = parts[0]

    for part in parts[1:]:
        # split('. ') has removed the period that ended `current`, so it is
        # put back before the lookup. The whole last word is compared, not a
        # suffix, so that e.g. "revs" is not mistaken for "vs.".
        words = current.split()
        last_word = (words[-1].lstrip('([{"\'').lower() + '.') if words else ''

        if last_word in abbreviations:
            # Not a real sentence boundary: restore the separator and go on.
            current = current + '. ' + part
        else:
            sentences.append(current)
            current = part

    sentences.append(current)
    return sentences
def show_analysis():
    """Render the "Text Analysis" Streamlit page.

    Reads the ``model``, ``label_encoder`` and ``tokenizer`` entries from
    ``st.session_state``. If any entry is missing or ``None``, a message is
    shown and nothing else is rendered. Otherwise a text area and an "Analyze"
    button are shown; on click the text is split with
    ``split_sentences_regex``, each sentence is passed to ``predict_sentence``,
    and the sentence / label / confidence rows are displayed as a table.

    Any exception raised while analyzing is reported on the page through
    ``st.error`` instead of propagating.
    """
    st.title("Text Analysis")

    required_keys = ('model', 'label_encoder', 'tokenizer')

    # Check model loading state
    if not all(key in st.session_state for key in required_keys):
        st.warning("Model components not found in session state")
        st.info("Please go to the Home page first to load the model")
        return

    if any(st.session_state[key] is None for key in required_keys):
        st.error("One or more model components failed to load")
        return

    # Get model components
    model = st.session_state.model
    label_encoder = st.session_state.label_encoder
    tokenizer = st.session_state.tokenizer

    # Text input section
    st.header("Analyze Your Text")
    user_text = st.text_area("Enter your text here (multiple sentences allowed):", height=150)

    if not st.button("Analyze"):
        return

    # Whitespace-only input is treated like empty input; otherwise it would
    # fall through and render an empty table without any explanation.
    if not user_text or not user_text.strip():
        st.warning("Please enter some text to analyze")
        return

    with st.spinner("Analyzing text..."):
        try:
            # Split text into sentences
            sentences = split_sentences_regex(user_text)

            # Input made only of punctuation/quotes yields no sentences;
            # tell the user instead of showing an empty table.
            if not sentences:
                st.warning("No sentences found in the text to analyze")
                return

            # Process each sentence
            results = []
            for sentence in sentences:
                label, confidence = predict_sentence(model, sentence, tokenizer, label_encoder)
                results.append({
                    "Sentence": sentence,
                    "Label": label,
                    "Confidence": f"{confidence:.2%}",
                })

            # Display results
            st.dataframe(pd.DataFrame(results))
        except Exception as e:
            # Page-level boundary: report the failure to the user rather than
            # letting the exception escape from the page.
            st.error(f"Analysis failed: {e}")
# Render the analysis page when this file is executed directly rather than
# imported as a module.
if __name__ == "__main__":
    show_analysis()