joko333's picture
Store model components in session state and remove example analysis section
2705cb4
raw
history blame
3.03 kB
import streamlit as st
import pandas as pd
import re
from utils.prediction import predict_sentence
def split_sentences_regex(text):
    """Split *text* into trimmed, non-empty fragments at every period.

    The text is first flattened: newlines and carriage returns become
    spaces, single and double quote characters are dropped, and runs of
    whitespace collapse to one space.

    Only ``.`` acts as a delimiter, so ``?`` and ``!`` do not end a
    sentence and a decimal such as ``3.14`` is cut in two. The delimiter
    itself is not kept in the returned fragments.

    Args:
        text: Raw input string.

    Returns:
        A list of stripped, non-empty strings in their original order.
    """
    flattened = re.sub(r'[\n\r]', ' ', text)
    unquoted = re.sub(r'["\']', '', flattened)
    normalized = re.sub(r'\s+', ' ', unquoted)

    # Disabled alternative that also split on '!' and '?':
    #   r'[.!?]+[\s]+|[.!?]+$'
    pattern = r'[.]'

    fragments = []
    for piece in re.split(pattern, normalized):
        trimmed = piece.strip()
        # Whitespace-only pieces (e.g. after a trailing period) are dropped.
        if trimmed:
            fragments.append(trimmed)
    return fragments
def split_sentences_with_abbrev(text):
    """Split *text* on ``'. '`` while keeping common abbreviations intact.

    The text is cut at every period followed by a space. When the word just
    before a cut is a known abbreviation (``Mr.``, ``Dr.``, ``e.g.`` ...),
    the cut is undone and the two pieces are re-joined with ``'. '``.

    Because ``str.split`` removes the delimiter, every returned sentence
    except the last has lost its terminal period; the last one keeps
    whatever punctuation the input ended with. Text without any ``'. '``
    (including the empty string) comes back as a single-element list.

    An abbreviation that genuinely ends a sentence (``"... pears, etc. Then
    we left."``) cannot be told apart from one in mid-sentence and is
    therefore joined to what follows.

    Args:
        text: Input string to split.

    Returns:
        A list of sentence strings in their original order.
    """
    # Common abbreviations to ignore (lower-case, trailing period included)
    abbreviations = {'mr.', 'mrs.', 'dr.', 'sr.', 'jr.', 'vs.', 'e.g.', 'i.e.', 'etc.'}

    # Split initially by potential sentence endings
    parts = text.split('. ')
    sentences = []
    current = parts[0]

    for part in parts[1:]:
        # split('. ') consumed the period, so restore it on the final word
        # before looking it up; comparing the whole word (not a suffix)
        # keeps words such as "hdr" from being mistaken for "dr.".
        words = current.split()
        if words:
            last_word = words[-1].lower().lstrip('("\'[') + '.'
        else:
            last_word = ''

        if last_word in abbreviations:
            current = current + '. ' + part
        else:
            sentences.append(current)
            current = part

    sentences.append(current)
    return sentences
def show_analysis():
    """Render the "Text Analysis" page.

    Reads ``model``, ``label_encoder`` and ``tokenizer`` from
    ``st.session_state``; if ``model`` is absent an error is shown and
    nothing else is rendered. Otherwise a text area and an "Analyze" button
    are drawn. On click, the entered text is split with
    ``split_sentences_regex`` and each sentence is passed to
    ``predict_sentence``; sentences labelled "Unknown" or "Error" are
    skipped, all others are shown with their label and a progress bar
    driven by the returned confidence.

    Any exception raised while building the page is reported through
    ``st.error`` instead of propagating.
    """
    st.title("Text Analysis")
    st.write("Use this section to analyze the logical structure of your text.")

    try:
        session = st.session_state
        if 'model' not in session:
            st.error("Please initialize the model from the home page first.")
            return

        model = session.model
        label_encoder = session.label_encoder
        tokenizer = session.tokenizer

        # Text input section
        st.header("Analyze Your Text")
        user_text = st.text_area(
            "Enter your text here (multiple sentences allowed):",
            height=150,
        )

        # Nothing more to render until the button is pressed.
        if not st.button("Analyze"):
            return

        if not user_text:
            st.warning("Please enter some text to analyze.")
            return

        # Split and analyze sentences
        sentences = split_sentences_regex(user_text)
        st.subheader("Analysis Results:")

        for sentence in sentences:
            with st.container():
                label, confidence = predict_sentence(
                    model, sentence, tokenizer, label_encoder
                )
                # Sentences the predictor could not classify are not displayed.
                if label in ("Unknown", "Error"):
                    continue
                st.write("---")
                st.write(f"**Sentence:** {sentence}")
                st.write(f"**Predicted:** {label}")
                st.progress(confidence)
    except Exception as e:
        st.error(f"Error: {str(e)}")
# Render the analysis page when this module is executed directly
# rather than imported by another module.
if __name__ == "__main__":
    show_analysis()