# Streamlit page: sentence-level text analysis.
import streamlit as st
import pandas as pd
import re
from utils.prediction import predict_sentence
def split_sentences_regex(text):
    """Split *text* into sentences using a simple punctuation regex.

    Newlines and quote characters are removed first and runs of
    whitespace are collapsed, then the text is split on runs of
    sentence-ending punctuation (``.``, ``!``, ``?``).

    Returns a list of non-empty, stripped sentence strings; the
    terminal punctuation itself is not preserved.
    """
    # Normalize: drop newlines and quotes, collapse whitespace.
    text = re.sub(r'[\n\r]', ' ', text)
    text = re.sub(r'["\']', '', text)
    text = re.sub(r'\s+', ' ', text)
    # Split on sentence-ending punctuation.  The previous pattern was
    # r'[.]' only, which silently ignored '!' and '?' despite the stated
    # intent of splitting on all sentence endings.
    parts = re.split(r'[.!?]+', text)
    # Strip each piece and drop empties (e.g. from trailing punctuation).
    return [s for s in (p.strip() for p in parts) if s]
def split_sentences_with_abbrev(text):
    """Split *text* on ``'. '`` while keeping common abbreviations intact.

    Naively splitting on ``'. '`` would break "Mr. Smith" into two
    pieces; this rejoins a piece with the next one whenever it ends in a
    known abbreviation.

    Returns a list of sentence strings.  The ``'. '`` separators are
    consumed by the split, so interior sentences lose their period.
    Returns ``[]`` for empty input.
    """
    # Abbreviations that end with a period but do not end a sentence.
    abbreviations = {'mr.', 'mrs.', 'dr.', 'sr.', 'jr.', 'vs.', 'e.g.', 'i.e.', 'etc.'}
    if not text:
        return []
    parts = text.split('. ')
    sentences = []
    current = parts[0]
    for part in parts[1:]:
        # str.split('. ') strips the period, so re-append it before the
        # abbreviation check.  (The original compared the dot-less piece
        # against dotted abbreviations, so the check never matched and
        # abbreviations were always split apart.)
        ends_with_abbrev = any((current + '.').lower().endswith(abbr)
                               for abbr in abbreviations)
        if ends_with_abbrev:
            current = current + '. ' + part
        else:
            sentences.append(current)
            current = part
    sentences.append(current)
    return sentences
def show_analysis():
    """Render the Streamlit text-analysis page.

    Requires 'model', 'label_encoder' and 'tokenizer' to be present and
    non-None in st.session_state (the Home page loads them); otherwise a
    warning/error is shown and the page bails out.  On "Analyze", the
    entered text is split into sentences, each is classified with
    predict_sentence, and the results are shown as a dataframe.
    """
    st.title("Text Analysis")

    required = ('model', 'label_encoder', 'tokenizer')

    # Guard: the model must have been loaded on the Home page first.
    if any(key not in st.session_state for key in required):
        st.warning("Model components not found in session state")
        st.info("Please go to the Home page first to load the model")
        return
    if not all(st.session_state[key] is not None for key in required):
        st.error("One or more model components failed to load")
        return

    model = st.session_state.model
    label_encoder = st.session_state.label_encoder
    tokenizer = st.session_state.tokenizer

    st.header("Analyze Your Text")
    raw_text = st.text_area("Enter your text here (multiple sentences allowed):", height=150)

    if not st.button("Analyze"):
        return
    if not raw_text:
        st.warning("Please enter some text to analyze")
        return

    with st.spinner("Analyzing text..."):
        try:
            rows = []
            # Classify each sentence independently.
            for sent in split_sentences_regex(raw_text):
                label, confidence = predict_sentence(model, sent, tokenizer, label_encoder)
                rows.append({
                    "Sentence": sent,
                    "Label": label,
                    "Confidence": f"{confidence:.2%}",
                })
            st.dataframe(pd.DataFrame(rows))
        except Exception as e:
            st.error(f"Analysis failed: {str(e)}")
# Allow running this page directly (e.g. `streamlit run <this file>`).
if __name__ == "__main__":
    show_analysis()