import streamlit as st
import pandas as pd
import re

from utils.prediction import predict_sentence


def split_sentences_regex(text):
    # Clean the text
    text = re.sub(r'[\n\r]', ' ', text)   # Remove newlines
    text = re.sub(r'["\']', '', text)     # Remove quotes
    text = re.sub(r'\s+', ' ', text)      # Normalize whitespace

    # Split on periods only; the stricter pattern below also handles ! and ?
    # pattern = r'[.!?]+[\s]+|[.!?]+$'
    pattern = r'[.]'

    # Split and strip the resulting fragments
    sentences = [s.strip() for s in re.split(pattern, text) if s]

    # Drop fragments that are empty after stripping
    return [s for s in sentences if len(s) > 0]


def split_sentences_with_abbrev(text):
    """Alternative splitter that avoids breaking on common abbreviations."""
    # Common abbreviations to ignore. Note: text.split('. ') consumes the
    # trailing period, so the entries are stored here without it.
    abbreviations = {'mr', 'mrs', 'dr', 'sr', 'jr', 'vs', 'e.g', 'i.e', 'etc'}

    # Split initially by potential sentence endings
    parts = text.split('. ')
    sentences = []
    current = parts[0]

    for part in parts[1:]:
        # Check whether the previous fragment ends with an abbreviation
        words = current.lower().split()
        ends_with_abbrev = bool(words) and words[-1] in abbreviations
        if ends_with_abbrev:
            # Re-join a fragment that was split inside an abbreviation
            current = current + '. ' + part
        else:
            sentences.append(current)
            current = part

    sentences.append(current)
    return sentences


def show_analysis():
    st.title("Text Analysis")

    # Check model loading state
    if not all(key in st.session_state for key in ['model', 'label_encoder', 'tokenizer']):
        st.warning("Model components not found in session state")
        st.info("Please go to the Home page first to load the model")
        return

    if any(st.session_state[key] is None for key in ['model', 'label_encoder', 'tokenizer']):
        st.error("One or more model components failed to load")
        return

    # Get model components
    model = st.session_state.model
    label_encoder = st.session_state.label_encoder
    tokenizer = st.session_state.tokenizer

    # Text input section
    st.header("Analyze Your Text")
    user_text = st.text_area("Enter your text here (multiple sentences allowed):", height=150)

    if st.button("Analyze"):
        if not user_text:
            st.warning("Please enter some text to analyze")
            return

        with st.spinner("Analyzing text..."):
            try:
                # Split text into sentences
                sentences = split_sentences_regex(user_text)
                results = []

                # Classify each sentence
                for sentence in sentences:
                    label, confidence = predict_sentence(model, sentence, tokenizer, label_encoder)
                    results.append({
                        "Sentence": sentence,
                        "Label": label,
                        "Confidence": f"{confidence:.2%}",
                    })

                # Display results
                df = pd.DataFrame(results)
                st.dataframe(df)
            except Exception as e:
                st.error(f"Analysis failed: {str(e)}")


if __name__ == "__main__":
    show_analysis()
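
# ---------------------------------------------------------------------------
# Assumed interface (hedged sketch, not part of this app): the page above
# expects utils/prediction.py to expose predict_sentence(model, sentence,
# tokenizer, label_encoder) returning a (label, confidence) pair. A minimal
# placeholder could look like the commented sketch below; the callable
# tokenizer, model.predict(), and scikit-learn LabelEncoder.inverse_transform
# usage are assumptions, not the actual implementation.
#
# def predict_sentence(model, sentence, tokenizer, label_encoder):
#     """Return (label, confidence) for a single sentence."""
#     inputs = tokenizer([sentence])            # assumption: callable tokenizer
#     probs = model.predict(inputs)[0]          # assumption: per-class probabilities
#     best = int(probs.argmax())
#     label = label_encoder.inverse_transform([best])[0]
#     return label, float(probs[best])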