Spaces:

joko333
/

logical_structure_analysis

Sleeping

App Files Files Community

logical_structure_analysis / pages /Analysis.py

joko333

Implement sentence analysis functionality in Analysis page; add BiLSTM model and prediction utilities

ca5c473 9 months ago

raw

history blame

3.99 kB

	import streamlit as st
	import pandas as pd
	import re
	from utils.prediction import predict_sentence

	def split_sentences_regex(text):
	    """Split text into sentence fragments on literal periods.

	    Before splitting, newlines and carriage returns become spaces, single
	    and double quote characters are removed (apostrophes included, so
	    "don't" becomes "dont"), and whitespace runs collapse to one space.
	    Only ``.`` ends a sentence here; ``!`` and ``?`` do not.

	    Args:
	        text: Raw input string.

	    Returns:
	        A list of stripped, non-empty fragments, without the periods that
	        separated them.
	    """
	    # Normalise the text step by step before looking for sentence ends.
	    cleaned = re.sub(r'[\n\r]', ' ', text)
	    cleaned = re.sub(r'["\']', '', cleaned)
	    cleaned = re.sub(r'\s+', ' ', cleaned)

	    fragments = []
	    for piece in re.split(r'[.]', cleaned):
	        trimmed = piece.strip()
	        # Whitespace-only pieces (e.g. between ".." or after a final ".")
	        # carry no sentence and are dropped.
	        if trimmed:
	            fragments.append(trimmed)
	    return fragments

	def split_sentences_with_abbrev(text):
	    """Split text on ``'. '`` while keeping common abbreviations intact.

	    A split point is undone when the word just before it, with its period
	    restored, is a known abbreviation (e.g. "Dr. Smith" stays together).

	    Args:
	        text: Input string to split.

	    Returns:
	        A list of sentences. The ``'. '`` separators are consumed, so only
	        the last sentence can keep a trailing period. An empty input
	        yields ``['']``.
	    """
	    # Common abbreviations to ignore
	    abbreviations = {'mr.', 'mrs.', 'dr.', 'sr.', 'jr.', 'vs.', 'e.g.', 'i.e.', 'etc.'}

	    # Split initially by potential sentence endings
	    parts = text.split('. ')
	    sentences = []
	    current = parts[0]

	    for part in parts[1:]:
	        # split('. ') removed the period, so put it back before comparing.
	        # Matching the whole last word (not a suffix) keeps "revs." from
	        # being mistaken for "vs.".
	        words = current.split()
	        last_word = words[-1].lower() + '.' if words else ''
	        # Ignore an opening bracket or quote glued to the word: "(e.g."
	        ends_with_abbrev = last_word.lstrip('("\'[') in abbreviations

	        if ends_with_abbrev:
	            current = current + '. ' + part
	        else:
	            sentences.append(current)
	            current = part

	    sentences.append(current)
	    return sentences

	def _render_prediction(sentence, model, tokenizer, label_encoder):
	    """Classify one sentence and render its result inside its own container.

	    Results labelled "Unknown" or "Error" are skipped without any output
	    (presumably predict_sentence's markers for a failed prediction; confirm
	    against utils.prediction).
	    """
	    with st.container():
	        label, confidence = predict_sentence(
	            model, sentence, tokenizer, label_encoder
	        )
	        if label in ("Unknown", "Error"):
	            return
	        st.write("---")
	        st.write(f"Sentence: {sentence}")
	        st.write(f"Predicted: {label}")
	        # NOTE(review): st.progress expects an int in 0-100 or a float in
	        # 0.0-1.0; confirm predict_sentence returns one of those.
	        st.progress(confidence)


	def show_analysis():
	    """Render the "Text Analysis" page.

	    Reads ``model``, ``label_encoder`` and ``tokenizer`` from
	    ``st.session_state``; if ``model`` is absent, shows an error and returns.
	    Otherwise offers a text area whose content is split into sentences and
	    classified when "Analyze" is pressed, plus an optional example section
	    that classifies the first five rows of the ``Sentence`` column of
	    ``data/raw/history_01.csv``.

	    Any exception raised while building the page is reported with
	    ``st.error`` instead of propagating.
	    """
	    st.title("Text Analysis")
	    st.write("Use this section to analyze the logical structure of your text.")

	    try:
	        if 'model' not in st.session_state:
	            st.error("Please initialize the model from the home page first.")
	            return

	        model = st.session_state.model
	        label_encoder = st.session_state.label_encoder
	        tokenizer = st.session_state.tokenizer

	        # Text input section
	        st.header("Analyze Your Text")
	        user_text = st.text_area(
	            "Enter your text here (multiple sentences allowed):", height=150
	        )

	        if st.button("Analyze"):
	            if user_text:
	                # Split and analyze sentences
	                sentences = split_sentences_regex(user_text)

	                st.subheader("Analysis Results:")
	                for sentence in sentences:
	                    _render_prediction(sentence, model, tokenizer, label_encoder)
	            else:
	                st.warning("Please enter some text to analyze.")

	        # Example Analysis Section
	        st.header("Example Analysis")
	        show_examples = st.checkbox("Show example analysis", key='show_examples')

	        if show_examples:
	            try:
	                df = pd.read_csv('data/raw/history_01.csv')
	                for sentence in df['Sentence'].head(5):  # Limit to 5 examples
	                    _render_prediction(sentence, model, tokenizer, label_encoder)
	            except FileNotFoundError:
	                st.warning("Example file not found. Please check the data path.")

	    except Exception as e:
	        # Page-level boundary: surface the failure in the UI rather than
	        # letting the Streamlit script crash.
	        st.error(f"Error: {str(e)}")

	# Render the analysis page when this module is executed as a script.
	# NOTE(review): the call below belongs inside the guard; its indentation
	# appears to have been lost in this copy of the file.
	if __name__ == "__main__":
	show_analysis()