Spaces:

BugZoid
/

text-humanizer

Running

App Files Files Community

text-humanizer / app.py

BugZoid

Update app.py

bf2a95e verified 8 months ago

raw

history blame

4.07 kB

	import streamlit as st
	from transformers import T5ForConditionalGeneration, T5Tokenizer

	# Load the T5 tokenizer and model once per server process. Keeping the
	# weights only in st.session_state makes every new browser session load its
	# own copy; st.cache_resource hands the same objects to all sessions.
	# show_spinner=False keeps the cached call from emitting a UI element before
	# st.set_page_config() runs further down in this script.
	@st.cache_resource(show_spinner=False)
	def _load_t5(model_name="t5-base"):
	    """Return the ``(tokenizer, model)`` pair for *model_name*."""
	    tokenizer = T5Tokenizer.from_pretrained(model_name)
	    model = T5ForConditionalGeneration.from_pretrained(model_name)
	    return tokenizer, model


	# Expose the shared objects under the session-state keys the rest of the
	# script reads (tokenizer, model, model_loaded).
	if 'model_loaded' not in st.session_state:
	    st.session_state.tokenizer, st.session_state.model = _load_t5()
	    st.session_state.model_loaded = True

	def clean_generated_text(text):
	    """Strip echoed instruction phrases from generated text and tidy it.

	    Leading command phrases (matched case-insensitively) are removed
	    repeatedly until none is left, together with any separator characters
	    that followed them. A phrase is only removed when it ends at a word
	    boundary, so a word that merely starts with a command (for example
	    "traduzamos") is left intact. Finally the first character is upper-cased.

	    Args:
	        text: Raw decoded model output.

	    Returns:
	        The cleaned text; an empty string if nothing remains.
	    """
	    # Command phrases the model may echo at the start of its output.
	    prefixes = (
	        # The instruction humanize_text puts in front of the user text.
	        "reescreva em português natural, mantendo todas as informações",
	        "reescreva o seguinte texto",
	        "reescreva este texto",
	        "reescreva o texto",
	        "traduza",
	        "humanize:",
	        "humanizar:",
	        "em português",
	        "de forma mais natural",
	    )
	    # Characters that may be left dangling right after a removed command.
	    separators = " \t\r\n:;,.-–—"

	    text = text.strip()

	    # Repeat until a full pass removes nothing, so stacked commands are
	    # stripped regardless of the order they appear in the list above.
	    removed = True
	    while removed and text:
	        removed = False
	        for prefix in prefixes:
	            # Compare an equal-length slice so the cut point always lines up
	            # with the original (non-lowercased) text.
	            if text[:len(prefix)].lower() != prefix:
	                continue
	            rest = text[len(prefix):]
	            # Skip matches that stop in the middle of a longer word.
	            if prefix[-1].isalnum() and rest[:1].isalnum():
	                continue
	            text = rest.lstrip(separators)
	            removed = True
	            break

	    # Capitalize the first letter without touching the rest of the text.
	    if text:
	        text = text[0].upper() + text[1:]

	    return text

	def humanize_text(text):
	    """Rewrite *text* with the T5 model held in ``st.session_state``.

	    The text is prefixed with a Portuguese rewrite instruction, tokenized
	    (truncated to 512 tokens), passed to ``model.generate`` and the decoded
	    output is cleaned with ``clean_generated_text``. If the result has fewer
	    whitespace-separated words than the input, the shortfall is made up by
	    appending the last words of the original text.

	    Args:
	        text: The input text to rewrite.

	    Returns:
	        The cleaned generated text, padded so that it has at least as many
	        words as *text*.
	    """
	    max_output_tokens = 1024
	    source_words = text.split()

	    prompt = f"reescreva em português natural, mantendo todas as informações: {text}"

	    tokenizer = st.session_state.tokenizer
	    input_ids = tokenizer(
	        prompt,
	        return_tensors="pt",
	        max_length=512,
	        truncation=True,
	    ).input_ids

	    # Generation settings chosen by the author for more coherent output.
	    outputs = st.session_state.model.generate(
	        input_ids,
	        max_length=max_output_tokens,
	        # The word count is used as a floor for the output length; keep it
	        # below max_length so the two constraints can both be satisfied.
	        min_length=min(len(source_words), max_output_tokens - 1),
	        do_sample=True,
	        temperature=0.3,  # low temperature for more coherent text
	        top_p=0.95,
	        num_beams=3,  # few beams to keep generation fast
	        repetition_penalty=1.2,
	        length_penalty=2.0,  # keeps the incentive towards longer outputs
	    )
	    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
	    result = clean_generated_text(result)

	    # Guarantee the minimum size: top up with the tail of the original text.
	    missing = len(source_words) - len(result.split())
	    if missing > 0:
	        tail = " ".join(source_words[-missing:])
	        # strip() avoids a leading space when the model returned nothing.
	        result = f"{result} {tail}".strip()

	    return result

	# UI Components
	st.set_page_config(page_title="Advanced Text Humanizer", page_icon="🤖")

	st.title("🤖 → 🧑 Humanizador de Texto Avançado")
	st.markdown("""
	Este aplicativo transforma textos robotizados em linguagem mais natural e humana,
	mantendo todas as informações originais e garantindo que o texto final seja pelo menos
	do mesmo tamanho que o original.
	""")

	# Input area for the text to be rewritten
	input_text = st.text_area(
	    "Cole seu texto de robô aqui:",
	    height=150,
	    help="Cole seu texto aqui para transformá-lo em uma versão mais natural e humana.",
	)

	# Process button
	if st.button("Humanizar", type="primary"):
	    # Treat whitespace-only input like empty input instead of sending it
	    # to the model.
	    if not input_text.strip():
	        st.warning("⚠️ Por favor, cole um texto primeiro!")
	    else:
	        with st.spinner("Processando o texto..."):
	            # Top-level boundary: any failure while generating or rendering
	            # is reported in the page rather than crashing the script run.
	            try:
	                final_text = humanize_text(input_text)

	                # Display original and result side by side with word counts
	                st.success("✨ Texto humanizado:")
	                col1, col2 = st.columns(2)

	                with col1:
	                    st.text("Original:")
	                    st.info(input_text)
	                    st.write(f"Palavras: {len(input_text.split())}")

	                with col2:
	                    st.text("Resultado:")
	                    st.info(final_text)
	                    st.write(f"Palavras: {len(final_text.split())}")

	            except Exception as e:
	                st.error(f"❌ Erro no processamento: {str(e)}")

	# Footer
	st.markdown("---")
	st.markdown(
	"""
	<div style='text-align: center'>
	<small>Desenvolvido com ❤️ usando Streamlit e Transformers</small>
	</div>
	""",
	    unsafe_allow_html=True,
	)