Spaces:

BugZoid
/

text-humanizer

Running

File size: 4,066 Bytes

d665c22
bf2a95e
d665c22
bf2a95e
 
 
 
 
fcb0322
bf2a95e
 
 
987baef
21bb05d
bf2a95e
21bb05d
 
 
bf2a95e
21bb05d
 
bf2a95e
 
21bb05d
bf2a95e
 
 
 
 
 
 
21bb05d
 
bf2a95e
 
21bb05d
bf2a95e
223938e
bf2a95e
 
 
ee25ef1
bf2a95e
 
fcb0322
bf2a95e
fcb0322
21bb05d
bf2a95e
 
 
21bb05d
bf2a95e
 
fcb0322
bf2a95e
 
 
 
 
fcb0322
bf2a95e
ee25ef1
 
bf2a95e
 
 
fcb0322
bf2a95e
987baef
fcb0322
 
987baef
8bf558e
fcb0322
8bf558e
ee25ef1
 
fcb0322
 
 
 
 
 
8bf558e
fcb0322
 
bf2a95e
fcb0322
 
bf2a95e
987baef
fcb0322
 
bf2a95e
fcb0322
 
 
bf2a95e
 
 
 
fcb0322
bf2a95e
 
 
 
 
 
fcb0322
 
bf2a95e
fcb0322

import streamlit as st
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Load the tokenizer and model once and keep them in the session state, so the
# (expensive) load is skipped whenever 'model_loaded' is already set.
if 'model_loaded' not in st.session_state:
    st.session_state.tokenizer = T5Tokenizer.from_pretrained("t5-base")
    st.session_state.model = T5ForConditionalGeneration.from_pretrained("t5-base")
    st.session_state.model_loaded = True


# NOTE: the helpers below are defined at module level on purpose. If they were
# nested under the `if` above they would only exist on the run that loads the
# model, and any later run that skips the `if` would hit a NameError.
def clean_generated_text(text):
    """Strip echoed command prefixes from generated text and capitalize it.

    Args:
        text: Raw text decoded from the model output.

    Returns:
        The text with surrounding whitespace removed, every leading command
        prefix dropped (matched case-insensitively, together with a ":"
        separator left behind by the prefix), and the first character
        upper-cased. An empty string is returned as-is.
    """
    text = text.strip()

    # Command prefixes that may be echoed at the start of the output.
    prefixes = (
        "reescreva o seguinte texto",
        "reescreva este texto",
        "reescreva o texto",
        "traduza",
        "humanize:",
        "humanizar:",
        "em português",
        "de forma mais natural",
    )

    # Repeat until nothing matches so stacked prefixes are removed whatever
    # their order; every removal shortens the text, so the loop terminates.
    removed = True
    while removed:
        removed = False
        for prefix in prefixes:
            # Lowercase only the candidate slice: lowercasing the whole text
            # can change its length and misalign the slice taken below.
            if text[:len(prefix)].lower() == prefix:
                # Also drop a ":" separator left dangling by the prefix.
                text = text[len(prefix):].strip().lstrip(":").lstrip()
                removed = True
                break

    # Capitalize the first letter without touching the rest of the text.
    if text:
        text = text[0].upper() + text[1:]

    return text


def humanize_text(text):
    """Rewrite text with the session's T5 model, keeping at least its word count.

    Args:
        text: Input text to rewrite.

    Returns:
        The cleaned model output. When it has fewer whitespace-separated
        words than the input, the trailing words of the input are appended
        to make up the difference.
    """
    max_output_tokens = 1024
    source_words = text.split()
    prompt = f"reescreva em português natural, mantendo todas as informações: {text}"

    input_ids = st.session_state.tokenizer(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    ).input_ids

    # Generation parameters tuned for coherence.
    outputs = st.session_state.model.generate(
        input_ids,
        max_length=max_output_tokens,
        # The input word count serves as a rough floor for the output length;
        # clamp it so the floor can never exceed max_length.
        min_length=min(len(source_words), max_output_tokens - 1),
        do_sample=True,
        temperature=0.3,         # low, for more coherent output
        top_p=0.95,              # tuned for better word selection
        num_beams=3,             # few beams, for speed
        repetition_penalty=1.2,
        length_penalty=2.0,      # keeps the incentive for longer texts
    )
    result = st.session_state.tokenizer.decode(outputs[0], skip_special_tokens=True)
    result = clean_generated_text(result)

    # Enforce the minimum size: pad with the tail of the input when short.
    missing = len(source_words) - len(result.split())
    if missing > 0:
        tail = " ".join(source_words[-missing:])
        # strip() avoids a leading space when the model output was empty.
        result = f"{result} {tail}".strip()

    return result

# UI components: page setup, input area, action button, results, and footer.
st.set_page_config(page_title="Advanced Text Humanizer", page_icon="🤖")

st.title("🤖 → 🧑 Humanizador de Texto Avançado")
st.markdown("""
Este aplicativo transforma textos robotizados em linguagem mais natural e humana, 
mantendo todas as informações originais e garantindo que o texto final seja pelo menos 
do mesmo tamanho que o original.
""")

# Input area for the text to be rewritten.
input_text = st.text_area(
    "Cole seu texto de robô aqui:",
    height=150,
    help="Cole seu texto aqui para transformá-lo em uma versão mais natural e humana."
)

# Process button
if st.button("Humanizar", type="primary"):
    # Treat whitespace-only input like empty input instead of sending it to
    # the model.
    if not input_text or not input_text.strip():
        st.warning("⚠️ Por favor, cole um texto primeiro!")
    else:
        with st.spinner("Processando o texto..."):
            try:
                final_text = humanize_text(input_text)

                # Show original and result side by side with word counts.
                st.success("✨ Texto humanizado:")
                original_col, result_col = st.columns(2)

                with original_col:
                    st.text("Original:")
                    st.info(input_text)
                    st.write(f"Palavras: {len(input_text.split())}")

                with result_col:
                    st.text("Resultado:")
                    st.info(final_text)
                    st.write(f"Palavras: {len(final_text.split())}")

            except Exception as exc:
                # Top-level UI boundary: report the failure to the user
                # rather than letting the script run abort.
                st.error(f"❌ Erro no processamento: {exc}")

# Footer
st.markdown("---")
st.markdown(
    """
    <div style='text-align: center'>
        <small>Desenvolvido com ❤️ usando Streamlit e Transformers</small>
    </div>
    """,
    unsafe_allow_html=True
)