Spaces:

BugZoid
/

text-humanizer

Running

File size: 4,057 Bytes

import streamlit as st
from transformers import (
    AutoTokenizer, 
    AutoModelForSeq2SeqLM,  # Correção aqui
    T5ForConditionalGeneration, 
    T5Tokenizer
)

# Model bootstrap.
#
# The model weights are by far the heaviest objects in this app. Storing
# freshly loaded copies in st.session_state means every browser session (and
# every page refresh, which starts a new session) loads and holds its own
# copy. Loading them through st.cache_resource keeps one shared copy per
# server process. The session-state attributes are still populated so the
# rest of the script keeps reading the same names.
@st.cache_resource(show_spinner=False)
def _load_t5_model():
    """Load the t5-base model once per server process."""
    return T5ForConditionalGeneration.from_pretrained("t5-base")


@st.cache_resource(show_spinner=False)
def _load_paraphrase_model():
    """Load the facebook/bart-large-cnn model once per server process."""
    return AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")


# Initialize session state for models if not already done
if 'models_loaded' not in st.session_state:
    # Tokenizers stay per-session: they are cheap compared with the weights,
    # and keeping them unshared avoids relying on their thread-safety.
    st.session_state.t5_tokenizer = T5Tokenizer.from_pretrained("t5-base")
    st.session_state.t5_model = _load_t5_model()

    # Second-pass (paraphrasing) model and tokenizer
    st.session_state.paraphrase_tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
    st.session_state.paraphrase_model = _load_paraphrase_model()

    st.session_state.models_loaded = True

def paraphrase_text(text):
    """
    Paraphrase ``text`` with the paraphrasing model held in the Streamlit session.

    The text is encoded with ``st.session_state.paraphrase_tokenizer``
    (truncated to 512 tokens), passed to
    ``st.session_state.paraphrase_model.generate`` with sampling enabled, and
    the first generated sequence is decoded with special tokens removed.
    Because sampling is enabled, repeated calls with the same input may
    return different text.

    Args:
        text: The string to paraphrase.

    Returns:
        The decoded model output, or ``text`` unchanged when it is empty or
        contains only whitespace.
    """
    # Nothing to rewrite: skip the model rather than let a sampling generator
    # invent content from an empty prompt.
    if not text or not text.strip():
        return text

    tokenizer = st.session_state.paraphrase_tokenizer

    inputs = tokenizer.encode(
        text,
        return_tensors="pt",
        max_length=512,
        truncation=True
    )

    # NOTE(review): the checkpoint loaded for this step looks like a
    # summarization model; its default generation settings may not suit very
    # short inputs — confirm.
    outputs = st.session_state.paraphrase_model.generate(
        inputs,
        max_length=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

def humanize_text(text):
    """
    Rewrite ``text`` with the T5 model held in the Streamlit session.

    The prompt ``"humanize: <text>"`` is tokenized with
    ``st.session_state.t5_tokenizer`` (truncated to 512 tokens) and passed to
    ``st.session_state.t5_model.generate`` using 4 beams with sampling
    enabled. The first generated sequence is decoded with special tokens
    removed. Because sampling is enabled, repeated calls with the same input
    may return different text.

    Args:
        text: The string to rewrite.

    Returns:
        The decoded model output, or ``text`` unchanged when it is empty or
        contains only whitespace.
    """
    # Nothing to rewrite: skip the model rather than generate from a bare prefix.
    if not text or not text.strip():
        return text

    tokenizer = st.session_state.t5_tokenizer

    input_ids = tokenizer(
        f"humanize: {text}",
        return_tensors="pt",
        max_length=512,
        truncation=True
    ).input_ids

    outputs = st.session_state.t5_model.generate(
        input_ids,
        # generate() measures max_length in tokens, so size the budget from
        # the encoded prompt rather than from the character count of `text`.
        max_length=input_ids.shape[-1] + 100,
        do_sample=True,
        temperature=0.7,  # Sampling temperature
        top_p=0.9,       # Nucleus sampling
        num_beams=4,     # Beam search for better quality
        no_repeat_ngram_size=2  # Avoid repetition
    )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Page setup: browser-tab metadata first, then the visible heading and intro.
st.set_page_config(page_icon="🤖", page_title="Advanced Text Humanizer")

st.title("🤖 → 🧑 Advanced Text Humanizer")
# Intro blurb, spelled out line by line (the leading/trailing newlines and the
# trailing space after "using" are part of the rendered markdown source).
st.markdown(
    "\n"
    "This app transforms robotic text into more natural, human-like language using \n"
    "advanced AI models. It combines T5 and BART models for better results.\n"
)

# Main text box where the user pastes the text to be rewritten.
input_text = st.text_area(
    label="Cole seu texto de robô aqui:",
    help="Paste your text here to transform it into a more natural, human-like version.",
    height=150,
)

# Sidebar toggles: whether to run the second (paraphrasing) pass, and whether
# to echo the original input next to the result.
st.sidebar.header("Advanced Settings")
use_paraphrase = st.sidebar.checkbox("Enable Paraphrasing", value=True)
show_original = st.sidebar.checkbox("Show Original Text", value=False)

# Process button: validate the input, run the rewrite pipeline, show the result.
if st.button("Humanizar", type="primary"):
    # Whitespace-only input carries nothing to rewrite, so treat it like an
    # empty box instead of sending it to the models.
    if not (input_text and input_text.strip()):
        st.warning("⚠️ Por favor, cole um texto de robô primeiro!")
    else:
        with st.spinner("Processando o texto..."):
            # Only the model calls are guarded; this is the UI boundary, so any
            # failure is reported to the user rather than crashing the page.
            try:
                # First humanization pass
                humanized_text = humanize_text(input_text)

                # Optional paraphrasing pass
                if use_paraphrase:
                    final_text = paraphrase_text(humanized_text)
                else:
                    final_text = humanized_text
            except Exception as e:
                st.error(f"❌ Ocorreu um erro durante o processamento: {e}")
            else:
                # Display results
                st.success("✨ Texto humanizado:")
                if show_original:
                    st.text("Texto original:")
                    st.info(input_text)
                st.markdown("**Resultado:**")
                st.write(final_text)

# Footer: a horizontal rule followed by a centered credit line rendered as raw HTML.
st.markdown("---")
st.markdown(
    "\n"
    "    <div style='text-align: center'>\n"
    "        <small>Desenvolvido com ❤️ usando Streamlit e Transformers</small>\n"
    "    </div>\n"
    "    ",
    unsafe_allow_html=True,
)