File size: 4,057 Bytes
d665c22
fcb0322
 
b564db7
fcb0322
 
 
d665c22
fcb0322
 
 
 
 
 
 
 
b564db7
fcb0322
 
987baef
fcb0322
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
987baef
fcb0322
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
987baef
fcb0322
 
987baef
fcb0322
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
987baef
fcb0322
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import streamlit as st
from transformers import (
    AutoTokenizer, 
    AutoModelForSeq2SeqLM,  # generic seq2seq loader, used for the BART paraphrasing model
    T5ForConditionalGeneration, 
    T5Tokenizer
)

# One-time model initialization, memoized in st.session_state so the
# expensive downloads/loads don't repeat on every Streamlit rerun.
# NOTE(review): st.cache_resource is the idiomatic way to share models
# across *all* sessions; session_state reloads them per browser session —
# consider switching if startup cost matters.
if 'models_loaded' not in st.session_state:
    # Load the main T5 model and tokenizer (using t5-base for better quality)
    st.session_state.t5_tokenizer = T5Tokenizer.from_pretrained("t5-base")
    st.session_state.t5_model = T5ForConditionalGeneration.from_pretrained("t5-base")
    
    # Load the paraphrasing model and tokenizer.
    # NOTE(review): facebook/bart-large-cnn is a *summarization* checkpoint,
    # so its output may compress rather than paraphrase — confirm model choice.
    st.session_state.paraphrase_tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
    st.session_state.paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
    
    # Flag so subsequent reruns skip the loads above.
    st.session_state.models_loaded = True

def paraphrase_text(text):
    """
    Rewrite *text* with the BART model held in session state.

    Args:
        text: Input string to paraphrase.

    Returns:
        The decoded paraphrase with special tokens removed.
    """
    tokenizer = st.session_state.paraphrase_tokenizer
    model = st.session_state.paraphrase_model

    # Tokenize, capping at the model's 512-token context window.
    encoded = tokenizer.encode(
        text,
        return_tensors="pt",
        max_length=512,
        truncation=True
    )

    # Sample a rewrite; temperature + nucleus sampling give some variety.
    generated = model.generate(
        encoded,
        max_length=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )

    return tokenizer.decode(generated[0], skip_special_tokens=True)

def humanize_text(text):
    """
    Humanize the input text using the T5 model from session state.

    Args:
        text: The robotic/mechanical text to transform.

    Returns:
        The decoded model output with special tokens stripped.
    """
    input_ids = st.session_state.t5_tokenizer(
        f"humanize: {text}",
        return_tensors="pt",
        max_length=512,
        truncation=True
    ).input_ids

    # Fixes vs. the original:
    # - `max_length=len(text) + 100` measured *characters*, but generate()
    #   counts *tokens*; size the budget from the tokenized input instead.
    # - do_sample=True combined with num_beams=4 ("beam-sample") is
    #   deprecated in transformers and emits warnings; plain nucleus
    #   sampling preserves the intended temperature/top_p behavior.
    outputs = st.session_state.t5_model.generate(
        input_ids,
        max_new_tokens=input_ids.shape[1] + 100,  # token budget scales with input length
        do_sample=True,
        temperature=0.7,  # moderate creativity
        top_p=0.9,        # nucleus sampling
        no_repeat_ngram_size=2  # avoid verbatim repetition
    )

    return st.session_state.t5_tokenizer.decode(outputs[0], skip_special_tokens=True)

# --- Page chrome -----------------------------------------------------------
# set_page_config must be the first Streamlit element command on the page.
st.set_page_config(page_title="Advanced Text Humanizer", page_icon="🤖")

APP_DESCRIPTION = """
This app transforms robotic text into more natural, human-like language using 
advanced AI models. It combines T5 and BART models for better results.
"""

st.title("🤖 → 🧑 Advanced Text Humanizer")
st.markdown(APP_DESCRIPTION)

# --- Inputs ----------------------------------------------------------------
# Main text area: the robotic text the user wants humanized.
input_text = st.text_area(
    "Cole seu texto de robô aqui:",
    help="Paste your text here to transform it into a more natural, human-like version.",
    height=150,
)

# Sidebar toggles that control the pipeline and the result display.
with st.sidebar:
    st.header("Advanced Settings")
    use_paraphrase = st.checkbox("Enable Paraphrasing", value=True)
    show_original = st.checkbox("Show Original Text", value=False)

# Main action: validate the input, run the two-stage pipeline, render output.
if st.button("Humanizar", type="primary"):
    if not input_text:
        st.warning("⚠️ Por favor, cole um texto de robô primeiro!")
    else:
        with st.spinner("Processando o texto..."):
            try:
                # Stage 1: T5 humanization pass.
                humanized_text = humanize_text(input_text)

                # Stage 2: optional BART paraphrasing pass.
                final_text = (
                    paraphrase_text(humanized_text)
                    if use_paraphrase
                    else humanized_text
                )

                st.success("✨ Texto humanizado:")
                if show_original:
                    st.text("Texto original:")
                    st.info(input_text)
                st.markdown("**Resultado:**")
                st.write(final_text)
            except Exception as e:
                # Surface any pipeline failure to the user instead of crashing.
                st.error(f"❌ Ocorreu um erro durante o processamento: {str(e)}")

# Footer: horizontal rule plus a centered attribution line (raw HTML).
st.markdown("---")
FOOTER_HTML = """
    <div style='text-align: center'>
        <small>Desenvolvido com ❤️ usando Streamlit e Transformers</small>
    </div>
    """
st.markdown(FOOTER_HTML, unsafe_allow_html=True)