Spaces:
Running
Running
File size: 4,057 Bytes
d665c22 fcb0322 b564db7 fcb0322 d665c22 fcb0322 b564db7 fcb0322 987baef fcb0322 987baef fcb0322 987baef fcb0322 987baef fcb0322 987baef fcb0322 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
import streamlit as st
from transformers import (
AutoTokenizer,
AutoModelForSeq2SeqLM, # Correção aqui
T5ForConditionalGeneration,
T5Tokenizer
)
# Load every model once per server process and expose them via session state.
#
# The tokenizers/models are fetched through ``st.cache_resource`` so that all
# browser sessions and reruns share a single copy instead of each new session
# loading its own. The objects are still published under the same
# ``st.session_state`` keys, so code that reads
# ``st.session_state.t5_model`` etc. keeps working unchanged.
@st.cache_resource(show_spinner=False)
def _load_models():
    """Load and return the tokenizers and models used by the app.

    ``show_spinner=False`` keeps this call from rendering anything, because
    ``st.set_page_config`` is only invoked further down in the script.

    Returns:
        dict: Maps the session-state key names (``t5_tokenizer``,
        ``t5_model``, ``paraphrase_tokenizer``, ``paraphrase_model``) to the
        loaded objects, in the order they are loaded.
    """
    return {
        # Main T5 model and tokenizer (t5-base checkpoint).
        "t5_tokenizer": T5Tokenizer.from_pretrained("t5-base"),
        "t5_model": T5ForConditionalGeneration.from_pretrained("t5-base"),
        # Model and tokenizer used by the paraphrasing pass.
        # NOTE(review): the checkpoint name suggests a CNN/DailyMail
        # summarization fine-tune -- confirm it is the intended paraphraser.
        "paraphrase_tokenizer": AutoTokenizer.from_pretrained("facebook/bart-large-cnn"),
        "paraphrase_model": AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn"),
    }


# Initialize session state for models if not already done
if 'models_loaded' not in st.session_state:
    for _state_key, _resource in _load_models().items():
        st.session_state[_state_key] = _resource
    st.session_state.models_loaded = True
def paraphrase_text(text):
    """
    Run ``text`` through the BART model kept in the Streamlit session state.

    The input is encoded to a PyTorch tensor (truncated to 512 tokens), passed
    to ``generate`` with sampling enabled, and the first generated sequence is
    decoded back to a string with special tokens removed.

    Args:
        text: The string to rewrite.

    Returns:
        The decoded text of the first generated sequence.
    """
    tokenizer = st.session_state.paraphrase_tokenizer
    encoded = tokenizer.encode(
        text,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )

    # Sampling settings: nucleus sampling with a mild temperature.
    generation_options = {
        "max_length": 512,
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.9,
    }
    generated = st.session_state.paraphrase_model.generate(
        encoded, **generation_options
    )

    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)
def humanize_text(text):
    """
    Rewrite ``text`` with the T5 model kept in the Streamlit session state.

    The text is prefixed with ``"humanize: "``, tokenized to a PyTorch tensor
    (truncated to 512 tokens) and passed to ``generate`` using beam search
    combined with sampling. The first generated sequence is decoded back to a
    string with special tokens removed.

    Args:
        text: The string to rewrite.

    Returns:
        The decoded text of the first generated sequence.
    """
    tokenizer = st.session_state.t5_tokenizer

    input_ids = tokenizer(
        f"humanize: {text}",
        return_tensors="pt",
        max_length=512,
        truncation=True,
    ).input_ids

    # ``max_length`` is measured in tokens, not characters. Derive the output
    # budget from the encoded prompt length so it tracks the real input size
    # and stays bounded (at most 512 + 100) even for very long pasted text.
    max_output_tokens = input_ids.shape[1] + 100

    outputs = st.session_state.t5_model.generate(
        input_ids,
        max_length=max_output_tokens,
        do_sample=True,
        temperature=0.7,  # Increased creativity
        top_p=0.9,  # Nucleus sampling
        num_beams=4,  # Beam search for better quality
        no_repeat_ngram_size=2,  # Avoid repetition
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# UI components: page configuration, heading and introduction.
st.set_page_config(page_title="Advanced Text Humanizer", page_icon="🤖")
st.title("🤖 → 🧑 Advanced Text Humanizer")

_INTRO_MARKDOWN = """
This app transforms robotic text into more natural, human-like language using
advanced AI models. It combines T5 and BART models for better results.
"""
st.markdown(_INTRO_MARKDOWN)

# Text box where the user pastes the text to be rewritten.
input_text = st.text_area(
    "Cole seu texto de robô aqui:",
    height=150,
    help="Paste your text here to transform it into a more natural, human-like version.",
)

# Advanced settings, rendered in the sidebar.
st.sidebar.header("Advanced Settings")
use_paraphrase = st.sidebar.checkbox("Enable Paraphrasing", value=True)
show_original = st.sidebar.checkbox("Show Original Text", value=False)
# Process button with error handling.
# When clicked, the input goes through the humanization pass and, if enabled
# in the sidebar, the paraphrasing pass; the result is then displayed.
if st.button("Humanizar", type="primary"):
    # Treat whitespace-only input the same as empty input so that blank text
    # is never sent to the models.
    if not input_text or not input_text.strip():
        st.warning("⚠️ Por favor, cole um texto de robô primeiro!")
    else:
        with st.spinner("Processando o texto..."):
            # Broad catch is deliberate: this is the top-level UI boundary, and
            # any failure is reported to the user instead of crashing the page.
            try:
                # First humanization pass
                humanized_text = humanize_text(input_text)

                # Optional paraphrasing pass
                if use_paraphrase:
                    final_text = paraphrase_text(humanized_text)
                else:
                    final_text = humanized_text

                # Display results
                st.success("✨ Texto humanizado:")
                if show_original:
                    st.text("Texto original:")
                    st.info(input_text)
                st.markdown("**Resultado:**")
                st.write(final_text)
            except Exception as e:
                st.error(f"❌ Ocorreu um erro durante o processamento: {str(e)}")
# Footer: a horizontal rule followed by a centered credit line.
_FOOTER_HTML = """
<div style='text-align: center'>
<small>Desenvolvido com ❤️ usando Streamlit e Transformers</small>
</div>
"""
st.markdown("---")
# Raw HTML is needed for the centering; the markup is a fixed literal.
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)