import asyncio
import re

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Load T5 paraphrase model
model_name = "Vamsi/T5_Paraphrase_Paws"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Load in float32: float16 is poorly supported on CPU and can crash at generation time
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float32)

# Move model to CPU
model.to("cpu")

# Initialize paraphrase pipeline
paraphrase_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    truncation=True,
)


def split_sentences(text):
    """Split text into sentences using regex (faster than nltk)."""
    return re.split(r"(?<=[.!?])\s+", text.strip())


async def paraphrase_text(text):
    """Paraphrase input text asynchronously while preserving sentence structure."""
    if not text.strip():
        return "⚠️ Please enter some text to paraphrase."

    sentences = split_sentences(text)

    # Prefix each sentence with the task tag this model was fine-tuned on
    formatted_input = [f"paraphrase: {sentence}" for sentence in sentences if sentence]

    # Run the blocking pipeline call in a worker thread so the event loop
    # stays free to handle other requests in parallel
    paraphrased_results = await asyncio.to_thread(
        paraphrase_pipeline,
        formatted_input,
        max_length=80,
        do_sample=True,
        temperature=0.7,
        top_p=0.85,
        top_k=50,
        repetition_penalty=1.2,
        num_return_sequences=1,
        batch_size=8,  # Batch processing enabled ✅
    )

    # Extract and join paraphrased sentences
    paraphrased_sentences = [result["generated_text"] for result in paraphrased_results]
    return " ".join(paraphrased_sentences)


# Define Gradio interface with non-blocking requests
with gr.Blocks() as demo:
    gr.Markdown("# 🚀 Fast & Parallel T5 Paraphraser")
    input_box = gr.Textbox(label="Enter text", placeholder="Type your text to paraphrase...", lines=10)
    output_box = gr.Textbox(label="Paraphrased Text", lines=10)
    button = gr.Button("Paraphrase")
    button.click(paraphrase_text, inputs=input_box, outputs=output_box)

# ✅ Enable the queue so up to 10 requests are processed concurrently
# (Gradio 3.x API; on Gradio 4+ use demo.queue(default_concurrency_limit=10) instead)
demo.queue(concurrency_count=10).launch(share=True)
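
# Optional sanity check (a minimal sketch, not part of the app itself): call the
# async handler directly to confirm the model loads and generates. launch() above
# blocks, so comment it out, or guard it with `if __name__ == "__main__":`, before
# running this.
#
#   sample = "The quick brown fox jumps over the lazy dog. It was a sunny day."
#   print(asyncio.run(paraphrase_text(sample)))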