import gradio as gr
import re
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Model choices, roughly ordered from most to least accurate
model_choices = {
    "Pegasus (google/pegasus-xsum)": "google/pegasus-xsum",
    "BigBird-Pegasus (google/bigbird-pegasus-large-arxiv)": "google/bigbird-pegasus-large-arxiv",
    "LongT5 Large (google/long-t5-tglobal-large)": "google/long-t5-tglobal-large",
    "BART Large CNN (facebook/bart-large-cnn)": "facebook/bart-large-cnn",
    "ProphetNet (microsoft/prophetnet-large-uncased-cnndm)": "microsoft/prophetnet-large-uncased-cnndm",
    "LED (allenai/led-base-16384)": "allenai/led-base-16384",
    "T5 Large (t5-large)": "t5-large",
    "Flan-T5 Large (google/flan-t5-large)": "google/flan-t5-large",
    "DistilBART CNN (sshleifer/distilbart-cnn-12-6)": "sshleifer/distilbart-cnn-12-6",
    "DistilBART XSum (mrm8488/distilbart-xsum-12-6)": "mrm8488/distilbart-xsum-12-6",
    "T5 Base (t5-base)": "t5-base",
    "Flan-T5 Base (google/flan-t5-base)": "google/flan-t5-base",
    "BART CNN SamSum (philschmid/bart-large-cnn-samsum)": "philschmid/bart-large-cnn-samsum",
    "Pegasus SamSum (knkarthick/pegasus-samsum)": "knkarthick/pegasus-samsum",
    "LongT5 Base (google/long-t5-tglobal-base)": "google/long-t5-tglobal-base",
    "T5 Small (t5-small)": "t5-small",
    # Caveat: the two entries below are not summarization models. mbart-large-cc25 is a
    # multilingual pretraining checkpoint and opus-mt-en-ro is an English-to-Romanian
    # translator, so their output will be low quality or a translation rather than a summary.
    "MBART (facebook/mbart-large-cc25)": "facebook/mbart-large-cc25",
    "MarianMT (Helsinki-NLP/opus-mt-en-ro)": "Helsinki-NLP/opus-mt-en-ro",
    # Caveat: falcon-7b-instruct is a decoder-only causal LM, so AutoModelForSeq2SeqLM
    # cannot load it and selecting it will raise an error.
    "Falcon Instruct (tiiuae/falcon-7b-instruct)": "tiiuae/falcon-7b-instruct",
    "BART ELI5 (yjernite/bart_eli5)": "yjernite/bart_eli5"
}
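
# Optional sanity check (a sketch, not part of the original app): before offering a repo
# in the dropdown, confirm it is an encoder-decoder model and therefore loadable with
# AutoModelForSeq2SeqLM. The helper name is hypothetical.
from transformers import AutoConfig

def is_seq2seq(model_name):
    config = AutoConfig.from_pretrained(model_name)
    return bool(getattr(config, "is_encoder_decoder", False))
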
# Cache of loaded (tokenizer, model) pairs so each model is downloaded and
# initialized only once per session
model_cache = {}
# Function to clean input text (remove special characters and extra spaces)
def clean_text(input_text):
    # Replace special characters with a space
    cleaned_text = re.sub(r'[^A-Za-z0-9\s]', ' ', input_text)
    # Replace multiple spaces with a single space
    cleaned_text = re.sub(r'\s+', ' ', cleaned_text)
    # Strip leading and trailing spaces
    cleaned_text = cleaned_text.strip()
    return cleaned_text
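
# Illustrative example of clean_text (not part of the original app logic):
# punctuation becomes spaces and runs of whitespace collapse to one space.
#   clean_text("Hello,   world!! 123")  ->  "Hello world 123"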
# Load model and tokenizer
def load_model(model_name):
    if model_name not in model_cache:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        model_cache[model_name] = (tokenizer, model)
    return model_cache[model_name]
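
# Optional variant (a sketch, not part of the original app): place the model on a GPU
# when one is available. Assumes torch, which AutoModelForSeq2SeqLM already requires.
# The helper name is hypothetical.
import torch

def load_model_on_device(model_name):
    tokenizer, model = load_model(model_name)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return tokenizer, model.to(device)
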
# Summarize the text using a selected model
def summarize_text(input_text, model_label, char_limit):
    if not input_text.strip():
        return "Please enter some text."

    # Clean the input text by removing special characters and extra spaces
    input_text = clean_text(input_text)

    model_name = model_choices[model_label]
    tokenizer, model = load_model(model_name)

    # T5-style and FLAN models expect a task prefix
    if "t5" in model_name.lower() or "flan" in model_name.lower():
        input_text = "summarize: " + input_text

    inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
    summary_ids = model.generate(
        inputs["input_ids"],
        max_length=20,  # approximate output length in tokens; can be tuned per model
        min_length=5,
        do_sample=False
    )
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary[:char_limit]  # Enforce character limit by truncation
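
# Hedged alternative (a sketch, not the app's original behavior): rather than generating
# at most 20 tokens and hard-truncating the text, derive a token budget from the
# character limit so longer limits can actually be filled. The ~4 characters-per-token
# figure is a rough assumption, and the helper name is hypothetical.
def summarize_with_token_budget(input_text, model_label, char_limit):
    model_name = model_choices[model_label]
    tokenizer, model = load_model(model_name)
    text = clean_text(input_text)
    if "t5" in model_name.lower() or "flan" in model_name.lower():
        text = "summarize: " + text
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    summary_ids = model.generate(
        inputs["input_ids"],
        max_new_tokens=max(5, char_limit // 4),  # assumed ~4 characters per token
        do_sample=False
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)[:char_limit]
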
# Gradio UI
iface = gr.Interface(
    fn=summarize_text,
    inputs=[
        gr.Textbox(lines=6, label="Enter text to summarize"),
        gr.Dropdown(
            choices=list(model_choices.keys()),
            label="Choose summarization model",
            value="Pegasus (google/pegasus-xsum)"
        ),
        gr.Slider(minimum=30, maximum=200, value=65, step=1, label="Max Character Limit")
    ],
    outputs=gr.Textbox(lines=3, label="Summary (truncated to character limit)"),
    title="Multi-Model Text Summarizer",
    description="Summarize text using different Hugging Face models with a user-defined character limit."
)

iface.launch()
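
# Deployment note (an assumption, not from the original file): on Hugging Face Spaces
# this script runs directly, so launch() starts the app; for local testing,
# iface.launch(share=True) would also create a temporary public link.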