# Spaces: Running  (page-header residue from the hosting site; not part of the program)
import torch | |
import gradio as gr | |
import pandas as pd | |
from datasets import Dataset | |
from transformers import ( | |
AutoTokenizer, | |
AutoModelForSequenceClassification, | |
TrainingArguments, | |
Trainer | |
) | |
# Load the training data from a CSV file in the working directory and wrap
# it in a `datasets.Dataset` so it can be mapped and handed to the Trainer.
df = pd.read_csv("dataset.csv")
dataset = Dataset.from_pandas(df)

# Load the tokenizer and the pretrained checkpoint. The classification head
# is requested with two labels; `ignore_mismatched_sizes=True` lets loading
# proceed when the checkpoint's head has a different size than requested.
model_name = "MoritzLaurer/mDeBERTa-v3-base-mnli-xnli"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
    ignore_mismatched_sizes=True,
)
# Tokenize the whole dataset up front.
def preprocess(examples):
    """Tokenize the ``"text"`` field of a batch of examples.

    Args:
        examples: A mapping with a ``"text"`` entry (called by
            ``dataset.map`` in batched mode, so presumably a list of
            strings per call).

    Returns:
        The tokenizer output for those texts, with truncation and
        padding enabled.
    """
    texts = examples["text"]
    return tokenizer(texts, padding=True, truncation=True)


tokenized_dataset = dataset.map(preprocess, batched=True)
# Hyperparameters for fine-tuning. `save_strategy="no"` asks the Trainer not
# to write intermediate checkpoints under `output_dir`.
training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    logging_steps=10,
    save_strategy="no",
)

# Fine-tune the model on the tokenized dataset. This runs at module import
# time, i.e. every time the script starts.
# NOTE(review): newer transformers releases appear to rename the `tokenizer`
# argument to `processing_class` -- confirm against the pinned version.
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    train_dataset=tokenized_dataset,
)
trainer.train()
# Inference function used by the Gradio interface.
def classify(text):
    """Score *text* with the fine-tuned two-class model.

    Args:
        text: The string to classify.

    Returns:
        A dict mapping each of the two Thai class labels to its softmax
        probability as a Python float (index 0 -> "not related",
        index 1 -> "transport-truck hiring").
    """
    # `trainer.train()` leaves the model in training mode; switch to eval so
    # dropout is disabled and the same text always gets the same scores.
    model.eval()

    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    # The Trainer may have moved the model to an accelerator; the inputs must
    # live on the same device as the model's weights.
    device = model.device
    inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    # `.tolist()` copies to host memory if needed and yields plain floats,
    # so this works regardless of which device the logits are on.
    probs = torch.softmax(outputs.logits, dim=1)[0].tolist()
    return {
        "ไม่เกี่ยวข้อง": float(probs[0]),
        "จ้างงานรถขนส่ง": float(probs[1]),
    }
# Gradio UI: a three-line text box in, a label widget with per-class
# probabilities out, wired to `classify`.
_text_input = gr.Textbox(lines=3, label="ข้อความ")
_label_output = gr.Label(label="ผลการจำแนก")

demo = gr.Interface(
    fn=classify,
    inputs=_text_input,
    outputs=_label_output,
    title="Text Classifier: Zero-Shot NLI",
    description="กรุณาพิมพ์ข้อความเพื่อตรวจสอบว่าเป็นการว่าจ้างงานรถขนส่งหรือไม่",
)

# Only start the web server when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()