# Source: indoSPAMApp / app.py
# Last commit: "Update app.py" (dd273fe, verified) by hasnanhaq
import torch
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer
import gradio as gr
from sklearn.preprocessing import LabelEncoder
import pandas as pd
# ===== Load Label Encoder =====
# The dataset is a semicolon-separated CSV; its "label" column is used to fit
# the encoder that maps between label values and integer class indices.
df = pd.read_csv("Dataset_new.csv", sep=";")
le = LabelEncoder().fit(df["label"])
# ===== Define Model Class =====
class IndoBERTClassifier(nn.Module):
    """Pretrained encoder followed by dropout and a linear classification head.

    Args:
        model_name: Identifier passed to ``AutoModel.from_pretrained``.
        num_labels: Number of logits the linear head produces per sequence.
    """

    def __init__(self, model_name, num_labels):
        super().__init__()
        # Attribute names below become state-dict key prefixes; keep them
        # stable so previously saved weights still load.
        self.bert = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(0.3)
        hidden_size = self.bert.config.hidden_size
        self.classifier = nn.Linear(hidden_size, num_labels)

    def forward(self, input_ids, attention_mask, token_type_ids=None):
        """Return classification logits for the given encoder inputs.

        All three arguments are forwarded unchanged to the wrapped encoder.
        """
        encoded = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        # Summarise each sequence by the hidden state at position 0.
        first_token = encoded.last_hidden_state[:, 0]
        return self.classifier(self.dropout(first_token))
# ===== Load Model and Tokenizer =====
# Tokenizer and encoder backbone are created from the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained("indobenchmark/indobert-base-p1")
model = IndoBERTClassifier("indobenchmark/indobert-base-p1", num_labels=5)

# Load the saved weights onto the CPU, then switch to inference mode so that
# dropout is disabled.
# NOTE(review): depending on the torch version, torch.load may unpickle
# arbitrary objects; only load checkpoint files from a trusted source.
_state = torch.load("pytorch_model.bin", map_location=torch.device("cpu"))
model.load_state_dict(_state)
model.eval()
# ===== Prediction Function =====
def predict(text):
    """Classify a message and return a formatted result string.

    Args:
        text: Message to classify.

    Returns:
        A string containing the decoded label and its class index, or a
        short prompt asking for input when ``text`` is empty or contains
        only whitespace.
    """
    # Guard: an empty message would otherwise still be assigned a category,
    # which is misleading for the user.
    if not text or not text.strip():
        return "Masukkan kalimat terlebih dahulu."

    # Cap the token length explicitly instead of relying on the tokenizer's
    # configured default.
    # NOTE(review): 512 assumes the usual BERT-base position limit; confirm
    # against the checkpoint's max_position_embeddings.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            token_type_ids=inputs.get("token_type_ids"),
        )

    # A single message is passed in, so take the one arg-max as a Python int.
    pred = torch.argmax(logits, dim=1).item()
    label = le.inverse_transform([pred])[0]
    return f"🚨 Kategori Deteksi:\n\n📌 {label} (Label {pred})"
# ===== Gradio UI =====
# Single-page UI: a text input, a text output, and a button that runs
# `predict` on the input and writes the result to the output box.
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Deteksi Spam Penipuan Berbahasa Indonesia")
    gr.Markdown("Masukkan kalimat pesan yang ingin diperiksa apakah termasuk penipuan, permintaan data diri, tautan mencurigakan, atau tawaran kerja palsu.")
    with gr.Row():
        input_text = gr.Textbox(
            lines=3,
            placeholder="Contoh: Selamat! Anda mendapatkan hadiah. Klik link ini.",
            label="💬 Masukkan Kalimat",
        )
    with gr.Row():
        output_text = gr.Textbox(label="📤 Hasil Klasifikasi")
    # Event wiring has to happen while the Blocks context is still open.
    run_button = gr.Button("🔍 Deteksi")
    run_button.click(fn=predict, inputs=input_text, outputs=output_text)

demo.launch()