indoSPAMApp / app.py
hasnanhaq's picture
Update app.py
b5394e7 verified
raw
history blame
2.48 kB
import torch
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer
import gradio as gr
from sklearn.preprocessing import LabelEncoder
import pandas as pd
# ===== Load Label Encoder =====
# Rebuild the label <-> integer-index mapping by fitting an encoder on the
# "label" column of the semicolon-separated dataset file. `le` is used later
# to turn a predicted class index back into its label.
df = pd.read_csv("Dataset_new.csv", sep=";")
le = LabelEncoder().fit(df["label"])
# ===== Define Model Class =====
class IndoBERTClassifier(nn.Module):
    """Sequence classifier: pretrained encoder, then dropout, then a linear head.

    The attribute names ``bert``, ``dropout`` and ``classifier`` determine the
    keys of this module's state dict, so they must not be renamed if saved
    weights are to keep loading.
    """

    def __init__(self, model_name, num_labels):
        """Build the encoder and the classification head.

        Args:
            model_name: Identifier handed to ``AutoModel.from_pretrained``.
            num_labels: Number of output classes (width of the logits).
        """
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(0.3)
        # The head maps one encoder hidden vector to one score per class.
        hidden_size = self.bert.config.hidden_size
        self.classifier = nn.Linear(hidden_size, num_labels)

    def forward(self, input_ids, attention_mask, token_type_ids=None):
        """Return raw (unnormalised) class scores for the given inputs.

        Args:
            input_ids: Token ids, forwarded to the encoder.
            attention_mask: Attention mask, forwarded to the encoder.
            token_type_ids: Optional segment ids, forwarded to the encoder
                unchanged (``None`` is passed through as-is).

        Returns:
            The output of the linear head applied to the encoder's hidden
            state at index 0 of the second axis of ``last_hidden_state``
            (assumes that axis is the token position, i.e. the first token
            of each sequence -- TODO confirm).
        """
        encoded = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        first_token = encoded.last_hidden_state[:, 0]
        return self.classifier(self.dropout(first_token))
# ===== Load Model and Tokenizer =====
# The tokenizer and the encoder backbone are created from the same pretrained
# checkpoint name; the saved classifier weights are then read from a local
# file and mapped onto the CPU.
_BASE_CHECKPOINT = "indobenchmark/indobert-base-p1"

tokenizer = AutoTokenizer.from_pretrained(_BASE_CHECKPOINT)
model = IndoBERTClassifier(_BASE_CHECKPOINT, num_labels=4)
# NOTE(review): torch.load unpickles the file -- only load checkpoints that
# come from a trusted source.
model.load_state_dict(
    torch.load("pytorch_model.bin", map_location=torch.device("cpu"))
)
model.eval()  # switch to evaluation mode for inference
# ===== Prediction Function =====
def predict(text):
    """Classify one message and return a formatted result string.

    Args:
        text: The message to classify. ``None``, an empty string, or a
            whitespace-only string is not sent to the model; a prompt asking
            for input is returned instead.

    Returns:
        A human-readable string containing the predicted label (decoded with
        the module-level ``le`` encoder) and its integer class index.
    """
    # Guard: classifying an empty message would yield an arbitrary category,
    # and the tokenizer rejects None outright.
    if not text or not text.strip():
        return "⚠️ Masukkan kalimat terlebih dahulu."

    # truncation=True only has an effect when a maximum length is known, so
    # pass one explicitly. Prefer the encoder's own position-embedding limit;
    # 512 is the fallback (presumably the BERT-base limit -- verify if the
    # backbone changes).
    max_len = getattr(model.bert.config, "max_position_embeddings", 512)
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_len,
    )

    # Inference only: no gradients needed.
    with torch.no_grad():
        logits = model(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            token_type_ids=inputs.get("token_type_ids"),
        )

    # Highest-scoring class index for the single input, then map it back to
    # its label.
    pred = torch.argmax(logits, dim=1).item()
    label = le.inverse_transform([pred])[0]
    return f"🚨 Kategori Deteksi:\n\n📌 {label} (Label {pred})"
# ===== Gradio UI =====
# Page layout: a heading and a short instruction, an input row, an output row,
# and a button that runs `predict` on the input and shows the result.
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Deteksi Spam Penipuan Berbahasa Indonesia")
    gr.Markdown("Masukkan kalimat pesan yang ingin diperiksa apakah termasuk penipuan, permintaan data diri, tautan mencurigakan, atau tawaran kerja palsu.")
    with gr.Row():
        input_text = gr.Textbox(
            lines=3,
            placeholder="Contoh: Selamat! Anda mendapatkan hadiah. Klik link ini.",
            label="💬 Masukkan Kalimat",
        )
    with gr.Row():
        output_text = gr.Textbox(label="📤 Hasil Klasifikasi")
    run_button = gr.Button("🔍 Deteksi")
    # Event handlers must be registered while the Blocks context is open.
    run_button.click(fn=predict, inputs=input_text, outputs=output_text)

# Start the web server when the module is executed.
demo.launch()