teguhsuby committed
Commit 2fc050b · 1 Parent(s): eb90797
app.py ADDED
@@ -0,0 +1,41 @@
+ import gradio as gr
+ import torch
+ import joblib
+ from transformers import AutoTokenizer
+ from dinstilBert import MultiTaskBERT
+
+ model = MultiTaskBERT()
+ model.load_state_dict(torch.load("model.pt", map_location="cpu"))
+ model.eval()
+
+ tokenizer = AutoTokenizer.from_pretrained("distilbert-base-multilingual-cased")
+ le = joblib.load("label_encoder.pkl")
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model.to(device)
+
+ def predict(text):
+     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
+     with torch.no_grad():
+         sentiment_logits, lang_logits = model(inputs["input_ids"], inputs["attention_mask"])
+     pred_sentiment = sentiment_logits.argmax(dim=1).item()
+     pred_lang = lang_logits.argmax(dim=1).item()
+
+     sentiment_label = "positive" if pred_sentiment == 1 else "negative"
+     lang_label = le.inverse_transform([pred_lang])[0]
+
+     return sentiment_label, lang_label
+
+
+
+ interface = gr.Interface(
+     fn=predict,
+     inputs=gr.Textbox(label="Enter text in a supported language (English/Dutch/Spanish/French)"),
+     outputs=[
+         gr.Textbox(label="Predicted Sentiment"),
+         gr.Textbox(label="Predicted Language")
+     ],
+     title="Multitask DistilBERT: Sentiment + Language",
+     description="Predicts the sentiment and language of a text using a multitask DistilBERT model."
+ )
+
+ interface.launch()
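One detail worth noting: interface.launch() runs at module import time, so importing predict from app.py (for example, in a test) would also start the Gradio server. A common guard, suggested here as a sketch rather than part of the commit, is:

    if __name__ == "__main__":
        # Start the Gradio server only when app.py is run directly,
        # not when predict() is imported from another module.
        interface.launch()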
dinstilBert.py ADDED
@@ -0,0 +1,19 @@
+ from transformers import AutoModel, AutoTokenizer
+ import torch.nn as nn
+
+ class MultiTaskBERT(nn.Module):
+     def __init__(self, num_lang_classes=4, num_sentiment_classes=2):
+
+         super().__init__()
+         self.bert = AutoModel.from_pretrained("distilbert-base-multilingual-cased")
+         self.dropout = nn.Dropout(0.3)
+         self.sentiment_head = nn.Linear(768, num_sentiment_classes)
+         self.lang_head = nn.Linear(768, num_lang_classes)
+
+     def forward(self, input_ids, attention_mask):
+         outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
+         pooled_output = outputs.last_hidden_state[:, 0]
+         pooled_output = self.dropout(pooled_output)
+         sentiment_logits = self.sentiment_head(pooled_output)
+         lang_logits = self.lang_head(pooled_output)
+         return sentiment_logits, lang_logits
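For context, MultiTaskBERT hard-shares one DistilBERT encoder between two linear heads on the [CLS] position and returns a pair of logit tensors per forward pass. A minimal standalone usage sketch (the Spanish example sentence is illustrative only):

    import torch
    from transformers import AutoTokenizer
    from dinstilBert import MultiTaskBERT

    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-multilingual-cased")
    model = MultiTaskBERT()  # heads are randomly initialized until model.pt is loaded
    model.eval()

    enc = tokenizer("Una película fantástica.", return_tensors="pt")
    with torch.no_grad():
        sentiment_logits, lang_logits = model(enc["input_ids"], enc["attention_mask"])
    print(sentiment_logits.shape, lang_logits.shape)  # torch.Size([1, 2]) torch.Size([1, 4])

Sharing the encoder keeps the cost of the second task to a single extra 768-wide linear layer, which is the usual motivation for this hard-parameter-sharing setup.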
label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5d807aacf466425ba71f7fd36b79cca5de98feef504ff31f280a9a293c94ee71
+ size 493
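app.py loads this pickle with joblib and calls le.inverse_transform on the language class index, so it is presumably a scikit-learn LabelEncoder over four language labels. A sketch of how such an encoder is typically produced; the four label strings below are assumptions, not values read from the pickle:

    import joblib
    from sklearn.preprocessing import LabelEncoder

    # Hypothetical label set, chosen to match num_lang_classes=4.
    le = LabelEncoder().fit(["english", "dutch", "spanish", "french"])
    joblib.dump(le, "label_encoder.pkl")
    print(le.inverse_transform([0]))  # class index back to its label string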
model.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:04990bd653e7fbefb47284f5cb46939dccc0922afef52d601ce879a202b8c745
+ size 538989962
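model.pt is a ~539 MB Git LFS object. Given the model.load_state_dict(torch.load(...)) call in app.py, it presumably holds only the state dict, saved along these lines (a sketch; the training code is not part of this commit):

    import torch
    from dinstilBert import MultiTaskBERT

    model = MultiTaskBERT()
    # ... fine-tune on the sentiment and language tasks ...
    torch.save(model.state_dict(), "model.pt")  # weights only, not the pickled module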
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ transformers
+ torch
+ gradio
+ scikit-learn
+ sentencepiece
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
+ {
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": false,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "token": "[REDACTED]",
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "DistilBertTokenizer",
+   "unk_token": "[UNK]"
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff
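Since the tokenizer files (tokenizer.json, tokenizer_config.json, special_tokens_map.json, vocab.txt) are committed alongside the app, the tokenizer could also be loaded from the repo directory instead of being re-downloaded from the Hub; a sketch (app.py itself pulls from the Hub):

    from transformers import AutoTokenizer

    # Load the committed tokenizer files from the current directory.
    tokenizer = AutoTokenizer.from_pretrained(".")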