File size: 2,462 Bytes
98eaec6
 
14658f7
98eaec6
 
1be3171
98eaec6
 
1be3171
98eaec6
1be3171
 
 
98eaec6
2e6e850
1be3171
 
 
 
 
 
 
 
3b2ba01
1be3171
98eaec6
 
1be3171
 
 
14658f7
1be3171
 
 
98eaec6
1be3171
 
 
 
98eaec6
1be3171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98eaec6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModel, AutoModelForSeq2SeqLM
import gradio as gr

# Multi-task classification model: one shared encoder feeding two linear heads.
class MultiTaskModel(nn.Module):
    """Shared pretrained encoder with separate topic and sentiment heads.

    Each head is a single linear layer applied to the encoder's hidden state
    at sequence position 0.

    Args:
        base_model_name: Identifier handed to ``AutoModel.from_pretrained``.
        num_topic_classes: Output width of the topic head.
        num_sentiment_classes: Output width of the sentiment head.
    """

    def __init__(self, base_model_name, num_topic_classes, num_sentiment_classes):
        super().__init__()
        self.encoder = AutoModel.from_pretrained(base_model_name)
        hidden_dim = self.encoder.config.hidden_size
        # Attribute names are part of the checkpoint's state-dict keys.
        self.topik_classifier = nn.Linear(hidden_dim, num_topic_classes)
        self.sentiment_classifier = nn.Linear(hidden_dim, num_sentiment_classes)

    def forward(self, input_ids, attention_mask, token_type_ids=None):
        """Encode the batch and return ``(topic_logits, sentiment_logits)``."""
        encoded = self.encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        # The hidden state at position 0 serves as the sequence summary.
        first_token = encoded.last_hidden_state[:, 0]
        topic_logits = self.topik_classifier(first_token)
        sentiment_logits = self.sentiment_classifier(first_token)
        return topic_logits, sentiment_logits

# Classification labels; analyze() indexes these lists with the argmax of the
# corresponding head's logits, so list order must match the trained heads.
labels_topik = ["Produk", "Layanan", "Pengiriman", "Pembatalan", "Lainnya"]
labels_sentiment = ["Negatif", "Netral", "Positif"]

# Load the classification tokenizer and model.
tokenizer = AutoTokenizer.from_pretrained("tokenizer")  # the "tokenizer" folder must be uploaded
# Head sizes are derived from the label lists so the two cannot drift apart:
# a mismatch now fails loudly in load_state_dict instead of mislabeling output.
model = MultiTaskModel(
    "indobenchmark/indobert-base-p1",
    num_topic_classes=len(labels_topik),
    num_sentiment_classes=len(labels_sentiment),
)
# NOTE(review): torch.load unpickles the file, so "model.pt" must come from a
# trusted source; consider weights_only=True where the installed PyTorch
# version supports it.
model.load_state_dict(torch.load("model.pt", map_location="cpu"))
model.eval()

# Load the summarization tokenizer and model (same hub id for both).
_SUMMARIZER_ID = "xTorch8/bart-id-summarization"
sum_tok = AutoTokenizer.from_pretrained(_SUMMARIZER_ID)
sum_model = AutoModelForSeq2SeqLM.from_pretrained(_SUMMARIZER_ID)

# Analysis entry point and its private helpers.
def _max_input_length(tok, config):
    """Return the truncation length for *tok*, capped by the model's position limit.

    ``truncation=True`` alone is not enough when the tokenizer carries no
    usable ``model_max_length``: nothing is truncated and over-long inputs
    run past the model's position embeddings. Returns ``None`` when *config*
    has no ``max_position_embeddings``, which leaves the tokenizer's own
    default behaviour in place.
    """
    # NOTE(review): assumes max_position_embeddings is the usable input length
    # for both models loaded by this file -- confirm against their configs.
    position_limit = getattr(config, "max_position_embeddings", None)
    if position_limit is None:
        return None
    return min(tok.model_max_length, position_limit)


def _classify(text):
    """Return ``(topic_label, sentiment_label)`` predicted for *text*."""
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=_max_input_length(tokenizer, model.encoder.config),
    )
    with torch.no_grad():
        topic_logits, sentiment_logits = model(**encoded)
    topic_idx = int(torch.argmax(topic_logits))
    sentiment_idx = int(torch.argmax(sentiment_logits))
    return labels_topik[topic_idx], labels_sentiment[sentiment_idx]


def _summarize(text):
    """Return a short generated summary of *text*."""
    encoded = sum_tok(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=_max_input_length(sum_tok, sum_model.config),
    )
    with torch.no_grad():
        summary_ids = sum_model.generate(**encoded, max_length=50, num_beams=2)
    return sum_tok.decode(summary_ids[0], skip_special_tokens=True)


def analyze(text):
    """Classify topic and sentiment of *text* and summarize it.

    Args:
        text: Raw input string from the UI.

    Returns:
        A multi-line report string with the predicted topic, the predicted
        sentiment and the generated summary, or a short notice when *text*
        is ``None``, empty or whitespace-only.
    """
    # Guard first: running the models on blank input only produces noise.
    if text is None or not text.strip():
        return "Teks kosong. Masukkan teks untuk dianalisis."

    topik, sentimen = _classify(text)
    ringkasan = _summarize(text)

    return (f"HASIL ANALISIS\n"
            f"Topik: {topik}\n"
            f"Sentimen: {sentimen}\n"
            f"Ringkasan: {ringkasan}")

# Gradio UI: one text input wired to analyze(), one text output.
demo = gr.Interface(
    fn=analyze,
    inputs="text",
    outputs="text",
    title="Analisis Topik, Sentimen, dan Ringkasan Pelanggan",
)
demo.launch()