ElizabethSrgh committed
Commit 1be3171 · verified · 1 Parent(s): 14658f7

Update app.py

Files changed (1)
  1. app.py +40 -41
app.py CHANGED
@@ -3,56 +3,55 @@ import torch.nn as nn
 from transformers import AutoTokenizer, AutoModel, AutoModelForSeq2SeqLM
 import gradio as gr
 
-# Multi-task classification model
+# Multi-task classification model definition
 class MultiTaskModel(nn.Module):
     def __init__(self, base_model_name, num_topic_classes, num_sentiment_classes):
-        super(MultiTaskModel, self).__init__()
+        super().__init__()
         self.encoder = AutoModel.from_pretrained(base_model_name)
-        hidden_size = self.encoder.config.hidden_size
-        self.topik_classifier = nn.Linear(hidden_size, num_topic_classes)
-        self.sentiment_classifier = nn.Linear(hidden_size, num_sentiment_classes)
+        hs = self.encoder.config.hidden_size
+        self.topik_classifier = nn.Linear(hs, num_topic_classes)
+        self.sentiment_classifier = nn.Linear(hs, num_sentiment_classes)
 
     def forward(self, input_ids, attention_mask, token_type_ids=None):
-        outputs = self.encoder(
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            token_type_ids=token_type_ids
-        )
-        pooled_output = outputs.last_hidden_state[:, 0]
-        topik_logits = self.topik_classifier(pooled_output)
-        sentimen_logits = self.sentiment_classifier(pooled_output)
-        return topik_logits, sentimen_logits
-
-# Load the classification model
-tokenizer = AutoTokenizer.from_pretrained("tokenizer")
+        out = self.encoder(input_ids=input_ids,
+                           attention_mask=attention_mask,
+                           token_type_ids=token_type_ids)
+        pooled = out.last_hidden_state[:, 0]
+        return self.topik_classifier(pooled), self.sentiment_classifier(pooled)
+
+# Load the classification tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained("tokenizer")  # the tokenizer folder must be uploaded
 model = MultiTaskModel("indobenchmark/indobert-base-p1", num_topic_classes=5, num_sentiment_classes=3)
-model.load_state_dict(torch.load("model.pt", map_location=torch.device("cpu")))
+model.load_state_dict(torch.load("model.pt", map_location="cpu"))
 model.eval()
 
-# Load the summarization model
-sum_tokenizer = AutoTokenizer.from_pretrained("cahya/bart-base-indonesian-summarization")
-sum_model = AutoModelForSeq2SeqLM.from_pretrained("cahya/bart-base-indonesian-summarization")
+# Load the summarization tokenizer and model
+sum_tok = AutoTokenizer.from_pretrained("xTorch8/bart-id-summarization")
+sum_model = AutoModelForSeq2SeqLM.from_pretrained("xTorch8/bart-id-summarization")
 
-topik_labels = ["Produk", "Layanan", "Pengiriman", "Pembatalan", "Lainnya"]
-sentimen_labels = ["Negatif", "Netral", "Positif"]
+# Classification labels
+labels_topik = ["Produk", "Layanan", "Pengiriman", "Pembatalan", "Lainnya"]
+labels_sentiment = ["Negatif", "Netral", "Positif"]
 
-def klasifikasi(text):
-    # Classification
-    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+# Analysis function
+def analyze(text):
+    # Topic & sentiment classification
+    inp = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
     with torch.no_grad():
-        topik_logits, sentimen_logits = model(**inputs)
-    topik_idx = torch.argmax(topik_logits, dim=-1).item()
-    sentimen_idx = torch.argmax(sentimen_logits, dim=-1).item()
-
-    topik = topik_labels[topik_idx]
-    sentimen = sentimen_labels[sentimen_idx]
-
-    # Summary
-    sum_inputs = sum_tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
-    summary_ids = sum_model.generate(**sum_inputs, max_length=40, min_length=10, do_sample=False)
-    ringkasan = sum_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
-
-    return f"HASIL ANALISIS\nTopik: {topik}\nSentimen: {sentimen}\nRingkasan: {ringkasan}"
-
-demo = gr.Interface(fn=klasifikasi, inputs="text", outputs="text", title="Klasifikasi Topik dan Sentimen Pelanggan")
+        t_logits, s_logits = model(**inp)
+    topik = labels_topik[int(torch.argmax(t_logits))]
+    sentimen = labels_sentiment[int(torch.argmax(s_logits))]
+
+    # Text summary
+    s_inp = sum_tok(text, return_tensors="pt", truncation=True, padding=True)
+    summ_ids = sum_model.generate(**s_inp, max_length=50, num_beams=2)
+    ringkasan = sum_tok.decode(summ_ids[0], skip_special_tokens=True)
+
+    return (f"HASIL ANALISIS\n"
+            f"Topik: {topik}\n"
+            f"Sentimen: {sentimen}\n"
+            f"Ringkasan: {ringkasan}")
+
+# Gradio interface
+demo = gr.Interface(fn=analyze, inputs="text", outputs="text", title="Analisis Topik, Sentimen, dan Ringkasan Pelanggan")
 demo.launch()
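
Not part of the commit: once the Space restarts with this app.py, the single text-in/text-out endpoint can be smoke-tested from Python with gradio_client. This is a minimal sketch only; the Space id below is a placeholder, and /predict is Gradio's default endpoint name for a lone gr.Interface.

# Hypothetical client-side check of the updated Space (not included in this commit).
from gradio_client import Client

client = Client("ElizabethSrgh/SPACE-NAME")  # placeholder Space id, replace with the real one
result = client.predict(
    "Pengiriman barang saya terlambat dan kemasannya rusak.",  # sample customer complaint
    api_name="/predict",  # default endpoint name for a single gr.Interface
)
print(result)  # expected shape: "HASIL ANALISIS\nTopik: ...\nSentimen: ...\nRingkasan: ..."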