jerrychen428 committed on
Commit
e8b910d
·
verified ·
1 Parent(s): 7b44596

Create fraud_detector.py

Browse files
Files changed (1) hide show
  1. fraud_detector.py +43 -0
fraud_detector.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # fraud_detector.py
2
+
3
+ import torch
4
+ import pandas as pd
5
+ from transformers import BertTokenizer, BertForSequenceClassification
6
+ from torch.utils.data import Dataset
7
+ from sklearn.model_selection import train_test_split
8
+ from sklearn.metrics import accuracy_score, precision_recall_fscore_support
9
+
10
class FinancialFraudDataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with labels.

    Each sample is a dict holding one tensor per encoding field plus a
    ``"labels"`` tensor built from the label at the same index.
    """

    def __init__(self, encodings, labels):
        """Keep references to the encodings and labels.

        Args:
            encodings: Mapping of field name to a per-sample indexable
                sequence (it is read via ``.items()`` and ``value[idx]``).
            labels: Indexable sequence of labels; its length defines the
                dataset size.
        """
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Return the number of samples, taken from the label sequence."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Build the sample at ``idx`` as a dict of freshly created tensors."""
        sample = {}
        for field, column in self.encodings.items():
            sample[field] = torch.tensor(column[idx])
        # The label is added last so it follows the encoding fields.
        sample["labels"] = torch.tensor(self.labels[idx])
        return sample
22
+
23
class FinancialFraudTrainer:
    """Load a BERT-style two-label classifier and score transaction text.

    The model and tokenizer are not created in ``__init__``; call
    ``load_model()`` before ``predict_transaction()``.
    """

    def __init__(self, data_path=None):
        """Store the optional data path and leave model/tokenizer unset.

        Args:
            data_path: Optional dataset location. It is only stored on the
                instance; no method of this class reads it.
        """
        self.data_path = data_path
        self.tokenizer = None
        self.model = None

    def load_model(self, model_name="hfl/chinese-roberta-wwm-ext"):
        """Load the classifier and tokenizer, then switch to eval mode.

        Args:
            model_name: Hugging Face hub id or local path of the checkpoint.
                Defaults to the hub checkpoint originally hard-coded here.
        """
        # Load from the Hugging Face model hub (or pass the path to your own
        # trained model).
        # NOTE(review): the default looks like a base (not fine-tuned)
        # checkpoint, in which case the 2-label head would be freshly
        # initialised -- confirm before trusting predictions from it.
        self.model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
        self.tokenizer = BertTokenizer.from_pretrained(model_name)
        self.model.eval()

    def predict_transaction(self, text):
        """Classify a single piece of text.

        Args:
            text: The text to classify. It must yield a batch of one, because
                the predicted index is extracted with ``.item()``.

        Returns:
            A ``(prediction, confidence)`` tuple: the index of the
            highest-probability class and the softmax probability of that
            class. What each index means depends on how the model was trained.

        Raises:
            RuntimeError: If ``load_model()`` has not been called yet.
        """
        if self.model is None or self.tokenizer is None:
            raise RuntimeError("Model is not loaded; call load_model() first.")

        self.model.eval()
        inputs = self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=128,
        )
        # Inference only: skip gradient tracking.
        with torch.no_grad():
            outputs = self.model(**inputs)
            probs = torch.softmax(outputs.logits, dim=1)
        prediction = torch.argmax(probs, dim=1).item()
        confidence = probs[0][prediction].item()
        return prediction, confidence