Create fraud_detector.py
Browse files- fraud_detector.py +43 -0
fraud_detector.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# fraud_detector.py
|
2 |
+
|
3 |
+
import torch
|
4 |
+
import pandas as pd
|
5 |
+
from transformers import BertTokenizer, BertForSequenceClassification
|
6 |
+
from torch.utils.data import Dataset
|
7 |
+
from sklearn.model_selection import train_test_split
|
8 |
+
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
|
9 |
+
|
10 |
+
class FinancialFraudDataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with per-sample labels.

    Args:
        encodings: Mapping from feature name to an indexable collection of
            per-sample values (anything exposing ``.items()``).
        labels: Indexable, sized collection holding one label per sample.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Return the number of samples, taken from the number of labels."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of tensors plus a ``"labels"`` entry."""
        item = {
            key: self._to_new_tensor(val[idx])
            for key, val in self.encodings.items()
        }
        item["labels"] = self._to_new_tensor(self.labels[idx])
        return item

    @staticmethod
    def _to_new_tensor(value):
        """Copy ``value`` into a fresh tensor.

        ``torch.tensor(existing_tensor)`` emits a copy-construct UserWarning;
        ``detach().clone()`` is the equivalent, warning-free way to copy a
        value that is already a tensor.
        """
        if isinstance(value, torch.Tensor):
            return value.detach().clone()
        return torch.tensor(value)
|
22 |
+
|
23 |
+
class FinancialFraudTrainer:
|
24 |
+
def __init__(self, data_path=None):
|
25 |
+
self.data_path = data_path
|
26 |
+
self.tokenizer = None
|
27 |
+
self.model = None
|
28 |
+
|
29 |
+
def load_model(self, model_name_or_path="hfl/chinese-roberta-wwm-ext", num_labels=2):
    """Load the classification model and its tokenizer, then switch to eval mode.

    Args:
        model_name_or_path: Identifier handed to ``from_pretrained`` for both
            the model and the tokenizer. Defaults to the checkpoint that was
            previously hard-coded.
        num_labels: Number of output classes for the classification head.
            Defaults to 2.
    """
    # Load from the Hugging Face model hub (or pass the path of your own
    # trained model).
    self.model = BertForSequenceClassification.from_pretrained(
        model_name_or_path, num_labels=num_labels
    )
    self.tokenizer = BertTokenizer.from_pretrained(model_name_or_path)
    self.model.eval()
|
34 |
+
|
35 |
+
def predict_transaction(self, text):
|
36 |
+
try:
|
37 |
+
self.model.eval()
|
38 |
+
inputs = self.tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
|
39 |
+
with torch.no_grad():
|
40 |
+
outputs = self.model(**inputs)
|
41 |
+
probs = torch.softmax(outputs.logits, dim=1)
|
42 |
+
prediction = torch.argmax(probs, dim=1).item()
|
43 |
+
confidence = probs[0][prediction].item()
|