subbunanepalli committed on
Commit
d490011
·
verified ·
1 Parent(s): f1a93da

Create config.py

Browse files
Files changed (1) hide show
  1. config.py +64 -0
config.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Project-wide configuration constants.

Defines file-system paths, dataset column names, training hyperparameters,
the torch device, pretrained model identifiers and per-label strategy
settings.

Importing this module has a side effect: it creates ``MODEL_SAVE_DIR``,
``PREDICTIONS_SAVE_DIR`` and ``TOKENIZER_PATH`` if they do not already exist.
"""

import os

import torch

# --- Paths ---
# Absolute path to the input CSV (located under /kaggle/input).
DATA_PATH = '/kaggle/input/synthesis-data/synthetic_transactions_samples_5000.csv'
# NOTE(review): the directory name mentions roberta; confirm it is still the
# intended location if MODEL_NAME (bottom of this file) is switched.
TOKENIZER_PATH = './tokenizer_roberta/'
LABEL_ENCODERS_PATH = './label_encoders.pkl'
MODEL_SAVE_DIR = './saved_models/'
PREDICTIONS_SAVE_DIR = './predictions/'

# --- Data Columns ---
TEXT_COLUMN = "Sanction_Context"
LABEL_COLUMNS = [
    "Red_Flag_Reason",
    "Maker_Action",
    "Escalation_Level",
    "Risk_Category",
    "Risk_Drivers",
    "Investigation_Outcome",
]
# Currently empty: no metadata columns are configured.
METADATA_COLUMNS = []

# --- Model Hyperparameters ---
# presumably MAX_LEN is the tokenizer's maximum sequence length -- TODO confirm
MAX_LEN = 128
BATCH_SIZE = 16
LEARNING_RATE = 2e-5
NUM_EPOCHS = 3
DROPOUT_RATE = 0.3

# --- Device Configuration ---
# Resolved once at import time: CUDA when torch reports it available, else CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Model Names ---
BERT_MODEL_NAME = 'bert-base-uncased'
ROBERTA_MODEL_NAME = 'roberta-base'
DEBERTA_MODEL_NAME = 'microsoft/deberta-base'

# --- TF-IDF ---
TFIDF_MAX_FEATURES = 5000

# --- Strategy Definitions ---
# Keyed by label column name. Only four of the six LABEL_COLUMNS have an entry
# ("Red_Flag_Reason" and "Risk_Drivers" do not), and the keys present differ
# per entry ("loss", "enhancements", "type").
# NOTE(review): how consumers interpret these values is not visible here.
FIELD_STRATEGIES = {
    "Maker_Action": {
        "loss": "focal_loss",
        "enhancements": ["action_templates", "context_prompt_tuning"],
    },
    "Risk_Category": {
        "enhancements": ["numerical_metadata", "transaction_patterns"],
    },
    "Escalation_Level": {
        "enhancements": ["class_balancing", "policy_keyword_patterns"],
    },
    "Investigation_Outcome": {
        "type": "classification_or_generation",
    },
}

# --- Ensure directories exist ---
# exist_ok=True makes repeated imports/runs safe when the directories exist.
for _directory in (MODEL_SAVE_DIR, PREDICTIONS_SAVE_DIR, TOKENIZER_PATH):
    os.makedirs(_directory, exist_ok=True)

# ✅ Set the active model for training/deployment
MODEL_NAME = ROBERTA_MODEL_NAME