Spaces:

point9
/

Roberta_model_Test

Sleeping

App Files Files Community

subbunanepalli committed on Jun 12

Commit

d490011

verified ·

1 Parent(s): f1a93da

Create config.py

Browse files

Files changed (1) hide show

config.py +64 -0

config.py ADDED Viewed

	@@ -0,0 +1,64 @@

+import torch
+import os
+# --- Paths (relative paths resolve against the current working directory) ---
+DATA_PATH = '/kaggle/input/synthesis-data/synthetic_transactions_samples_5000.csv'  # absolute path to a CSV under /kaggle/input; presumably the training data -- confirm
+TOKENIZER_PATH = './tokenizer_roberta/'  # directory; created at import time at the end of this file
+LABEL_ENCODERS_PATH = './label_encoders.pkl'  # presumably pickled label encoders -- verify against the code that reads/writes it
+MODEL_SAVE_DIR = './saved_models/'  # directory; created at import time at the end of this file
+PREDICTIONS_SAVE_DIR = './predictions/'  # directory; created at import time at the end of this file
+# --- Data columns (names are presumably CSV column headers -- confirm against the dataset) ---
+TEXT_COLUMN = "Sanction_Context"  # presumably the free-text input column
+LABEL_COLUMNS = [  # six target column names
+    "Red_Flag_Reason",
+    "Maker_Action",
+    "Escalation_Level",
+    "Risk_Category",
+    "Risk_Drivers",
+    "Investigation_Outcome"
+]
+METADATA_COLUMNS = []  # empty: no metadata columns are configured here
+# --- Model hyperparameters (consumed outside this file) ---
+MAX_LEN = 128  # presumably the maximum token sequence length -- confirm with the tokenization code
+BATCH_SIZE = 16
+LEARNING_RATE = 2e-5
+NUM_EPOCHS = 3
+DROPOUT_RATE = 0.3
+# --- Device configuration: CUDA when torch reports it available, otherwise CPU ---
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # evaluated once, at import time
+# --- Model names (presumably Hugging Face hub identifiers -- confirm) ---
+BERT_MODEL_NAME = 'bert-base-uncased'
+ROBERTA_MODEL_NAME = 'roberta-base'  # the one selected as MODEL_NAME at the end of this file
+DEBERTA_MODEL_NAME = 'microsoft/deberta-base'
+# --- TF-IDF ---
+TFIDF_MAX_FEATURES = 5000  # presumably a vocabulary-size cap for a TF-IDF vectorizer -- confirm with the caller
+# --- Strategy definitions: per-label settings keyed by label column name ---
+FIELD_STRATEGIES = {  # NOTE(review): covers 4 of the 6 LABEL_COLUMNS; "Red_Flag_Reason" and "Risk_Drivers" have no entry
+    "Maker_Action": {
+        "loss": "focal_loss",  # the only entry that names a loss
+        "enhancements": ["action_templates", "context_prompt_tuning"]
+    },
+    "Risk_Category": {
+        "enhancements": ["numerical_metadata", "transaction_patterns"]  # NOTE(review): "numerical_metadata" is listed while METADATA_COLUMNS is empty -- confirm intent
+    },
+    "Escalation_Level": {
+        "enhancements": ["class_balancing", "policy_keyword_patterns"]
+    },
+    "Investigation_Outcome": {
+        "type": "classification_or_generation"  # the only entry with a "type" key; it has no "enhancements"
+    }
+}
+# --- Ensure directories exist (side effect: runs whenever this module is imported) ---
+os.makedirs(MODEL_SAVE_DIR, exist_ok=True)  # exist_ok=True: no error if the directory already exists
+os.makedirs(PREDICTIONS_SAVE_DIR, exist_ok=True)
+os.makedirs(TOKENIZER_PATH, exist_ok=True)
+# Active model for training/deployment: point MODEL_NAME at one of the *_MODEL_NAME constants above
+MODEL_NAME = ROBERTA_MODEL_NAME