End of training

Files changed (8) hide show

README.md CHANGED Viewed

@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [microsoft/Phi-4-mini-instruct](https://huggingface.co/microsoft/Phi-4-mini-instruct) on a MITRE ATT&CK tactics and techniques Q&A dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.4895
 ## Model description
@@ -44,19 +44,27 @@ The following hyperparameters were used during training:
 - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.1
-- num_epochs: 3
 ### Training results
-| Training Loss | Epoch  | Step | Validation Loss |
-|:-------------:|:------:|:----:|:---------------:|
-| 2.7429        | 1.5201 | 50   | 2.6420          |
 ### Framework versions
 - PEFT 0.15.2
-- Transformers 4.52.4
 - Pytorch 2.6.0+cu124
 - Datasets 2.14.4
-- Tokenizers 0.21.1

 This model is a fine-tuned version of [microsoft/Phi-4-mini-instruct](https://huggingface.co/microsoft/Phi-4-mini-instruct) on a MITRE ATT&CK tactics and techniques Q&A dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.6524
 ## Model description
 - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 15
 ### Training results
+| Training Loss | Epoch   | Step | Validation Loss |
+|:-------------:|:-------:|:----:|:---------------:|
+| 2.9068        | 1.5201  | 50   | 2.7980          |
+| 2.2862        | 3.0306  | 100  | 2.1574          |
+| 1.8811        | 4.5507  | 150  | 1.8244          |
+| 1.7806        | 6.0612  | 200  | 1.7332          |
+| 1.7361        | 7.5813  | 250  | 1.6944          |
+| 1.6709        | 9.0918  | 300  | 1.6713          |
+| 1.6989        | 10.6119 | 350  | 1.6588          |
+| 1.6858        | 12.1224 | 400  | 1.6524          |
+| 1.6844        | 13.6424 | 450  | 1.6502          |
 ### Framework versions
 - PEFT 0.15.2
+- Transformers 4.53.0
 - Pytorch 2.6.0+cu124
 - Datasets 2.14.4
+- Tokenizers 0.21.2

adapter_config.json CHANGED Viewed

@@ -24,13 +24,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "o_proj",
     "v_proj",
     "k_proj",
-    "q_proj",
-    "down_proj",
     "up_proj",
-    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "down_proj",
+    "gate_proj",
     "o_proj",
     "v_proj",
     "k_proj",
     "up_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4961560fd989d9ee7765b7a2df7f64031dcf63a8c948289752b163d1987cb63
 size 35668592

 version https://git-lfs.github.com/spec/v1
+oid sha256:54f02ec5b73f7767f960434a492e0ac47e14d2627a1fd12caeb0c40d4d77fbea
 size 35668592

base_model_info.json CHANGED Viewed

@@ -4,6 +4,6 @@
   "quantization": "4-bit",
   "dataset": "MITRE ATT&CK tactics and techniques",
   "task": "Cybersecurity Q&A",
-  "final_eval_loss": 2.4895331859588623,
   "inference_instructions": "Load base model with quantization, then load adapters"
 }

   "quantization": "4-bit",
   "dataset": "MITRE ATT&CK tactics and techniques",
   "task": "Cybersecurity Q&A",
+  "final_eval_loss": 1.6523603200912476,
   "inference_instructions": "Load base model with quantization, then load adapters"
 }

runs/Jul09_13-26-19_8a79f5b3baf0/events.out.tfevents.1752067583.8a79f5b3baf0.769.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:ab93617947bd928725fa0bedb8b0710402295c76e4a398d0846c4f2ef57c6291
+size 14110

runs/Jul09_13-26-19_8a79f5b3baf0/events.out.tfevents.1752079931.8a79f5b3baf0.769.1 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:f564abf6a82d42a8eb840450072bec158f40456ed80f08f6883554c4227e174e
+size 359

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b2108bc3203cd0081785e0ba6af0cf7141b13bfb3155ff4e78e169a5e831db6c
-size 5368

 version https://git-lfs.github.com/spec/v1
+oid sha256:433efcd87990360d7b3c74d8f6b4cc98f39826bae0cd9b4cb93ba749b89b6ef1
+size 5432

training_config.json CHANGED Viewed

@@ -7,9 +7,9 @@
   "train_test_split": 0.8,
   "random_seed": 42,
   "batch_size": 1,
-  "gradient_accumulation_steps": 16,
   "learning_rate": 2e-05,
-  "num_epochs": 3,
   "warmup_ratio": 0.1,
   "weight_decay": 0.01,
   "save_steps": 100,
@@ -22,9 +22,9 @@
   "dataloader_num_workers": 2,
   "dataloader_pin_memory": true,
   "USE_PEFT": true,
-  "final_eval_loss": 2.4895331859588623,
   "target_eval_loss": 1.0,
   "training_completed": true,
-  "timestamp": "2025-06-26T10:09:09.831498",
   "dataset_info": "MITRE ATT&CK tactics and techniques Q&A"
 }

   "train_test_split": 0.8,
   "random_seed": 42,
   "batch_size": 1,
+  "gradient_accumulation_steps": 8,
   "learning_rate": 2e-05,
+  "num_epochs": 15,
   "warmup_ratio": 0.1,
   "weight_decay": 0.01,
   "save_steps": 100,
   "dataloader_num_workers": 2,
   "dataloader_pin_memory": true,
   "USE_PEFT": true,
+  "final_eval_loss": 1.6523603200912476,
   "target_eval_loss": 1.0,
   "training_completed": true,
+  "timestamp": "2025-07-09T16:52:12.376347",
   "dataset_info": "MITRE ATT&CK tactics and techniques Q&A"
 }