Simform-tech committed on
Commit
22ded17
·
verified ·
1 Parent(s): 2971e77

End of training

Browse files
README.md CHANGED
@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [microsoft/Phi-4-mini-instruct](https://huggingface.co/microsoft/Phi-4-mini-instruct) on the MITRE ATT&CK tactics and techniques dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 2.4895
20
 
21
  ## Model description
22
 
@@ -44,19 +44,27 @@ The following hyperparameters were used during training:
44
  - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
45
  - lr_scheduler_type: cosine
46
  - lr_scheduler_warmup_ratio: 0.1
47
- - num_epochs: 3
48
 
49
  ### Training results
50
 
51
- | Training Loss | Epoch | Step | Validation Loss |
52
- |:-------------:|:------:|:----:|:---------------:|
53
- | 2.7429 | 1.5201 | 50 | 2.6420 |
 
 
 
 
 
 
 
 
54
 
55
 
56
  ### Framework versions
57
 
58
  - PEFT 0.15.2
59
- - Transformers 4.52.4
60
  - Pytorch 2.6.0+cu124
61
  - Datasets 2.14.4
62
- - Tokenizers 0.21.1
 
16
 
17
  This model is a fine-tuned version of [microsoft/Phi-4-mini-instruct](https://huggingface.co/microsoft/Phi-4-mini-instruct) on the MITRE ATT&CK tactics and techniques dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 1.6524
20
 
21
  ## Model description
22
 
 
44
  - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
45
  - lr_scheduler_type: cosine
46
  - lr_scheduler_warmup_ratio: 0.1
47
+ - num_epochs: 15
48
 
49
  ### Training results
50
 
51
+ | Training Loss | Epoch | Step | Validation Loss |
52
+ |:-------------:|:-------:|:----:|:---------------:|
53
+ | 2.9068 | 1.5201 | 50 | 2.7980 |
54
+ | 2.2862 | 3.0306 | 100 | 2.1574 |
55
+ | 1.8811 | 4.5507 | 150 | 1.8244 |
56
+ | 1.7806 | 6.0612 | 200 | 1.7332 |
57
+ | 1.7361 | 7.5813 | 250 | 1.6944 |
58
+ | 1.6709 | 9.0918 | 300 | 1.6713 |
59
+ | 1.6989 | 10.6119 | 350 | 1.6588 |
60
+ | 1.6858 | 12.1224 | 400 | 1.6524 |
61
+ | 1.6844 | 13.6424 | 450 | 1.6502 |
62
 
63
 
64
  ### Framework versions
65
 
66
  - PEFT 0.15.2
67
+ - Transformers 4.53.0
68
  - Pytorch 2.6.0+cu124
69
  - Datasets 2.14.4
70
+ - Tokenizers 0.21.2
adapter_config.json CHANGED
@@ -24,13 +24,13 @@
24
  "rank_pattern": {},
25
  "revision": null,
26
  "target_modules": [
 
 
27
  "o_proj",
28
  "v_proj",
29
  "k_proj",
30
- "q_proj",
31
- "down_proj",
32
  "up_proj",
33
- "gate_proj"
34
  ],
35
  "task_type": "CAUSAL_LM",
36
  "trainable_token_indices": null,
 
24
  "rank_pattern": {},
25
  "revision": null,
26
  "target_modules": [
27
+ "down_proj",
28
+ "gate_proj",
29
  "o_proj",
30
  "v_proj",
31
  "k_proj",
 
 
32
  "up_proj",
33
+ "q_proj"
34
  ],
35
  "task_type": "CAUSAL_LM",
36
  "trainable_token_indices": null,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4961560fd989d9ee7765b7a2df7f64031dcf63a8c948289752b163d1987cb63
3
  size 35668592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54f02ec5b73f7767f960434a492e0ac47e14d2627a1fd12caeb0c40d4d77fbea
3
  size 35668592
base_model_info.json CHANGED
@@ -4,6 +4,6 @@
4
  "quantization": "4-bit",
5
  "dataset": "MITRE ATT&CK tactics and techniques",
6
  "task": "Cybersecurity Q&A",
7
- "final_eval_loss": 2.4895331859588623,
8
  "inference_instructions": "Load base model with quantization, then load adapters"
9
  }
 
4
  "quantization": "4-bit",
5
  "dataset": "MITRE ATT&CK tactics and techniques",
6
  "task": "Cybersecurity Q&A",
7
+ "final_eval_loss": 1.6523603200912476,
8
  "inference_instructions": "Load base model with quantization, then load adapters"
9
  }
runs/Jul09_13-26-19_8a79f5b3baf0/events.out.tfevents.1752067583.8a79f5b3baf0.769.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab93617947bd928725fa0bedb8b0710402295c76e4a398d0846c4f2ef57c6291
3
+ size 14110
runs/Jul09_13-26-19_8a79f5b3baf0/events.out.tfevents.1752079931.8a79f5b3baf0.769.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f564abf6a82d42a8eb840450072bec158f40456ed80f08f6883554c4227e174e
3
+ size 359
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2108bc3203cd0081785e0ba6af0cf7141b13bfb3155ff4e78e169a5e831db6c
3
- size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:433efcd87990360d7b3c74d8f6b4cc98f39826bae0cd9b4cb93ba749b89b6ef1
3
+ size 5432
training_config.json CHANGED
@@ -7,9 +7,9 @@
7
  "train_test_split": 0.8,
8
  "random_seed": 42,
9
  "batch_size": 1,
10
- "gradient_accumulation_steps": 16,
11
  "learning_rate": 2e-05,
12
- "num_epochs": 3,
13
  "warmup_ratio": 0.1,
14
  "weight_decay": 0.01,
15
  "save_steps": 100,
@@ -22,9 +22,9 @@
22
  "dataloader_num_workers": 2,
23
  "dataloader_pin_memory": true,
24
  "USE_PEFT": true,
25
- "final_eval_loss": 2.4895331859588623,
26
  "target_eval_loss": 1.0,
27
  "training_completed": true,
28
- "timestamp": "2025-06-26T10:09:09.831498",
29
  "dataset_info": "MITRE ATT&CK tactics and techniques Q&A"
30
  }
 
7
  "train_test_split": 0.8,
8
  "random_seed": 42,
9
  "batch_size": 1,
10
+ "gradient_accumulation_steps": 8,
11
  "learning_rate": 2e-05,
12
+ "num_epochs": 15,
13
  "warmup_ratio": 0.1,
14
  "weight_decay": 0.01,
15
  "save_steps": 100,
 
22
  "dataloader_num_workers": 2,
23
  "dataloader_pin_memory": true,
24
  "USE_PEFT": true,
25
+ "final_eval_loss": 1.6523603200912476,
26
  "target_eval_loss": 1.0,
27
  "training_completed": true,
28
+ "timestamp": "2025-07-09T16:52:12.376347",
29
  "dataset_info": "MITRE ATT&CK tactics and techniques Q&A"
30
  }