sravanthib committed on
Commit
84a5d16
·
verified ·
1 Parent(s): c21975f

Training completed

Browse files
Files changed (4) hide show
  1. README.md +3 -3
  2. all_results.json +6 -6
  3. train_results.json +6 -6
  4. trainer_state.json +9 -9
README.md CHANGED
@@ -37,11 +37,11 @@ More information needed
37
 
38
  The following hyperparameters were used during training:
39
  - learning_rate: 5e-05
40
- - train_batch_size: 16
41
- - eval_batch_size: 64
42
  - seed: 42
43
  - gradient_accumulation_steps: 10
44
- - total_train_batch_size: 160
45
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
46
  - lr_scheduler_type: cosine
47
  - lr_scheduler_warmup_ratio: 0.03
 
37
 
38
  The following hyperparameters were used during training:
39
  - learning_rate: 5e-05
40
+ - train_batch_size: 2
41
+ - eval_batch_size: 8
42
  - seed: 42
43
  - gradient_accumulation_steps: 10
44
+ - total_train_batch_size: 20
45
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
46
  - lr_scheduler_type: cosine
47
  - lr_scheduler_warmup_ratio: 0.03
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 0.0182648401826484,
3
- "total_flos": 5.5657843654656e+16,
4
- "train_loss": 1.3686250686645507,
5
- "train_runtime": 146.0605,
6
- "train_samples_per_second": 10.954,
7
- "train_steps_per_second": 0.068
8
  }
 
1
  {
2
+ "epoch": 0.00228310502283105,
3
+ "total_flos": 6957230456832000.0,
4
+ "train_loss": 10.657806396484375,
5
+ "train_runtime": 130.9955,
6
+ "train_samples_per_second": 1.527,
7
+ "train_steps_per_second": 0.076
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 0.0182648401826484,
3
- "total_flos": 5.5657843654656e+16,
4
- "train_loss": 1.3686250686645507,
5
- "train_runtime": 146.0605,
6
- "train_samples_per_second": 10.954,
7
- "train_steps_per_second": 0.068
8
  }
 
1
  {
2
+ "epoch": 0.00228310502283105,
3
+ "total_flos": 6957230456832000.0,
4
+ "train_loss": 10.657806396484375,
5
+ "train_runtime": 130.9955,
6
+ "train_samples_per_second": 1.527,
7
+ "train_steps_per_second": 0.076
8
  }
trainer_state.json CHANGED
@@ -2,7 +2,7 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.0182648401826484,
6
  "eval_steps": 0,
7
  "global_step": 10,
8
  "is_hyper_param_search": false,
@@ -10,13 +10,13 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "epoch": 0.0182648401826484,
14
  "step": 10,
15
- "total_flos": 5.5657843654656e+16,
16
- "train_loss": 1.3686250686645507,
17
- "train_runtime": 146.0605,
18
- "train_samples_per_second": 10.954,
19
- "train_steps_per_second": 0.068
20
  }
21
  ],
22
  "logging_steps": 100,
@@ -36,8 +36,8 @@
36
  "attributes": {}
37
  }
38
  },
39
- "total_flos": 5.5657843654656e+16,
40
- "train_batch_size": 16,
41
  "trial_name": null,
42
  "trial_params": null
43
  }
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.00228310502283105,
6
  "eval_steps": 0,
7
  "global_step": 10,
8
  "is_hyper_param_search": false,
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "epoch": 0.00228310502283105,
14
  "step": 10,
15
+ "total_flos": 6957230456832000.0,
16
+ "train_loss": 10.657806396484375,
17
+ "train_runtime": 130.9955,
18
+ "train_samples_per_second": 1.527,
19
+ "train_steps_per_second": 0.076
20
  }
21
  ],
22
  "logging_steps": 100,
 
36
  "attributes": {}
37
  }
38
  },
39
+ "total_flos": 6957230456832000.0,
40
+ "train_batch_size": 2,
41
  "trial_name": null,
42
  "trial_params": null
43
  }