rshwndsz committed on
Commit
ae4f17e
·
verified ·
1 Parent(s): 54cc6ff

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -4,8 +4,8 @@
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
- "bos_token_id": 128256,
8
- "eos_token_id": 128257,
9
  "head_dim": 128,
10
  "hidden_act": "silu",
11
  "hidden_size": 4096,
@@ -17,7 +17,7 @@
17
  "num_attention_heads": 32,
18
  "num_hidden_layers": 32,
19
  "num_key_value_heads": 8,
20
- "pad_token_id": 128257,
21
  "pretraining_tp": 1,
22
  "rms_norm_eps": 1e-05,
23
  "rope_scaling": {
@@ -30,7 +30,7 @@
30
  "rope_theta": 500000.0,
31
  "tie_word_embeddings": false,
32
  "torch_dtype": "bfloat16",
33
- "transformers_version": "4.50.0",
34
  "use_cache": false,
35
- "vocab_size": 128258
36
  }
 
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
+ "bos_token_id": 128000,
8
+ "eos_token_id": 128001,
9
  "head_dim": 128,
10
  "hidden_act": "silu",
11
  "hidden_size": 4096,
 
17
  "num_attention_heads": 32,
18
  "num_hidden_layers": 32,
19
  "num_key_value_heads": 8,
20
+ "pad_token_id": 128258,
21
  "pretraining_tp": 1,
22
  "rms_norm_eps": 1e-05,
23
  "rope_scaling": {
 
30
  "rope_theta": 500000.0,
31
  "tie_word_embeddings": false,
32
  "torch_dtype": "bfloat16",
33
+ "transformers_version": "4.51.3",
34
  "use_cache": false,
35
+ "vocab_size": 128259
36
  }
generation_config.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "_from_model_config": true,
3
- "bos_token_id": 128256,
4
  "do_sample": true,
5
- "eos_token_id": 128257,
6
- "pad_token_id": 128257,
7
  "temperature": 0.6,
8
  "top_p": 0.9,
9
- "transformers_version": "4.50.0"
10
  }
 
1
  {
2
  "_from_model_config": true,
3
+ "bos_token_id": 128000,
4
  "do_sample": true,
5
+ "eos_token_id": 128001,
6
+ "pad_token_id": 128258,
7
  "temperature": 0.6,
8
  "top_p": 0.9,
9
+ "transformers_version": "4.51.3"
10
  }
global_step6644/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de9c96a41bb7295bade4a8f5b984481c186411a3a6279493a6f611c11b9f8d39
3
+ size 24090863678
global_step6644/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fa8d1fff7eb7559bef409dbd7b74ea0e83cdd0db376d1fbd43c36a7a8394319
3
+ size 24090863678
global_step6644/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4ce49a8a1ad5a9c4f7be5987f9a27e1a84872526d7f73f38acf6abcee4b642c
3
+ size 24090863678
global_step6644/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6237c96a4290beb8325f3279112e89f1a0dfa55166911e0997204a65a9ccf94
3
+ size 24090863678
global_step6644/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfb1012d1b786d6c350da25da683a44fa03e607f22a42f0760f1320ccce88422
3
+ size 148837
global_step6644/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:397fff671572295afb702e33aab5a6c47d715e1c6fe8c9ce9b9177800243b964
3
+ size 148837
global_step6644/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5d2bb4c46de5215023d9eeabbddd84ceb1cce9f1a92aaec8a3390097dec76ad
3
+ size 148837
global_step6644/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c85de6347279a7eac67464947839cd5f24608940955ce4a3ccf99e651cff8e6
3
+ size 148837
latest CHANGED
@@ -1 +1 @@
1
- global_step1433
 
1
+ global_step6644
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35b32098d5417f56d97bdd235c977a3cefa75546b46065820c83da3295dc8322
3
- size 4976715056
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eaa35b57420ddf65ac29ad4368d81f2c7b9c25abe77355f319fc4b59124a48d3
3
+ size 4976723248
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71e9ab3b81c6ab57d92fe1c3f0f40e08d6c2693e289b0c8ca6aa8acb1bfc54bd
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62dffe2773f11dba7306cc79bd44d2bce030eabcb22157afe7e7c382111a78e2
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa98c5abc70f6f7dc724967d4f9bfa163601b28e0ff227ca4a8b08fdca856cd2
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:280d3406be717d14e82c97de98c4de1ca5ce5f217ba584147b32159be98dc038
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a11557f1cc838ba4e321812889f4eef8c502b1fc77c12b2f5e2e0b3355c094a1
3
- size 1168155192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6e66e66abd55acbb80eb260aed234cceb64630369417f01563279b8278b2dc2
3
+ size 1168163384
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 16060555264
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00004-of-00004.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 16060571648
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00004-of-00004.safetensors",
rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:955b895101e13c81ba510512313a06782795770a0bf998c90f718166d25f1664
3
- size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b184f8fc9facd6e4a6513602952b43b9446784300c3ea2da72aefb029f93aac1
3
+ size 15024
rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:096e351ace65ff91008be171a45173ed6369cc639fce73a288f671041e24b0ec
3
- size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:937f07b3f415f537fbc809c70f15f64b8410cd6a07adbe7d246f5ca7359b3b49
3
+ size 15024
rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f21c61b1a7e793bbdec183de3b52da90042305234bc7e5887986655cd3fc2192
3
- size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07e3f7eacec877950b3dad99b106bf80da30203dda6159b06f954c15b7f5d367
3
+ size 15024
rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:348742498d81780506d9760c655a7a7555185b5fbd70a7ae296d88fd9aeecd84
3
- size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a5087a11a5f0a226f1607ec68740ea6baa2599559138de38da40f89dd666683
3
+ size 15024
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d427428dba1531aa627ec053886efe6432b8dcbc9cb32283f3906476f030ec86
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69cdfa6cf38acfdff4fd96b9eb8312f5fb0f655877e187208ac9e1500ff67e6f
3
  size 1064
special_tokens_map.json CHANGED
@@ -13,9 +13,16 @@
13
  "normalized": false,
14
  "rstrip": false,
15
  "single_word": false
 
 
 
 
 
 
 
16
  }
17
  ],
18
- "bos_token": "<|im_start|>",
19
- "eos_token": "<|im_end|>",
20
- "pad_token": "<|im_end|>"
21
  }
 
13
  "normalized": false,
14
  "rstrip": false,
15
  "single_word": false
16
+ },
17
+ {
18
+ "content": "<|pad|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
  }
24
  ],
25
+ "bos_token": "<|begin_of_text|>",
26
+ "eos_token": "<|end_of_text|>",
27
+ "pad_token": "<|pad|>"
28
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:510d12ec255f4cb0304aa5428d699c354c1a49696b427a2748a7b03bb7bbb575
3
- size 17210296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84d9fd6f8a3b531fcd3641d24db47ac67d512a3ab36abfe835934d19cff30a8d
3
+ size 17210480
tokenizer_config.json CHANGED
@@ -2063,22 +2063,32 @@
2063
  "rstrip": false,
2064
  "single_word": false,
2065
  "special": true
 
 
 
 
 
 
 
 
2066
  }
2067
  },
2068
  "additional_special_tokens": [
2069
  "<|im_start|>",
2070
- "<|im_end|>"
 
2071
  ],
2072
- "bos_token": "<|im_start|>",
2073
- "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
2074
  "clean_up_tokenization_spaces": true,
2075
- "eos_token": "<|im_end|>",
2076
  "extra_special_tokens": {},
 
2077
  "model_input_names": [
2078
  "input_ids",
2079
  "attention_mask"
2080
  ],
2081
  "model_max_length": 131072,
2082
- "pad_token": "<|im_end|>",
2083
  "tokenizer_class": "PreTrainedTokenizer"
2084
  }
 
2063
  "rstrip": false,
2064
  "single_word": false,
2065
  "special": true
2066
+ },
2067
+ "128258": {
2068
+ "content": "<|pad|>",
2069
+ "lstrip": false,
2070
+ "normalized": false,
2071
+ "rstrip": false,
2072
+ "single_word": false,
2073
+ "special": true
2074
  }
2075
  },
2076
  "additional_special_tokens": [
2077
  "<|im_start|>",
2078
+ "<|im_end|>",
2079
+ "<|pad|>"
2080
  ],
2081
+ "bos_token": "<|begin_of_text|>",
2082
+ "chat_template": "{% if messages[0]['role'] == 'user' or messages[0]['role'] == 'system' %}{{ '<|begin_of_text|>' }}{% endif %}{% for message in messages %}{{ '<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>\\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% elif messages[-1]['role'] == 'assistant' %}{{ '<|end_of_text|>' }}{% endif %}",
2083
  "clean_up_tokenization_spaces": true,
2084
+ "eos_token": "<|end_of_text|>",
2085
  "extra_special_tokens": {},
2086
+ "legacy": false,
2087
  "model_input_names": [
2088
  "input_ids",
2089
  "attention_mask"
2090
  ],
2091
  "model_max_length": 131072,
2092
+ "pad_token": "<|pad|>",
2093
  "tokenizer_class": "PreTrainedTokenizer"
2094
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26685089dbc1e1360633fa418ba012ee63d4f4fda570441a22a1f60758811021
3
- size 7096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2d401aca38d027e0fcc12ba4910a855c90856823a14406faa548337c8d09812
3
+ size 7160