Upload folder using huggingface_hub
Browse files- config.json +5 -5
- generation_config.json +4 -4
- global_step6644/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- global_step6644/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- global_step6644/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- global_step6644/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- global_step6644/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- global_step6644/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- global_step6644/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- global_step6644/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- latest +1 -1
- model-00001-of-00004.safetensors +2 -2
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +2 -2
- model.safetensors.index.json +1 -1
- rng_state_0.pth +2 -2
- rng_state_1.pth +2 -2
- rng_state_2.pth +2 -2
- rng_state_3.pth +2 -2
- scheduler.pt +1 -1
- special_tokens_map.json +10 -3
- tokenizer.json +2 -2
- tokenizer_config.json +15 -5
- trainer_state.json +0 -0
- training_args.bin +2 -2
config.json
CHANGED
@@ -4,8 +4,8 @@
|
|
4 |
],
|
5 |
"attention_bias": false,
|
6 |
"attention_dropout": 0.0,
|
7 |
-
"bos_token_id":
|
8 |
-
"eos_token_id":
|
9 |
"head_dim": 128,
|
10 |
"hidden_act": "silu",
|
11 |
"hidden_size": 4096,
|
@@ -17,7 +17,7 @@
|
|
17 |
"num_attention_heads": 32,
|
18 |
"num_hidden_layers": 32,
|
19 |
"num_key_value_heads": 8,
|
20 |
-
"pad_token_id":
|
21 |
"pretraining_tp": 1,
|
22 |
"rms_norm_eps": 1e-05,
|
23 |
"rope_scaling": {
|
@@ -30,7 +30,7 @@
|
|
30 |
"rope_theta": 500000.0,
|
31 |
"tie_word_embeddings": false,
|
32 |
"torch_dtype": "bfloat16",
|
33 |
-
"transformers_version": "4.
|
34 |
"use_cache": false,
|
35 |
-
"vocab_size":
|
36 |
}
|
|
|
4 |
],
|
5 |
"attention_bias": false,
|
6 |
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 128000,
|
8 |
+
"eos_token_id": 128001,
|
9 |
"head_dim": 128,
|
10 |
"hidden_act": "silu",
|
11 |
"hidden_size": 4096,
|
|
|
17 |
"num_attention_heads": 32,
|
18 |
"num_hidden_layers": 32,
|
19 |
"num_key_value_heads": 8,
|
20 |
+
"pad_token_id": 128258,
|
21 |
"pretraining_tp": 1,
|
22 |
"rms_norm_eps": 1e-05,
|
23 |
"rope_scaling": {
|
|
|
30 |
"rope_theta": 500000.0,
|
31 |
"tie_word_embeddings": false,
|
32 |
"torch_dtype": "bfloat16",
|
33 |
+
"transformers_version": "4.51.3",
|
34 |
"use_cache": false,
|
35 |
+
"vocab_size": 128259
|
36 |
}
|
generation_config.json
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
{
|
2 |
"_from_model_config": true,
|
3 |
-
"bos_token_id":
|
4 |
"do_sample": true,
|
5 |
-
"eos_token_id":
|
6 |
-
"pad_token_id":
|
7 |
"temperature": 0.6,
|
8 |
"top_p": 0.9,
|
9 |
-
"transformers_version": "4.
|
10 |
}
|
|
|
1 |
{
|
2 |
"_from_model_config": true,
|
3 |
+
"bos_token_id": 128000,
|
4 |
"do_sample": true,
|
5 |
+
"eos_token_id": 128001,
|
6 |
+
"pad_token_id": 128258,
|
7 |
"temperature": 0.6,
|
8 |
"top_p": 0.9,
|
9 |
+
"transformers_version": "4.51.3"
|
10 |
}
|
global_step6644/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de9c96a41bb7295bade4a8f5b984481c186411a3a6279493a6f611c11b9f8d39
|
3 |
+
size 24090863678
|
global_step6644/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3fa8d1fff7eb7559bef409dbd7b74ea0e83cdd0db376d1fbd43c36a7a8394319
|
3 |
+
size 24090863678
|
global_step6644/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4ce49a8a1ad5a9c4f7be5987f9a27e1a84872526d7f73f38acf6abcee4b642c
|
3 |
+
size 24090863678
|
global_step6644/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b6237c96a4290beb8325f3279112e89f1a0dfa55166911e0997204a65a9ccf94
|
3 |
+
size 24090863678
|
global_step6644/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cfb1012d1b786d6c350da25da683a44fa03e607f22a42f0760f1320ccce88422
|
3 |
+
size 148837
|
global_step6644/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:397fff671572295afb702e33aab5a6c47d715e1c6fe8c9ce9b9177800243b964
|
3 |
+
size 148837
|
global_step6644/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f5d2bb4c46de5215023d9eeabbddd84ceb1cce9f1a92aaec8a3390097dec76ad
|
3 |
+
size 148837
|
global_step6644/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c85de6347279a7eac67464947839cd5f24608940955ce4a3ccf99e651cff8e6
|
3 |
+
size 148837
|
latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step6644
|
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eaa35b57420ddf65ac29ad4368d81f2c7b9c25abe77355f319fc4b59124a48d3
|
3 |
+
size 4976723248
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999802720
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62dffe2773f11dba7306cc79bd44d2bce030eabcb22157afe7e7c382111a78e2
|
3 |
size 4999802720
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4915916176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:280d3406be717d14e82c97de98c4de1ca5ce5f217ba584147b32159be98dc038
|
3 |
size 4915916176
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6e66e66abd55acbb80eb260aed234cceb64630369417f01563279b8278b2dc2
|
3 |
+
size 1168163384
|
model.safetensors.index.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
-
"total_size":
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"lm_head.weight": "model-00004-of-00004.safetensors",
|
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
+
"total_size": 16060571648
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"lm_head.weight": "model-00004-of-00004.safetensors",
|
rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b184f8fc9facd6e4a6513602952b43b9446784300c3ea2da72aefb029f93aac1
|
3 |
+
size 15024
|
rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:937f07b3f415f537fbc809c70f15f64b8410cd6a07adbe7d246f5ca7359b3b49
|
3 |
+
size 15024
|
rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07e3f7eacec877950b3dad99b106bf80da30203dda6159b06f954c15b7f5d367
|
3 |
+
size 15024
|
rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a5087a11a5f0a226f1607ec68740ea6baa2599559138de38da40f89dd666683
|
3 |
+
size 15024
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:69cdfa6cf38acfdff4fd96b9eb8312f5fb0f655877e187208ac9e1500ff67e6f
|
3 |
size 1064
|
special_tokens_map.json
CHANGED
@@ -13,9 +13,16 @@
|
|
13 |
"normalized": false,
|
14 |
"rstrip": false,
|
15 |
"single_word": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
}
|
17 |
],
|
18 |
-
"bos_token": "<|
|
19 |
-
"eos_token": "<|
|
20 |
-
"pad_token": "<|
|
21 |
}
|
|
|
13 |
"normalized": false,
|
14 |
"rstrip": false,
|
15 |
"single_word": false
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"content": "<|pad|>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": false,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
}
|
24 |
],
|
25 |
+
"bos_token": "<|begin_of_text|>",
|
26 |
+
"eos_token": "<|end_of_text|>",
|
27 |
+
"pad_token": "<|pad|>"
|
28 |
}
|
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84d9fd6f8a3b531fcd3641d24db47ac67d512a3ab36abfe835934d19cff30a8d
|
3 |
+
size 17210480
|
tokenizer_config.json
CHANGED
@@ -2063,22 +2063,32 @@
|
|
2063 |
"rstrip": false,
|
2064 |
"single_word": false,
|
2065 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2066 |
}
|
2067 |
},
|
2068 |
"additional_special_tokens": [
|
2069 |
"<|im_start|>",
|
2070 |
-
"<|im_end|>"
|
|
|
2071 |
],
|
2072 |
-
"bos_token": "<|
|
2073 |
-
"chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '
|
2074 |
"clean_up_tokenization_spaces": true,
|
2075 |
-
"eos_token": "<|
|
2076 |
"extra_special_tokens": {},
|
|
|
2077 |
"model_input_names": [
|
2078 |
"input_ids",
|
2079 |
"attention_mask"
|
2080 |
],
|
2081 |
"model_max_length": 131072,
|
2082 |
-
"pad_token": "<|
|
2083 |
"tokenizer_class": "PreTrainedTokenizer"
|
2084 |
}
|
|
|
2063 |
"rstrip": false,
|
2064 |
"single_word": false,
|
2065 |
"special": true
|
2066 |
+
},
|
2067 |
+
"128258": {
|
2068 |
+
"content": "<|pad|>",
|
2069 |
+
"lstrip": false,
|
2070 |
+
"normalized": false,
|
2071 |
+
"rstrip": false,
|
2072 |
+
"single_word": false,
|
2073 |
+
"special": true
|
2074 |
}
|
2075 |
},
|
2076 |
"additional_special_tokens": [
|
2077 |
"<|im_start|>",
|
2078 |
+
"<|im_end|>",
|
2079 |
+
"<|pad|>"
|
2080 |
],
|
2081 |
+
"bos_token": "<|begin_of_text|>",
|
2082 |
+
"chat_template": "{% if messages[0]['role'] == 'user' or messages[0]['role'] == 'system' %}{{ '<|begin_of_text|>' }}{% endif %}{% for message in messages %}{{ '<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>\\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% elif messages[-1]['role'] == 'assistant' %}{{ '<|end_of_text|>' }}{% endif %}",
|
2083 |
"clean_up_tokenization_spaces": true,
|
2084 |
+
"eos_token": "<|end_of_text|>",
|
2085 |
"extra_special_tokens": {},
|
2086 |
+
"legacy": false,
|
2087 |
"model_input_names": [
|
2088 |
"input_ids",
|
2089 |
"attention_mask"
|
2090 |
],
|
2091 |
"model_max_length": 131072,
|
2092 |
+
"pad_token": "<|pad|>",
|
2093 |
"tokenizer_class": "PreTrainedTokenizer"
|
2094 |
}
|
trainer_state.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2d401aca38d027e0fcc12ba4910a855c90856823a14406faa548337c8d09812
|
3 |
+
size 7160
|