Upload model
Browse files
config.json
CHANGED
@@ -1,9 +1,10 @@
|
|
1 |
{
|
|
|
2 |
"activation_type": "silu",
|
3 |
"alibi": false,
|
4 |
"alibi_bias_max": 8.0,
|
5 |
"architectures": [
|
6 |
-
"
|
7 |
],
|
8 |
"attention_dropout": 0.0,
|
9 |
"attention_layer_norm": false,
|
@@ -36,7 +37,7 @@
|
|
36 |
"max_sequence_length": 4096,
|
37 |
"mlp_hidden_size": 12288,
|
38 |
"mlp_ratio": 4,
|
39 |
-
"model_type": "
|
40 |
"multi_query_attention": null,
|
41 |
"n_heads": 32,
|
42 |
"n_kv_heads": 32,
|
@@ -46,6 +47,7 @@
|
|
46 |
"num_vq_tokens": 256,
|
47 |
"pad_token_id": 126081,
|
48 |
"precision": "amp_bf16",
|
|
|
49 |
"residual_dropout": 0.0,
|
50 |
"rms_norm_eps": 1e-05,
|
51 |
"rope": true,
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "/data_storage/lbw/MMaDA/mmada-training-stage4-llada-instruct/checkpoint-130000/unwrapped_model",
|
3 |
"activation_type": "silu",
|
4 |
"alibi": false,
|
5 |
"alibi_bias_max": 8.0,
|
6 |
"architectures": [
|
7 |
+
"LLaDAModelLM"
|
8 |
],
|
9 |
"attention_dropout": 0.0,
|
10 |
"attention_layer_norm": false,
|
|
|
37 |
"max_sequence_length": 4096,
|
38 |
"mlp_hidden_size": 12288,
|
39 |
"mlp_ratio": 4,
|
40 |
+
"model_type": "llada",
|
41 |
"multi_query_attention": null,
|
42 |
"n_heads": 32,
|
43 |
"n_kv_heads": 32,
|
|
|
47 |
"num_vq_tokens": 256,
|
48 |
"pad_token_id": 126081,
|
49 |
"precision": "amp_bf16",
|
50 |
+
"pretrained_model_path": "/data_storage/shared/pretrained_models/LLaDA-8B-Instruct",
|
51 |
"residual_dropout": 0.0,
|
52 |
"rms_norm_eps": 1e-05,
|
53 |
"rope": true,
|
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4928472568
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b65828600a8bdc7b0d7fd61847f033c92b5982b0395b1e1741ba864736c2c680
|
3 |
size 4928472568
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4932693832
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2441d4f4667fa8831d862cc33fbf153bfd320d22ad4c8d2f1e8088a739ce0a23
|
3 |
size 4932693832
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999819560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6ff5c367093eb56a3639130f5525485b598b514a262da439f97597da6c89720
|
3 |
size 4999819560
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1304428928
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3875e39d151b69d51808101ac5b95819ccc88380c6a65443b9a5a889c9ba05b1
|
3 |
size 1304428928
|