yujiepan committed on
Commit
1cb969a
·
verified ·
1 Parent(s): 04b73ba

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -1,50 +1,107 @@
1
  ---
2
  library_name: transformers
3
- pipeline_tag: text-generation
4
- inference: true
5
- widget:
6
- - text: Hello!
7
- example_title: Hello world
8
- group: Python
9
  ---
10
 
11
- This model is randomly initialized, using the config from [google/gemma-2-27b-it](https://huggingface.co/google/gemma-2-27b-it) but with smaller size.
 
 
12
 
13
- Codes:
14
  ```python
15
  from transformers import pipeline
16
- from huggingface_hub import create_repo, upload_folder
 
 
 
 
 
 
 
 
 
 
 
17
  import torch
18
- import transformers
19
- import os
20
-
21
- model_id = 'google/gemma-2-27b-it'
22
- save_path = '/tmp/yujiepan/gemma-2-tiny-random'
23
- repo_id = 'yujiepan/gemma-2-tiny-random'
24
-
25
- config = transformers.AutoConfig.from_pretrained(model_id)
26
- config.hidden_size = 8
27
- config.head_dim = 2
28
- config.intermediate_size = 16
29
- config.num_attention_heads = 4
30
- config.num_hidden_layers = 2
31
- config.num_key_value_heads = 2
32
-
33
- tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
34
- tokenizer.save_pretrained(save_path)
35
-
36
- model = transformers.AutoModelForCausalLM.from_config(config, torch_dtype=torch.bfloat16)
37
- model.generation_config = transformers.GenerationConfig.from_pretrained(model_id)
38
- with torch.no_grad():
39
- for p in model.parameters():
40
- torch.nn.init.uniform_(p, -0.1, 0.1)
41
 
42
- pipe = pipeline('text-generation', model=model, tokenizer=tokenizer, do_sample=False, device='cuda')
43
- print(pipe('Hello World!'))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
- model.save_pretrained(save_path)
46
 
47
- os.system(f'ls -alh {save_path}')
48
- create_repo(repo_id, exist_ok=True)
49
- upload_folder(repo_id=repo_id, folder_path=save_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  ```
 
1
  ---
2
  library_name: transformers
3
+ base_model:
4
+ - google/gemma-2-27b-it
 
 
 
 
5
  ---
6
 
7
+ This tiny model is intended for debugging. It is randomly initialized using a configuration adapted from [google/gemma-2-27b-it](https://huggingface.co/google/gemma-2-27b-it).
8
+
9
+ ### Example usage:
10
 
 
11
  ```python
12
  from transformers import pipeline
13
+ model_id = "yujiepan/gemma-2-tiny-random"
14
+ pipe = pipeline('text-generation', model=model_id, device='cuda', dtype="bfloat16")
15
+ print(pipe('Hello World!'))
16
+ ```
17
+
18
+ ### Code to create this repo:
19
+
20
+ ```python
21
+ import json
22
+ from pathlib import Path
23
+
24
+ import accelerate
25
  import torch
26
+ from huggingface_hub import file_exists, hf_hub_download
27
+ from transformers import (
28
+ AutoConfig,
29
+ AutoModelForCausalLM,
30
+ AutoProcessor,
31
+ GenerationConfig,
32
+ set_seed,
33
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
+ source_model_id = "google/gemma-2-27b-it"
36
+ save_folder = "/tmp/yujiepan/gemma-2-tiny-random"
37
+
38
+ processor = AutoProcessor.from_pretrained(
39
+ source_model_id, trust_remote_code=True)
40
+ processor.save_pretrained(save_folder)
41
+
42
+ with open(hf_hub_download(source_model_id, filename='config.json', repo_type='model'), 'r', encoding='utf-8') as f:
43
+ config_json = json.load(f)
44
+ config_json['hidden_size'] = 8
45
+ config_json['intermediate_size'] = 64
46
+ config_json['num_attention_heads'] = 8
47
+ config_json['num_hidden_layers'] = 2
48
+ config_json['num_key_value_heads'] = 4
49
+ config_json['head_dim'] = 32
50
+ config_json['tie_word_embeddings'] = True
51
+ with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
52
+ json.dump(config_json, f, indent=2)
53
+
54
+ config = AutoConfig.from_pretrained(
55
+ save_folder,
56
+ trust_remote_code=True,
57
+ )
58
+ print(config)
59
+ torch.set_default_dtype(torch.bfloat16)
60
+ model = AutoModelForCausalLM.from_config(config)
61
+ torch.set_default_dtype(torch.float32)
62
+ if file_exists(filename="generation_config.json", repo_id=source_model_id, repo_type='model'):
63
+ model.generation_config = GenerationConfig.from_pretrained(
64
+ source_model_id, trust_remote_code=True,
65
+ )
66
+ set_seed(42)
67
+ model = model.cpu()
68
+ with torch.no_grad():
69
+ for name, p in sorted(model.named_parameters()):
70
+ torch.nn.init.normal_(p, 0, 0.1)
71
+ print(name, p.shape)
72
+ model.save_pretrained(save_folder)
73
+ print(model)
74
+ ```
75
 
76
+ ### Printing the model:
77
 
78
+ ```text
79
+ Gemma2ForCausalLM(
80
+ (model): Gemma2Model(
81
+ (embed_tokens): Embedding(256000, 8, padding_idx=0)
82
+ (layers): ModuleList(
83
+ (0-1): 2 x Gemma2DecoderLayer(
84
+ (self_attn): Gemma2Attention(
85
+ (q_proj): Linear(in_features=8, out_features=256, bias=False)
86
+ (k_proj): Linear(in_features=8, out_features=128, bias=False)
87
+ (v_proj): Linear(in_features=8, out_features=128, bias=False)
88
+ (o_proj): Linear(in_features=256, out_features=8, bias=False)
89
+ )
90
+ (mlp): Gemma2MLP(
91
+ (gate_proj): Linear(in_features=8, out_features=64, bias=False)
92
+ (up_proj): Linear(in_features=8, out_features=64, bias=False)
93
+ (down_proj): Linear(in_features=64, out_features=8, bias=False)
94
+ (act_fn): GELUTanh()
95
+ )
96
+ (input_layernorm): Gemma2RMSNorm((8,), eps=1e-06)
97
+ (post_attention_layernorm): Gemma2RMSNorm((8,), eps=1e-06)
98
+ (pre_feedforward_layernorm): Gemma2RMSNorm((8,), eps=1e-06)
99
+ (post_feedforward_layernorm): Gemma2RMSNorm((8,), eps=1e-06)
100
+ )
101
+ )
102
+ (norm): Gemma2RMSNorm((8,), eps=1e-06)
103
+ (rotary_emb): Gemma2RotaryEmbedding()
104
+ )
105
+ (lm_head): Linear(in_features=8, out_features=256000, bias=False)
106
+ )
107
  ```
chat_template.jinja ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '
2
+ ' + message['content'] | trim + '<end_of_turn>
3
+ ' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model
4
+ '}}{% endif %}
config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_name_or_path": "google/gemma-2-27b-it",
3
  "architectures": [
4
  "Gemma2ForCausalLM"
5
  ],
@@ -8,27 +7,31 @@
8
  "attn_logit_softcapping": 50.0,
9
  "bos_token_id": 2,
10
  "cache_implementation": "hybrid",
 
11
  "eos_token_id": 1,
12
  "final_logit_softcapping": 30.0,
13
- "head_dim": 2,
14
  "hidden_act": "gelu_pytorch_tanh",
15
  "hidden_activation": "gelu_pytorch_tanh",
16
  "hidden_size": 8,
17
  "initializer_range": 0.02,
18
- "intermediate_size": 16,
 
 
 
 
19
  "max_position_embeddings": 8192,
20
  "model_type": "gemma2",
21
- "num_attention_heads": 4,
22
  "num_hidden_layers": 2,
23
- "num_key_value_heads": 2,
24
  "pad_token_id": 0,
25
  "query_pre_attn_scalar": 144,
26
  "rms_norm_eps": 1e-06,
27
  "rope_theta": 10000.0,
28
  "sliding_window": 4096,
29
  "sliding_window_size": 4096,
30
- "torch_dtype": "bfloat16",
31
- "transformers_version": "4.42.1",
32
  "use_cache": true,
33
  "vocab_size": 256000
34
  }
 
1
  {
 
2
  "architectures": [
3
  "Gemma2ForCausalLM"
4
  ],
 
7
  "attn_logit_softcapping": 50.0,
8
  "bos_token_id": 2,
9
  "cache_implementation": "hybrid",
10
+ "dtype": "bfloat16",
11
  "eos_token_id": 1,
12
  "final_logit_softcapping": 30.0,
13
+ "head_dim": 32,
14
  "hidden_act": "gelu_pytorch_tanh",
15
  "hidden_activation": "gelu_pytorch_tanh",
16
  "hidden_size": 8,
17
  "initializer_range": 0.02,
18
+ "intermediate_size": 64,
19
+ "layer_types": [
20
+ "sliding_attention",
21
+ "full_attention"
22
+ ],
23
  "max_position_embeddings": 8192,
24
  "model_type": "gemma2",
25
+ "num_attention_heads": 8,
26
  "num_hidden_layers": 2,
27
+ "num_key_value_heads": 4,
28
  "pad_token_id": 0,
29
  "query_pre_attn_scalar": 144,
30
  "rms_norm_eps": 1e-06,
31
  "rope_theta": 10000.0,
32
  "sliding_window": 4096,
33
  "sliding_window_size": 4096,
34
+ "transformers_version": "4.57.1",
 
35
  "use_cache": true,
36
  "vocab_size": 256000
37
  }
generation_config.json CHANGED
@@ -4,5 +4,5 @@
4
  "cache_implementation": "hybrid",
5
  "eos_token_id": 1,
6
  "pad_token_id": 0,
7
- "transformers_version": "4.42.1"
8
  }
 
4
  "cache_implementation": "hybrid",
5
  "eos_token_id": 1,
6
  "pad_token_id": 0,
7
+ "transformers_version": "4.57.1"
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:438bdf38f16ee5b8697e4432dd369211ae56181d440ef494cb8f8f18b050c363
3
- size 4100992
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b822fb4bd70ba7c075fd4ce0c4c16fa31946a1f52f95df3e6868c70fbf915f30
3
+ size 4129424
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7da53ca29fb16f6b2489482fc0bc6a394162cdab14d12764a1755ebc583fea79
3
- size 17518525
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f7eee611703c5ce5d1eee32d9cdcfe465647b8aff0c1dfb3bed7ad7dbb05060
3
+ size 34362873
tokenizer_config.json CHANGED
@@ -1737,6 +1737,262 @@
1737
  "rstrip": false,
1738
  "single_word": false,
1739
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1740
  }
1741
  },
1742
  "additional_special_tokens": [
@@ -1744,9 +2000,9 @@
1744
  "<end_of_turn>"
1745
  ],
1746
  "bos_token": "<bos>",
1747
- "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
1748
  "clean_up_tokenization_spaces": false,
1749
  "eos_token": "<eos>",
 
1750
  "model_max_length": 1000000000000000019884624838656,
1751
  "pad_token": "<pad>",
1752
  "sp_model_kwargs": {},
 
1737
  "rstrip": false,
1738
  "single_word": false,
1739
  "special": false
1740
+ },
1741
+ "255968": {
1742
+ "content": "[toxicity=0]",
1743
+ "lstrip": false,
1744
+ "normalized": false,
1745
+ "rstrip": false,
1746
+ "single_word": false,
1747
+ "special": false
1748
+ },
1749
+ "255969": {
1750
+ "content": "\t\t",
1751
+ "lstrip": false,
1752
+ "normalized": false,
1753
+ "rstrip": false,
1754
+ "single_word": false,
1755
+ "special": false
1756
+ },
1757
+ "255970": {
1758
+ "content": "\t\t\t",
1759
+ "lstrip": false,
1760
+ "normalized": false,
1761
+ "rstrip": false,
1762
+ "single_word": false,
1763
+ "special": false
1764
+ },
1765
+ "255971": {
1766
+ "content": "\t\t\t\t",
1767
+ "lstrip": false,
1768
+ "normalized": false,
1769
+ "rstrip": false,
1770
+ "single_word": false,
1771
+ "special": false
1772
+ },
1773
+ "255972": {
1774
+ "content": "\t\t\t\t\t",
1775
+ "lstrip": false,
1776
+ "normalized": false,
1777
+ "rstrip": false,
1778
+ "single_word": false,
1779
+ "special": false
1780
+ },
1781
+ "255973": {
1782
+ "content": "\t\t\t\t\t\t",
1783
+ "lstrip": false,
1784
+ "normalized": false,
1785
+ "rstrip": false,
1786
+ "single_word": false,
1787
+ "special": false
1788
+ },
1789
+ "255974": {
1790
+ "content": "\t\t\t\t\t\t\t",
1791
+ "lstrip": false,
1792
+ "normalized": false,
1793
+ "rstrip": false,
1794
+ "single_word": false,
1795
+ "special": false
1796
+ },
1797
+ "255975": {
1798
+ "content": "\t\t\t\t\t\t\t\t",
1799
+ "lstrip": false,
1800
+ "normalized": false,
1801
+ "rstrip": false,
1802
+ "single_word": false,
1803
+ "special": false
1804
+ },
1805
+ "255976": {
1806
+ "content": "\t\t\t\t\t\t\t\t\t",
1807
+ "lstrip": false,
1808
+ "normalized": false,
1809
+ "rstrip": false,
1810
+ "single_word": false,
1811
+ "special": false
1812
+ },
1813
+ "255977": {
1814
+ "content": "\t\t\t\t\t\t\t\t\t\t",
1815
+ "lstrip": false,
1816
+ "normalized": false,
1817
+ "rstrip": false,
1818
+ "single_word": false,
1819
+ "special": false
1820
+ },
1821
+ "255978": {
1822
+ "content": "\t\t\t\t\t\t\t\t\t\t\t",
1823
+ "lstrip": false,
1824
+ "normalized": false,
1825
+ "rstrip": false,
1826
+ "single_word": false,
1827
+ "special": false
1828
+ },
1829
+ "255979": {
1830
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t",
1831
+ "lstrip": false,
1832
+ "normalized": false,
1833
+ "rstrip": false,
1834
+ "single_word": false,
1835
+ "special": false
1836
+ },
1837
+ "255980": {
1838
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t",
1839
+ "lstrip": false,
1840
+ "normalized": false,
1841
+ "rstrip": false,
1842
+ "single_word": false,
1843
+ "special": false
1844
+ },
1845
+ "255981": {
1846
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1847
+ "lstrip": false,
1848
+ "normalized": false,
1849
+ "rstrip": false,
1850
+ "single_word": false,
1851
+ "special": false
1852
+ },
1853
+ "255982": {
1854
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1855
+ "lstrip": false,
1856
+ "normalized": false,
1857
+ "rstrip": false,
1858
+ "single_word": false,
1859
+ "special": false
1860
+ },
1861
+ "255983": {
1862
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1863
+ "lstrip": false,
1864
+ "normalized": false,
1865
+ "rstrip": false,
1866
+ "single_word": false,
1867
+ "special": false
1868
+ },
1869
+ "255984": {
1870
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1871
+ "lstrip": false,
1872
+ "normalized": false,
1873
+ "rstrip": false,
1874
+ "single_word": false,
1875
+ "special": false
1876
+ },
1877
+ "255985": {
1878
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1879
+ "lstrip": false,
1880
+ "normalized": false,
1881
+ "rstrip": false,
1882
+ "single_word": false,
1883
+ "special": false
1884
+ },
1885
+ "255986": {
1886
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1887
+ "lstrip": false,
1888
+ "normalized": false,
1889
+ "rstrip": false,
1890
+ "single_word": false,
1891
+ "special": false
1892
+ },
1893
+ "255987": {
1894
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1895
+ "lstrip": false,
1896
+ "normalized": false,
1897
+ "rstrip": false,
1898
+ "single_word": false,
1899
+ "special": false
1900
+ },
1901
+ "255988": {
1902
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1903
+ "lstrip": false,
1904
+ "normalized": false,
1905
+ "rstrip": false,
1906
+ "single_word": false,
1907
+ "special": false
1908
+ },
1909
+ "255989": {
1910
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1911
+ "lstrip": false,
1912
+ "normalized": false,
1913
+ "rstrip": false,
1914
+ "single_word": false,
1915
+ "special": false
1916
+ },
1917
+ "255990": {
1918
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1919
+ "lstrip": false,
1920
+ "normalized": false,
1921
+ "rstrip": false,
1922
+ "single_word": false,
1923
+ "special": false
1924
+ },
1925
+ "255991": {
1926
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1927
+ "lstrip": false,
1928
+ "normalized": false,
1929
+ "rstrip": false,
1930
+ "single_word": false,
1931
+ "special": false
1932
+ },
1933
+ "255992": {
1934
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1935
+ "lstrip": false,
1936
+ "normalized": false,
1937
+ "rstrip": false,
1938
+ "single_word": false,
1939
+ "special": false
1940
+ },
1941
+ "255993": {
1942
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1943
+ "lstrip": false,
1944
+ "normalized": false,
1945
+ "rstrip": false,
1946
+ "single_word": false,
1947
+ "special": false
1948
+ },
1949
+ "255994": {
1950
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1951
+ "lstrip": false,
1952
+ "normalized": false,
1953
+ "rstrip": false,
1954
+ "single_word": false,
1955
+ "special": false
1956
+ },
1957
+ "255995": {
1958
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1959
+ "lstrip": false,
1960
+ "normalized": false,
1961
+ "rstrip": false,
1962
+ "single_word": false,
1963
+ "special": false
1964
+ },
1965
+ "255996": {
1966
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1967
+ "lstrip": false,
1968
+ "normalized": false,
1969
+ "rstrip": false,
1970
+ "single_word": false,
1971
+ "special": false
1972
+ },
1973
+ "255997": {
1974
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1975
+ "lstrip": false,
1976
+ "normalized": false,
1977
+ "rstrip": false,
1978
+ "single_word": false,
1979
+ "special": false
1980
+ },
1981
+ "255998": {
1982
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1983
+ "lstrip": false,
1984
+ "normalized": false,
1985
+ "rstrip": false,
1986
+ "single_word": false,
1987
+ "special": false
1988
+ },
1989
+ "255999": {
1990
+ "content": "<unused99>",
1991
+ "lstrip": false,
1992
+ "normalized": false,
1993
+ "rstrip": false,
1994
+ "single_word": false,
1995
+ "special": false
1996
  }
1997
  },
1998
  "additional_special_tokens": [
 
2000
  "<end_of_turn>"
2001
  ],
2002
  "bos_token": "<bos>",
 
2003
  "clean_up_tokenization_spaces": false,
2004
  "eos_token": "<eos>",
2005
+ "extra_special_tokens": {},
2006
  "model_max_length": 1000000000000000019884624838656,
2007
  "pad_token": "<pad>",
2008
  "sp_model_kwargs": {},