PocketDoc committed on
Commit
921ef19
·
verified ·
1 Parent(s): a6b7d96

Upload 5 files

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +8 -8
  2. tokenizer.json +2 -2
  3. tokenizer_config.json +16 -16
special_tokens_map.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "additional_special_tokens": [
3
  "<unk>",
4
- "<s>",
5
- "</s>",
6
- "[INST]",
7
- "[/INST]",
8
  "[AVAILABLE_TOOLS]",
9
  "[/AVAILABLE_TOOLS]",
10
- "[TOOL_RESULTS]",
11
  "[/TOOL_RESULTS]",
12
  "[TOOL_CALLS]",
13
  "[IMG]",
@@ -17,7 +17,7 @@
17
  "[PREFIX]",
18
  "[MIDDLE]",
19
  "[SUFFIX]",
20
- "[SYSTEM_PROMPT]",
21
  "[/SYSTEM_PROMPT]",
22
  "[TOOL_CONTENT]",
23
  "<SPECIAL_20>",
@@ -1002,14 +1002,14 @@
1002
  "<SPECIAL_999>"
1003
  ],
1004
  "bos_token": {
1005
- "content": "<s>",
1006
  "lstrip": false,
1007
  "normalized": false,
1008
  "rstrip": false,
1009
  "single_word": false
1010
  },
1011
  "eos_token": {
1012
- "content": "</s>",
1013
  "lstrip": false,
1014
  "normalized": false,
1015
  "rstrip": false,
 
1
  {
2
  "additional_special_tokens": [
3
  "<unk>",
4
+ "[gMASK]<sop>",
5
+ "<|endoftext|>",
6
+ "<|user|>",
7
+ "<|assistant|>",
8
  "[AVAILABLE_TOOLS]",
9
  "[/AVAILABLE_TOOLS]",
10
+ "<|tool|>",
11
  "[/TOOL_RESULTS]",
12
  "[TOOL_CALLS]",
13
  "[IMG]",
 
17
  "[PREFIX]",
18
  "[MIDDLE]",
19
  "[SUFFIX]",
20
+ "<|system|>",
21
  "[/SYSTEM_PROMPT]",
22
  "[TOOL_CONTENT]",
23
  "<SPECIAL_20>",
 
1002
  "<SPECIAL_999>"
1003
  ],
1004
  "bos_token": {
1005
+ "content": "[gMASK]<sop>",
1006
  "lstrip": false,
1007
  "normalized": false,
1008
  "rstrip": false,
1009
  "single_word": false
1010
  },
1011
  "eos_token": {
1012
+ "content": "<|endoftext|>",
1013
  "lstrip": false,
1014
  "normalized": false,
1015
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b76085f9923309d873994d444989f7eb6ec074b06f25b58f1e8d7b7741070949
3
- size 17078037
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ad58cc5a1b54079c79ff92de90df3383f45550eea1ed833d552a6f48089ad09
3
+ size 17078121
tokenizer_config.json CHANGED
@@ -12,7 +12,7 @@
12
  "special": true
13
  },
14
  "1": {
15
- "content": "<s>",
16
  "lstrip": false,
17
  "normalized": false,
18
  "rstrip": false,
@@ -20,7 +20,7 @@
20
  "special": true
21
  },
22
  "2": {
23
- "content": "</s>",
24
  "lstrip": false,
25
  "normalized": false,
26
  "rstrip": false,
@@ -28,7 +28,7 @@
28
  "special": true
29
  },
30
  "3": {
31
- "content": "[INST]",
32
  "lstrip": false,
33
  "normalized": false,
34
  "rstrip": false,
@@ -36,7 +36,7 @@
36
  "special": true
37
  },
38
  "4": {
39
- "content": "[/INST]",
40
  "lstrip": false,
41
  "normalized": false,
42
  "rstrip": false,
@@ -60,7 +60,7 @@
60
  "special": true
61
  },
62
  "7": {
63
- "content": "[TOOL_RESULTS]",
64
  "lstrip": false,
65
  "normalized": false,
66
  "rstrip": false,
@@ -140,7 +140,7 @@
140
  "special": true
141
  },
142
  "17": {
143
- "content": "[SYSTEM_PROMPT]",
144
  "lstrip": false,
145
  "normalized": false,
146
  "rstrip": false,
@@ -8006,13 +8006,13 @@
8006
  },
8007
  "additional_special_tokens": [
8008
  "<unk>",
8009
- "<s>",
8010
- "</s>",
8011
- "[INST]",
8012
- "[/INST]",
8013
  "[AVAILABLE_TOOLS]",
8014
  "[/AVAILABLE_TOOLS]",
8015
- "[TOOL_RESULTS]",
8016
  "[/TOOL_RESULTS]",
8017
  "[TOOL_CALLS]",
8018
  "[IMG]",
@@ -8022,7 +8022,7 @@
8022
  "[PREFIX]",
8023
  "[MIDDLE]",
8024
  "[SUFFIX]",
8025
- "[SYSTEM_PROMPT]",
8026
  "[/SYSTEM_PROMPT]",
8027
  "[TOOL_CONTENT]",
8028
  "<SPECIAL_20>",
@@ -9006,10 +9006,10 @@
9006
  "<SPECIAL_998>",
9007
  "<SPECIAL_999>"
9008
  ],
9009
- "bos_token": "<s>",
9010
- "chat_template": "{%- set today = strftime_now(\"%Y-%m-%d\") %}\n{%- set default_system_message = \"You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.\\nYour knowledge base was last updated on 2023-10-01. The current date is \" + today + \".\\n\\nWhen you're not sure about some information, you say that you don't have the information and don't make up anything.\\nIf the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. \\\"What are some good restaurants around me?\\\" => \\\"Where are you?\\\" or \\\"When is the next flight to Tokyo\\\" => \\\"Where do you travel from?\\\")\" %}\n\n{{- bos_token }}\n\n{%- if messages[0]['role'] == 'system' %}\n {%- if messages[0]['content'] is string %}\n {%- set system_message = messages[0]['content'] %}\n {%- else %}\n {%- set system_message = messages[0]['content'][0]['text'] %}\n {%- endif %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set system_message = default_system_message %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}\n\n{%- for message in loop_messages %}\n {%- if message['role'] == 'user' %}\n {%- if message['content'] is string %}\n {{- '[INST]' + message['content'] + '[/INST]' }}\n {%- else %}\n {{- '[INST]' }}\n {%- for block in message['content'] %}\n {%- if block['type'] == 'text' %}\n {{- block['text'] }}\n {%- elif block['type'] in ['image', 'image_url'] %}\n {{- '[IMG]' }}\n {%- else %}\n {{- raise_exception('Only text and image blocks are supported in message content!') }}\n {%- endif %}\n {%- endfor %}\n {{- '[/INST]' }}\n {%- endif %}\n {%- elif message['role'] == 'system' %}\n {%- if message['content'] is string %}\n {{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}\n {%- else %}\n {{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }}\n {%- endif %}\n {%- elif message['role'] == 'assistant' %}\n {%- if message['content'] is string %}\n {{- message['content'] + eos_token }}\n {%- else %}\n {{- message['content'][0]['text'] + eos_token }}\n {%- endif %}\n {%- else %}\n {{- raise_exception('Only user, system and assistant roles are supported!') }}\n {%- endif %}\n{%- endfor %}",
9011
  "clean_up_tokenization_spaces": false,
9012
- "eos_token": "</s>",
9013
  "extra_special_tokens": {},
9014
  "legacy": true,
9015
  "model_max_length": 131072,
@@ -9017,4 +9017,4 @@
9017
  "tokenizer_class": "LlamaTokenizerFast",
9018
  "unk_token": "<unk>",
9019
  "use_default_system_prompt": false
9020
- }
 
12
  "special": true
13
  },
14
  "1": {
15
+ "content": "[gMASK]<sop>",
16
  "lstrip": false,
17
  "normalized": false,
18
  "rstrip": false,
 
20
  "special": true
21
  },
22
  "2": {
23
+ "content": "<|endoftext|>",
24
  "lstrip": false,
25
  "normalized": false,
26
  "rstrip": false,
 
28
  "special": true
29
  },
30
  "3": {
31
+ "content": "<|user|>",
32
  "lstrip": false,
33
  "normalized": false,
34
  "rstrip": false,
 
36
  "special": true
37
  },
38
  "4": {
39
+ "content": "<|assistant|>",
40
  "lstrip": false,
41
  "normalized": false,
42
  "rstrip": false,
 
60
  "special": true
61
  },
62
  "7": {
63
+ "content": "<|tool|>",
64
  "lstrip": false,
65
  "normalized": false,
66
  "rstrip": false,
 
140
  "special": true
141
  },
142
  "17": {
143
+ "content": "<|system|>",
144
  "lstrip": false,
145
  "normalized": false,
146
  "rstrip": false,
 
8006
  },
8007
  "additional_special_tokens": [
8008
  "<unk>",
8009
+ "[gMASK]<sop>",
8010
+ "<|endoftext|>",
8011
+ "<|user|>",
8012
+ "<|assistant|>",
8013
  "[AVAILABLE_TOOLS]",
8014
  "[/AVAILABLE_TOOLS]",
8015
+ "<|tool|>",
8016
  "[/TOOL_RESULTS]",
8017
  "[TOOL_CALLS]",
8018
  "[IMG]",
 
8022
  "[PREFIX]",
8023
  "[MIDDLE]",
8024
  "[SUFFIX]",
8025
+ "<|system|>",
8026
  "[/SYSTEM_PROMPT]",
8027
  "[TOOL_CONTENT]",
8028
  "<SPECIAL_20>",
 
9006
  "<SPECIAL_998>",
9007
  "<SPECIAL_999>"
9008
  ],
9009
+ "bos_token": "[gMASK]<sop>",
9010
+ "chat_template": "{{ bos_token }}{%- set loop_messages = messages %}\n{%- for message in loop_messages %}\n {%- set content = '<|' + message['role'] + '|>'+ message['content'] | trim %}\n {%- if loop.index0 == 0 %}\n {%- set content = content %}\n {%- endif %}\n {%- if not (loop.last and message['role'] == 'assistant') %}\n {%- set content = content + '<|endoftext|>' %}\n {%- endif %}\n {{- content }}\n{%- endfor %}\n{%- if messages[-1]['role'] != 'assistant' %}\n {{- '<|assistant|>' }}\n{%- endif %}",
9011
  "clean_up_tokenization_spaces": false,
9012
+ "eos_token": "<|endoftext|>",
9013
  "extra_special_tokens": {},
9014
  "legacy": true,
9015
  "model_max_length": 131072,
 
9017
  "tokenizer_class": "LlamaTokenizerFast",
9018
  "unk_token": "<unk>",
9019
  "use_default_system_prompt": false
9020
+ }