Add support for images and fix end of assistant issue (#22)
Browse files
- Add support for images and fix an issue where the end-of-assistant token was not added when the last message was an assistant message. (f89f5ef02e1b9ecd18dd8b982016312f09c08663)
Co-authored-by: nathan ranchin <nathanrchn@users.noreply.huggingface.co>
- chat_template.jinja +11 -1
- tokenizer.json +2 -2
- tokenizer_config.json +1 -1
chat_template.jinja
CHANGED
|
@@ -150,8 +150,9 @@
|
|
| 150 |
{%- set outer_token = '<|inner_suffix|>' -%}
|
| 151 |
{%- set tool_calls_token = '<|tools_prefix|>' -%}
|
| 152 |
{%- set end_tool_calls_token = '<|tools_suffix|>' -%}
|
|
|
|
| 153 |
|
| 154 |
-
{%- set ns = namespace(in_assistant=false, in_tool=false, in_inner=false, assistant_format=none) -%}
|
| 155 |
|
| 156 |
{%- if messages and messages[0].role == 'system' -%}
|
| 157 |
{%- if "content" in messages[0] -%}
|
|
@@ -204,6 +205,8 @@
|
|
| 204 |
{%- for part in parts -%}
|
| 205 |
{%- if part.type == "text" -%}
|
| 206 |
{{ part.text }}
|
|
|
|
|
|
|
| 207 |
{%- else -%}
|
| 208 |
{{- raise_exception("Invalid user part: " + part.type) -}}
|
| 209 |
{%- endif -%}
|
|
@@ -257,6 +260,7 @@
|
|
| 257 |
{%- endif -%}
|
| 258 |
{%- endfor -%}
|
| 259 |
{{ ']' + end_tool_calls_token }}
|
|
|
|
| 260 |
{%- elif block.type == 'tool_outputs' -%}
|
| 261 |
{%- if ns.in_tool -%}
|
| 262 |
{{- raise_exception("Cannot have both tool outputs as separate messages and tool outputs as blocks") -}}
|
|
@@ -269,6 +273,7 @@
|
|
| 269 |
{%- endif -%}
|
| 270 |
{%- endfor -%}
|
| 271 |
{{- ']' }}
|
|
|
|
| 272 |
{%- elif block.type == 'response' -%}
|
| 273 |
{%- if ns.in_tool -%}
|
| 274 |
{{ ']' }}
|
|
@@ -303,6 +308,7 @@
|
|
| 303 |
{%- endif -%}
|
| 304 |
{%- endfor -%}
|
| 305 |
{{ ']' + end_tool_calls_token }}
|
|
|
|
| 306 |
{%- endif -%}
|
| 307 |
{%- elif message.role == 'tool' -%}
|
| 308 |
{%- if not ns.in_assistant -%}
|
|
@@ -315,6 +321,7 @@
|
|
| 315 |
{{ ", "}}
|
| 316 |
{%- endif -%}
|
| 317 |
{{ message.content }}
|
|
|
|
| 318 |
{%- else -%}
|
| 319 |
{{- raise_exception("Invalid message role") -}}
|
| 320 |
{%- endif -%}
|
|
@@ -322,6 +329,9 @@
|
|
| 322 |
{%- if ns.in_tool -%}
|
| 323 |
{{ ']' }}
|
| 324 |
{%- endif -%}
|
|
|
|
|
|
|
|
|
|
| 325 |
{%- if add_generation_prompt -%}
|
| 326 |
{{ assistant_token }}
|
| 327 |
{%- endif -%}
|
|
|
|
| 150 |
{%- set outer_token = '<|inner_suffix|>' -%}
|
| 151 |
{%- set tool_calls_token = '<|tools_prefix|>' -%}
|
| 152 |
{%- set end_tool_calls_token = '<|tools_suffix|>' -%}
|
| 153 |
+
{%- set image_token = '<|image|>' -%}
|
| 154 |
|
| 155 |
+
{%- set ns = namespace(in_assistant=false, in_tool=false, in_inner=false, waiting_for_tool_outputs=false, assistant_format=none) -%}
|
| 156 |
|
| 157 |
{%- if messages and messages[0].role == 'system' -%}
|
| 158 |
{%- if "content" in messages[0] -%}
|
|
|
|
| 205 |
{%- for part in parts -%}
|
| 206 |
{%- if part.type == "text" -%}
|
| 207 |
{{ part.text }}
|
| 208 |
+
{%- elif part.type == "image" -%}
|
| 209 |
+
{{ image_token }}
|
| 210 |
{%- else -%}
|
| 211 |
{{- raise_exception("Invalid user part: " + part.type) -}}
|
| 212 |
{%- endif -%}
|
|
|
|
| 260 |
{%- endif -%}
|
| 261 |
{%- endfor -%}
|
| 262 |
{{ ']' + end_tool_calls_token }}
|
| 263 |
+
{%- set ns.waiting_for_tool_outputs = true -%}
|
| 264 |
{%- elif block.type == 'tool_outputs' -%}
|
| 265 |
{%- if ns.in_tool -%}
|
| 266 |
{{- raise_exception("Cannot have both tool outputs as separate messages and tool outputs as blocks") -}}
|
|
|
|
| 273 |
{%- endif -%}
|
| 274 |
{%- endfor -%}
|
| 275 |
{{- ']' }}
|
| 276 |
+
{%- set ns.waiting_for_tool_outputs = false -%}
|
| 277 |
{%- elif block.type == 'response' -%}
|
| 278 |
{%- if ns.in_tool -%}
|
| 279 |
{{ ']' }}
|
|
|
|
| 308 |
{%- endif -%}
|
| 309 |
{%- endfor -%}
|
| 310 |
{{ ']' + end_tool_calls_token }}
|
| 311 |
+
{%- set ns.waiting_for_tool_outputs = true -%}
|
| 312 |
{%- endif -%}
|
| 313 |
{%- elif message.role == 'tool' -%}
|
| 314 |
{%- if not ns.in_assistant -%}
|
|
|
|
| 321 |
{{ ", "}}
|
| 322 |
{%- endif -%}
|
| 323 |
{{ message.content }}
|
| 324 |
+
{%- set ns.waiting_for_tool_outputs = false -%}
|
| 325 |
{%- else -%}
|
| 326 |
{{- raise_exception("Invalid message role") -}}
|
| 327 |
{%- endif -%}
|
|
|
|
| 329 |
{%- if ns.in_tool -%}
|
| 330 |
{{ ']' }}
|
| 331 |
{%- endif -%}
|
| 332 |
+
{%- if ns.in_assistant and not (continue_assistant_message is defined and continue_assistant_message) and not ns.waiting_for_tool_outputs -%}
|
| 333 |
+
{{ end_assistant_token }}
|
| 334 |
+
{%- endif -%}
|
| 335 |
{%- if add_generation_prompt -%}
|
| 336 |
{{ assistant_token }}
|
| 337 |
{%- endif -%}
|
tokenizer.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:010095abf1dc6f52d4867584e7b3f0d4eece854593ae310220ec7782dd1b0a66
|
| 3 |
+
size 17078474
|
tokenizer_config.json
CHANGED
|
@@ -588,7 +588,7 @@
|
|
| 588 |
"special": true
|
| 589 |
},
|
| 590 |
"73": {
|
| 591 |
-
"content": "
|
| 592 |
"lstrip": false,
|
| 593 |
"normalized": false,
|
| 594 |
"rstrip": false,
|
|
|
|
| 588 |
"special": true
|
| 589 |
},
|
| 590 |
"73": {
|
| 591 |
+
"content": "<|image|>",
|
| 592 |
"lstrip": false,
|
| 593 |
"normalized": false,
|
| 594 |
"rstrip": false,
|