mjaggi nathanrchn committed on
Commit
cdb3e4f
·
verified ·
1 Parent(s): 5df61a7

Add support for images and fix end of assistant issue (#22)

Browse files

- Add support for images and fix an issue where the end-of-assistant token was not added when the last message was an assistant message. (f89f5ef02e1b9ecd18dd8b982016312f09c08663)


Co-authored-by: nathan ranchin <nathanrchn@users.noreply.huggingface.co>

Files changed (3) hide show
  1. chat_template.jinja +11 -1
  2. tokenizer.json +2 -2
  3. tokenizer_config.json +1 -1
chat_template.jinja CHANGED
@@ -150,8 +150,9 @@
150
  {%- set outer_token = '<|inner_suffix|>' -%}
151
  {%- set tool_calls_token = '<|tools_prefix|>' -%}
152
  {%- set end_tool_calls_token = '<|tools_suffix|>' -%}
 
153
 
154
- {%- set ns = namespace(in_assistant=false, in_tool=false, in_inner=false, assistant_format=none) -%}
155
 
156
  {%- if messages and messages[0].role == 'system' -%}
157
  {%- if "content" in messages[0] -%}
@@ -204,6 +205,8 @@
204
  {%- for part in parts -%}
205
  {%- if part.type == "text" -%}
206
  {{ part.text }}
 
 
207
  {%- else -%}
208
  {{- raise_exception("Invalid user part: " + part.type) -}}
209
  {%- endif -%}
@@ -257,6 +260,7 @@
257
  {%- endif -%}
258
  {%- endfor -%}
259
  {{ ']' + end_tool_calls_token }}
 
260
  {%- elif block.type == 'tool_outputs' -%}
261
  {%- if ns.in_tool -%}
262
  {{- raise_exception("Cannot have both tool outputs as separate messages and tool outputs as blocks") -}}
@@ -269,6 +273,7 @@
269
  {%- endif -%}
270
  {%- endfor -%}
271
  {{- ']' }}
 
272
  {%- elif block.type == 'response' -%}
273
  {%- if ns.in_tool -%}
274
  {{ ']' }}
@@ -303,6 +308,7 @@
303
  {%- endif -%}
304
  {%- endfor -%}
305
  {{ ']' + end_tool_calls_token }}
 
306
  {%- endif -%}
307
  {%- elif message.role == 'tool' -%}
308
  {%- if not ns.in_assistant -%}
@@ -315,6 +321,7 @@
315
  {{ ", "}}
316
  {%- endif -%}
317
  {{ message.content }}
 
318
  {%- else -%}
319
  {{- raise_exception("Invalid message role") -}}
320
  {%- endif -%}
@@ -322,6 +329,9 @@
322
  {%- if ns.in_tool -%}
323
  {{ ']' }}
324
  {%- endif -%}
 
 
 
325
  {%- if add_generation_prompt -%}
326
  {{ assistant_token }}
327
  {%- endif -%}
 
150
  {%- set outer_token = '<|inner_suffix|>' -%}
151
  {%- set tool_calls_token = '<|tools_prefix|>' -%}
152
  {%- set end_tool_calls_token = '<|tools_suffix|>' -%}
153
+ {%- set image_token = '<|image|>' -%}
154
 
155
+ {%- set ns = namespace(in_assistant=false, in_tool=false, in_inner=false, waiting_for_tool_outputs=false, assistant_format=none) -%}
156
 
157
  {%- if messages and messages[0].role == 'system' -%}
158
  {%- if "content" in messages[0] -%}
 
205
  {%- for part in parts -%}
206
  {%- if part.type == "text" -%}
207
  {{ part.text }}
208
+ {%- elif part.type == "image" -%}
209
+ {{ image_token }}
210
  {%- else -%}
211
  {{- raise_exception("Invalid user part: " + part.type) -}}
212
  {%- endif -%}
 
260
  {%- endif -%}
261
  {%- endfor -%}
262
  {{ ']' + end_tool_calls_token }}
263
+ {%- set ns.waiting_for_tool_outputs = true -%}
264
  {%- elif block.type == 'tool_outputs' -%}
265
  {%- if ns.in_tool -%}
266
  {{- raise_exception("Cannot have both tool outputs as separate messages and tool outputs as blocks") -}}
 
273
  {%- endif -%}
274
  {%- endfor -%}
275
  {{- ']' }}
276
+ {%- set ns.waiting_for_tool_outputs = false -%}
277
  {%- elif block.type == 'response' -%}
278
  {%- if ns.in_tool -%}
279
  {{ ']' }}
 
308
  {%- endif -%}
309
  {%- endfor -%}
310
  {{ ']' + end_tool_calls_token }}
311
+ {%- set ns.waiting_for_tool_outputs = true -%}
312
  {%- endif -%}
313
  {%- elif message.role == 'tool' -%}
314
  {%- if not ns.in_assistant -%}
 
321
  {{ ", "}}
322
  {%- endif -%}
323
  {{ message.content }}
324
+ {%- set ns.waiting_for_tool_outputs = false -%}
325
  {%- else -%}
326
  {{- raise_exception("Invalid message role") -}}
327
  {%- endif -%}
 
329
  {%- if ns.in_tool -%}
330
  {{ ']' }}
331
  {%- endif -%}
332
+ {%- if ns.in_assistant and not (continue_assistant_message is defined and continue_assistant_message) and not ns.waiting_for_tool_outputs -%}
333
+ {{ end_assistant_token }}
334
+ {%- endif -%}
335
  {%- if add_generation_prompt -%}
336
  {{ assistant_token }}
337
  {%- endif -%}
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb201fb226cde11f66c3cf51c5344fb37b1611f00c21e75c324546d854eff2e1
3
- size 17078480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:010095abf1dc6f52d4867584e7b3f0d4eece854593ae310220ec7782dd1b0a66
3
+ size 17078474
tokenizer_config.json CHANGED
@@ -588,7 +588,7 @@
588
  "special": true
589
  },
590
  "73": {
591
- "content": "<SPECIAL_73>",
592
  "lstrip": false,
593
  "normalized": false,
594
  "rstrip": false,
 
588
  "special": true
589
  },
590
  "73": {
591
+ "content": "<|image|>",
592
  "lstrip": false,
593
  "normalized": false,
594
  "rstrip": false,