prince-canuma committed 8 days ago

Commit

16b359a

verified ·

1 Parent(s): 2f28015

Add files using upload-large-folder tool

Browse files

Files changed (31) hide show

.gitattributes +1 -0
README.md +36 -0
added_tokens.json +56 -0
chat_template.jinja +159 -0
config.json +614 -0
generation_config.json +7 -0
merges.txt +0 -0
model-00001-of-00019.safetensors +3 -0
model-00002-of-00019.safetensors +3 -0
model-00003-of-00019.safetensors +3 -0
model-00004-of-00019.safetensors +3 -0
model-00005-of-00019.safetensors +3 -0
model-00006-of-00019.safetensors +3 -0
model-00007-of-00019.safetensors +3 -0
model-00008-of-00019.safetensors +3 -0
model-00009-of-00019.safetensors +3 -0
model-00010-of-00019.safetensors +3 -0
model-00011-of-00019.safetensors +3 -0
model-00012-of-00019.safetensors +3 -0
model-00013-of-00019.safetensors +3 -0
model-00014-of-00019.safetensors +3 -0
model-00015-of-00019.safetensors +3 -0
model-00016-of-00019.safetensors +3 -0
model-00017-of-00019.safetensors +3 -0
model-00018-of-00019.safetensors +3 -0
model-00019-of-00019.safetensors +3 -0
model.safetensors.index.json +0 -0
special_tokens_map.json +75 -0
tokenizer.json +3 -0
tokenizer_config.json +496 -0
vocab.json +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,36 @@

+---
+pipeline_tag: text-generation
+license: mit
+library_name: mlx
+base_model: MiniMaxAI/MiniMax-M2
+tags:
+- mlx
+---
+# mlx-community/MiniMax-M2-3bit
+This model [mlx-community/MiniMax-M2-3bit](https://huggingface.co/mlx-community/MiniMax-M2-3bit) was
+converted to MLX format from [MiniMaxAI/MiniMax-M2](https://huggingface.co/MiniMaxAI/MiniMax-M2)
+using mlx-lm version **0.28.4**.
+## Use with mlx
+```bash
+pip install mlx-lm
+```
+```python
+from mlx_lm import load, generate
+model, tokenizer = load("mlx-community/MiniMax-M2-3bit")
+prompt = "hello"
+if tokenizer.chat_template is not None:
+    messages = [{"role": "user", "content": prompt}]
+    prompt = tokenizer.apply_chat_template(
+        messages, add_generation_prompt=True
+    )
+response = generate(model, tokenizer, prompt=prompt, verbose=True)
+```

added_tokens.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "</minimax:tool_call>": 200053,
+  "</think>": 200051,
+  "<add_file>": 200036,
+  "<code_context>": 200043,
+  "<code_interpreter>": 200023,
+  "<commit_after>": 200018,
+  "<commit_before>": 200016,
+  "<commit_message>": 200040,
+  "<commit_msg>": 200017,
+  "<delete_file>": 200037,
+  "<edit_file>": 200039,
+  "<empty_output>": 200015,
+  "<empty_source_file>": 200041,
+  "<file_content>": 200044,
+  "<file_sep>": 200049,
+  "<filename>": 200006,
+  "<filepath>": 200048,
+  "<fim_middle>": 200002,
+  "<fim_pad>": 200004,
+  "<fim_prefix>": 200001,
+  "<fim_suffix>": 200003,
+  "<function_call>": 200022,
+  "<gh_stars>": 200007,
+  "<issue_closed>": 200010,
+  "<issue_comment>": 200009,
+  "<issue_start>": 200008,
+  "<jupyter_code>": 200013,
+  "<jupyter_error>": 200035,
+  "<jupyter_output>": 200014,
+  "<jupyter_start>": 200011,
+  "<jupyter_text>": 200012,
+  "<minimax:tool_call>": 200052,
+  "<pr_start>": 200046,
+  "<rename_file>": 200038,
+  "<repo_struct>": 200042,
+  "<reponame>": 200005,
+  "<review_comment>": 200047,
+  "<source_files>": 200045,
+  "<think>": 200050,
+  "[e~[": 200020,
+  "]!d~[": 200021,
+  "]!p~[": 200000,
+  "]<]end of image[>[": 200030,
+  "]<]end of speech[>[": 200028,
+  "]<]end of video[>[": 200032,
+  "]<]image[>[": 200025,
+  "]<]speech[>[": 200024,
+  "]<]start of image[>[": 200029,
+  "]<]start of speech[>[": 200027,
+  "]<]start of video[>[": 200031,
+  "]<]video[>[": 200026,
+  "]<]vision pad[>[": 200033,
+  "]~!b[": 200034,
+  "]~b]": 200019
+}

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,159 @@

+{# ------------- special token variables ------------- #}
+{%- set toolcall_begin_token   = '<minimax:tool_call>'         -%}
+{%- set toolcall_end_token     = '</minimax:tool_call>'        -%}
+{#- Tool Rendering Functions ============================================== -#}
+{%- macro render_tool_namespace(namespace_name, tool_list) -%}
+{#- Emits one <tool>...</tool> line per entry of tool_list, each holding the JSON form of that entry's `function` field. namespace_name is accepted but never referenced in the body. -#}
+{%- for tool in tool_list -%}
+<tool>{{ tool.function | tojson(ensure_ascii=False) }}</tool>
+{% endfor -%}
+{%- endmacro -%}
+{%- macro visible_text(content) -%}
+{#- Renders the textual part of a message's content:
+      - a string is emitted directly;
+      - none emits nothing (without this branch a null content would reach the final else and be printed as the literal text "None");
+      - a non-mapping iterable is walked, emitting `item.text` for mapping items whose type is 'text' and emitting plain string items directly; other items produce nothing;
+      - anything else is emitted directly. -#}
+    {%- if content is string -%}
+        {{ content }}
+    {%- elif content is none -%}
+        {#- null content: render as empty text -#}
+    {%- elif content is iterable and content is not mapping -%}
+        {%- for item in content -%}
+            {%- if item is mapping and item.type == 'text' -%}
+                {{- item.text }}
+            {%- elif item is string -%}
+                {{- item }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- else -%}
+        {{- content }}
+    {%- endif -%}
+{%- endmacro -%}
+{#- System Message Construction ============================================ -#}
+{%- macro build_system_message(system_message) -%}
+{#- Builds the body of the system turn: the system message's content (rendered through visible_text) when present, otherwise model_identity, which is set to "You are a helpful assistant." when it is not defined. Optional current_date / current_location fields on system_message are appended, each on its own line. -#}
+    {%- if system_message and system_message.content -%}
+        {{- visible_text(system_message.content) }}
+    {%- else -%}
+        {%- if model_identity is not defined -%}
+            {%- set model_identity = "You are a helpful assistant." -%}
+        {%- endif -%}
+        {{- model_identity }}
+    {%- endif -%}
+    {#- Handle current_date -#}
+    {%- if system_message and system_message.current_date -%}
+        {{- '\n' ~ 'Current date: ' + system_message.current_date }}
+    {%- endif -%}
+    {#- Handle current_location -#}
+    {%- if system_message and system_message.current_location -%}
+        {{- '\n' ~ 'Current location: ' + system_message.current_location }}
+    {%- endif -%}
+{%- endmacro -%}
+{#- Main Template Logic ================================================= -#}
+{#- Extract system message (only first message if it's system) -#}
+{%- set system_message = none -%}
+{%- set conversation_messages = messages -%}
+{%- if messages and messages[0].role == "system" -%}
+    {%- set system_message = messages[0] -%}
+    {%- set conversation_messages = messages[1:] -%}
+{%- endif -%}
+{#- Get the last user message turn, for interleaved thinking -#}
+{#- A namespace object is used so the index assigned inside the loop is still readable after the loop ends; it stays -1 when conversation_messages contains no user message. -#}
+{%- set ns = namespace(last_user_index=-1) %}
+{% for m in conversation_messages %}
+    {%- if m.role == 'user' %}
+        {% set ns.last_user_index = loop.index0 -%}
+    {%- endif %}
+{%- endfor %}
+{#- Render system message -#}
+{{- ']~!b[' ~ ']~b]system' ~ '\n' }}
+{{- build_system_message(system_message) }}
+{#- Render tools if available -#}
+{%- if tools -%}
+    {{- '\n\n' ~ '# Tools' ~ '\n' ~ 'You may call one or more tools to assist with the user query.\nHere are the tools available in JSONSchema format:' ~ '\n' }}
+    {{- '\n' ~ '<tools>' ~ '\n' }}
+    {{- render_tool_namespace("functions", tools) }}
+    {{- '</tools>' ~ '\n\n' }}
+{{- 'When making tool calls, use XML format to invoke tools and pass parameters:' ~ '\n' }}
+{{- '\n' ~ toolcall_begin_token }}
+<invoke name="tool-name-1">
+<parameter name="param-key-1">param-value-1</parameter>
+<parameter name="param-key-2">param-value-2</parameter>
+...
+</invoke>
+{{- '\n' ~ toolcall_end_token }}
+{%- endif -%}
+{{- '[e~[\n' }}
+{#- Render messages -#}
+{#- last_tool_call.name is reset to none by every assistant turn without tool calls; the tool branch below raises when it is none. -#}
+{%- set last_tool_call = namespace(name=none) -%}
+{%- for message in conversation_messages -%}
+    {%- if message.role == 'assistant' -%}
+        {#- Only render reasoning_content if no user message follows -#}
+        {{- ']~b]ai' ~ '\n' }}
+        {%- set reasoning_content = '' %}
+        {%- set content = visible_text(message.content) %}
+        {#- Prefer an explicit reasoning_content string; otherwise split an inline <think>...</think> section out of the rendered content. -#}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].strip('\n').split('<think>')[-1].strip('\n') %}
+                {%- set content = content.split('</think>')[-1].strip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- if reasoning_content and loop.index0 > ns.last_user_index -%}
+            {{- '<think>' ~ '\n' ~ reasoning_content ~ '\n' ~ '</think>' ~ '\n\n' }}
+        {%- endif -%}
+        {%- if content -%}
+            {{- content }}
+        {%- endif -%}
+        {%- if message.tool_calls -%}
+            {{- '\n' ~ toolcall_begin_token ~ '\n' }}
+            {%- for tool_call in message.tool_calls -%}
+                {#- Accept both wrapped calls (fields under `function`) and flat calls. -#}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<invoke name="' + tool_call.name + '">' }}
+                {% set _args = tool_call.arguments %}
+                {%- for k, v in _args.items() %}
+                {{- '<parameter name="' + k + '">' }}
+                {#- String values are written raw; every other value is JSON-serialized. -#}
+                {{- v | tojson(ensure_ascii=False) if v is not string else v }}
+                {{- '</parameter>' }}
+                {% endfor %}
+                {{- '</invoke>' ~ '\n' }}
+            {%- endfor -%}
+            {{- toolcall_end_token}}
+            {#- NOTE(review): `.name` is read from the last tool_calls entry itself, not from its `function` field; for wrapped calls this is presumably undefined rather than none, so the `is none` check in the tool branch still passes. Confirm this is intended. -#}
+            {%- set last_tool_call.name = message.tool_calls[-1].name -%}
+        {%- else -%}
+            {%- set last_tool_call.name = none -%}
+        {%- endif -%}
+        {{- '[e~[' ~ '\n' }}
+    {%- elif message.role == 'tool' -%}
+    {#- Consecutive tool messages share one ']~b]tool' header and one closing '[e~[': the header is written only when the previous message is not a tool message, the terminator only when the next one is not. -#}
+    {%- if last_tool_call.name is none -%}
+        {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }}
+    {%- endif -%}
+    {%- if loop.first or (conversation_messages[loop.index0 - 1].role != 'tool') -%}
+        {{- ']~b]tool' }}
+    {%- endif -%}
+    {%- if message.content is string -%}
+        {{- '\n<response>' }}
+        {{- message.content }}
+        {{- '</response>' }}
+    {%- else -%}
+        {#- NOTE(review): list items close with '\n</response>' while string content closes with '</response>' and no newline; confirm the asymmetry is intended. -#}
+        {%- for tr in message.content -%}
+            {{- '\n<response>' }}
+            {{- tr.output if tr.output is defined else (tr.text if tr.type == 'text' and tr.text is defined else tr) }}
+            {{- '\n</response>' }}
+        {%- endfor -%}
+    {%- endif -%}
+    {%- if loop.last or (conversation_messages[loop.index0 + 1].role != 'tool') -%}
+        {{- '[e~[\n' -}}
+    {%- endif -%}
+    {%- elif message.role == 'user' -%}
+        {{- ']~b]user' ~ '\n' }}
+        {{- visible_text(message.content) }}
+        {{- '[e~[' ~ '\n' }}
+    {%- endif -%}
+{%- endfor -%}
+{#- Generation prompt -#}
+{%- if add_generation_prompt -%}
+{{- ']~b]ai' ~ '\n' ~ '<think>' ~ '\n' }}
+{%- endif -%}

config.json ADDED Viewed

	@@ -0,0 +1,614 @@

+{
+    "architectures": [
+        "MiniMaxM2ForCausalLM"
+    ],
+    "attention_dropout": 0.0,
+    "attn_type_list": [
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1,
+        1
+    ],
+    "bos_token_id": null,
+    "eos_token_id": null,
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 3072,
+    "initializer_range": 0.02,
+    "intermediate_size": 1536,
+    "layernorm_full_attention_beta": 1.0,
+    "layernorm_linear_attention_beta": 1.0,
+    "layernorm_mlp_beta": 1.0,
+    "max_position_embeddings": 196608,
+    "mlp_intermediate_size": 8192,
+    "model_type": "minimax",
+    "mtp_transformer_layers": 1,
+    "num_attention_heads": 48,
+    "num_experts_per_tok": 8,
+    "num_hidden_layers": 62,
+    "num_key_value_heads": 8,
+    "num_local_experts": 256,
+    "num_mtp_modules": 3,
+    "output_router_logits": false,
+    "qk_norm_type": "per_layer",
+    "quantization": {
+        "group_size": 64,
+        "bits": 3,
+        "mode": "affine",
+        "model.layers.0.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.1.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.2.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.3.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.4.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.5.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.6.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.7.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.8.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.9.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.10.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.11.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.12.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.13.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.14.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.15.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.16.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.17.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.18.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.19.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.20.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.21.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.22.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.23.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.24.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.25.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.26.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.27.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.28.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.29.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.30.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.31.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.32.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.33.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.34.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.35.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.36.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.37.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.38.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.39.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.40.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.41.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.42.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.43.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.44.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.45.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.46.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.47.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.48.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.49.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.50.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.51.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.52.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.53.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.54.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.55.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.56.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.57.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.58.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.59.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.60.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.61.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        }
+    },
+    "quantization_config": {
+        "group_size": 64,
+        "bits": 3,
+        "mode": "affine",
+        "model.layers.0.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.1.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.2.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.3.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.4.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.5.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.6.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.7.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.8.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.9.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.10.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.11.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.12.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.13.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.14.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.15.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.16.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.17.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.18.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.19.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.20.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.21.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.22.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.23.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.24.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.25.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.26.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.27.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.28.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.29.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.30.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.31.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.32.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.33.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.34.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.35.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.36.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.37.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.38.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.39.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.40.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.41.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.42.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.43.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.44.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.45.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.46.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.47.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.48.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.49.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.50.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.51.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.52.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.53.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.54.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.55.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.56.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.57.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.58.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.59.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.60.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        },
+        "model.layers.61.block_sparse_moe.gate": {
+            "group_size": 64,
+            "bits": 8
+        }
+    },
+    "rms_norm_eps": 1e-06,
+    "rope_theta": 5000000,
+    "rotary_dim": 64,
+    "router_aux_loss_coef": 0.001,
+    "router_jitter_noise": 0.0,
+    "scoring_func": "sigmoid",
+    "shared_intermediate_size": 0,
+    "shared_moe_mode": "sigmoid",
+    "sliding_window": null,
+    "tie_word_embeddings": false,
+    "transformers_version": "4.46.1",
+    "use_cache": true,
+    "use_mtp": true,
+    "use_qk_norm": true,
+    "use_routing_bias": true,
+    "vocab_size": 200064
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "do_sample": true,
+  "temperature": 1.0,
+  "top_p": 0.95,
+  "top_k": 40,
+  "transformers_version": "4.46.1"
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model-00001-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0583b966f8d771579f246d50d355e991d2614a6d5e5dad11abc0162cfee508c5
+size 5105946242

model-00002-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8cfb95c086e26b3bc601e8c0c1a484531e971f9cf70e8ef07d0c9ee3522b35f9
+size 5345374071

model-00003-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d11c6734473a312348a8705fc86609c55bb016c5b5308c66df1bf4931090da1d
+size 5345374063

model-00004-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:860930309828068043b208a28b7833748891646b4adca50262c7bb07cd8ea5d7
+size 5365556366

model-00005-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:284ab29393a8a7b6169ed6e61957a99b7816475d4d2cf2d1db64acfa24dc9076
+size 5345374165

model-00006-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cc172737fdc5bfaa9428a3e0268e0bed65cf902586c6272307c909c2504d0085
+size 5345374093

model-00007-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cbec5c76e630873edc7a9c8cae83ce53d195dd0471f093ceb7834891cea8a2da
+size 5365556384

model-00008-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:42dd5961fdd73800f23f434a04671d6459db669dfbfb9e2f648d3c7760db2cbe
+size 5345374163

model-00009-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6cf99535f214fbb2ec6960589a8b8d27d60e0eac6d5c37686cf5b4c51a4d6a32
+size 5345374113

model-00010-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2c21593ed8c8f709982af22b02dbf26270cc2ef781988d519545adbe7224af92
+size 5365556348

model-00011-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f3698a03f7e88662d49e7020e197b25517215b149185366324e55a493ae20b4a
+size 5345374163

model-00012-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7ee590bb71308044614a143dd791e3fe8e4e9ab7e82d258027c749b14d34b5df
+size 5345374169

model-00013-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2480c5232f4df3b9eac4e21aac965c8bf29308bf253cf7cd5a9ca6913e7a03d3
+size 5365556434

model-00014-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bcb50287bd4e723fb3afb9461d478de670fea0cdb3bff39e6e5fbc41f76791b9
+size 5345374087

model-00015-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b405c6eb0bc2e48763105ba7cb2961ac9a20357e9527878dd1ed28c365762e88
+size 5345374153

model-00016-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ffab19e0ab8955f937ddca937595800f1f56f29d57cbbe2715031b59f0db0b2d
+size 5365556386

model-00017-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a887b624d1191f7c5c9cbaca531f030453603e500575fe22207be94c66ba778
+size 5345374169

model-00018-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b2d46d8a51ec6e0478794d10b10b4a1478abfbfb9f34dc574cc2d540208a64fa
+size 5345374163

model-00019-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:26d04d89cfb7263bd33145c366c0f26aa90c201f93e03e940db297b9f616a109
+size 4008649866

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,75 @@

+{
+  "additional_special_tokens": [
+    "<code_interpreter>",
+    "<commit_after>",
+    "<commit_before>",
+    "<commit_msg>",
+    "<empty_output>",
+    "<filename>",
+    "<fim_middle>",
+    "<fim_pad>",
+    "<fim_prefix>",
+    "<fim_suffix>",
+    "<function_call>",
+    "<gh_stars>",
+    "]<]speech[>[",
+    "]<]image[>[",
+    "]<]video[>[",
+    "]<]start of speech[>[",
+    "]<]end of speech[>[",
+    "]<]start of image[>[",
+    "]<]end of image[>[",
+    "]<]start of video[>[",
+    "]<]end of video[>[",
+    "]<]vision pad[>[",
+    "]~!b[",
+    "<issue_closed>",
+    "<issue_comment>",
+    "<issue_start>",
+    "<jupyter_code>",
+    "<jupyter_output>",
+    "<jupyter_start>",
+    "<jupyter_text>",
+    "<reponame>",
+    "[e~[",
+    "]!d~[",
+    "]!p~[",
+    "]~b]",
+    "<jupyter_error>",
+    "<add_file>",
+    "<delete_file>",
+    "<rename_file>",
+    "<edit_file>",
+    "<commit_message>",
+    "<empty_source_file>",
+    "<repo_struct>",
+    "<code_context>",
+    "<file_content>",
+    "<source_files>",
+    "<pr_start>",
+    "<review_comment>",
+    "<filepath>",
+    "<file_sep>"
+  ],
+  "bos_token": {
+    "content": "]~!b[",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "[e~[",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "]!d~[",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e7b90ed7f55d905175bc26771d6d7d33b40b46742f073675bc816fedaf482ea1
+size 15522763

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,496 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "200000": {
+      "content": "]!p~[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200001": {
+      "content": "<fim_prefix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200002": {
+      "content": "<fim_middle>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200003": {
+      "content": "<fim_suffix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200004": {
+      "content": "<fim_pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200005": {
+      "content": "<reponame>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200006": {
+      "content": "<filename>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200007": {
+      "content": "<gh_stars>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200008": {
+      "content": "<issue_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200009": {
+      "content": "<issue_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200010": {
+      "content": "<issue_closed>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200011": {
+      "content": "<jupyter_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200012": {
+      "content": "<jupyter_text>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200013": {
+      "content": "<jupyter_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200014": {
+      "content": "<jupyter_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200015": {
+      "content": "<empty_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200016": {
+      "content": "<commit_before>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200017": {
+      "content": "<commit_msg>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200018": {
+      "content": "<commit_after>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200019": {
+      "content": "]~b]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200020": {
+      "content": "[e~[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200021": {
+      "content": "]!d~[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200022": {
+      "content": "<function_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200023": {
+      "content": "<code_interpreter>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200024": {
+      "content": "]<]speech[>[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200025": {
+      "content": "]<]image[>[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200026": {
+      "content": "]<]video[>[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200027": {
+      "content": "]<]start of speech[>[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200028": {
+      "content": "]<]end of speech[>[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200029": {
+      "content": "]<]start of image[>[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200030": {
+      "content": "]<]end of image[>[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200031": {
+      "content": "]<]start of video[>[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200032": {
+      "content": "]<]end of video[>[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200033": {
+      "content": "]<]vision pad[>[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200034": {
+      "content": "]~!b[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200035": {
+      "content": "<jupyter_error>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200036": {
+      "content": "<add_file>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200037": {
+      "content": "<delete_file>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200038": {
+      "content": "<rename_file>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200039": {
+      "content": "<edit_file>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200040": {
+      "content": "<commit_message>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200041": {
+      "content": "<empty_source_file>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200042": {
+      "content": "<repo_struct>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200043": {
+      "content": "<code_context>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200044": {
+      "content": "<file_content>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200045": {
+      "content": "<source_files>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200046": {
+      "content": "<pr_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200047": {
+      "content": "<review_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200048": {
+      "content": "<filepath>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200049": {
+      "content": "<file_sep>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200050": {
+      "content": "<think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "200051": {
+      "content": "</think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "200052": {
+      "content": "<minimax:tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "200053": {
+      "content": "</minimax:tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<code_interpreter>",
+    "<commit_after>",
+    "<commit_before>",
+    "<commit_msg>",
+    "<empty_output>",
+    "<filename>",
+    "<fim_middle>",
+    "<fim_pad>",
+    "<fim_prefix>",
+    "<fim_suffix>",
+    "<function_call>",
+    "<gh_stars>",
+    "]<]speech[>[",
+    "]<]image[>[",
+    "]<]video[>[",
+    "]<]start of speech[>[",
+    "]<]end of speech[>[",
+    "]<]start of image[>[",
+    "]<]end of image[>[",
+    "]<]start of video[>[",
+    "]<]end of video[>[",
+    "]<]vision pad[>[",
+    "]~!b[",
+    "<issue_closed>",
+    "<issue_comment>",
+    "<issue_start>",
+    "<jupyter_code>",
+    "<jupyter_output>",
+    "<jupyter_start>",
+    "<jupyter_text>",
+    "<reponame>",
+    "[e~[",
+    "]!d~[",
+    "]!p~[",
+    "]~b]",
+    "<jupyter_error>",
+    "<add_file>",
+    "<delete_file>",
+    "<rename_file>",
+    "<edit_file>",
+    "<commit_message>",
+    "<empty_source_file>",
+    "<repo_struct>",
+    "<code_context>",
+    "<file_content>",
+    "<source_files>",
+    "<pr_start>",
+    "<review_comment>",
+    "<filepath>",
+    "<file_sep>"
+  ],
+  "bos_token": "]~!b[",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "[e~[",
+  "extra_special_tokens": {},
+  "model_max_length": 40960000,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "]!d~["
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff