JunHowie committed 10 days ago

Commit

08e240a

verified ·

1 Parent(s): 3e23943

Add files using upload-large-folder tool

Browse files

Files changed (21) hide show

.gitattributes +2 -0
chat_template.jinja +159 -0
config.json +117 -0
docs/function_call_guide.md +482 -0
docs/vllm_deploy_guide.md +88 -0
figures/Bench.png +3 -0
model-00001-of-00041.safetensors +3 -0
model-00002-of-00041.safetensors +3 -0
model-00003-of-00041.safetensors +3 -0
model-00004-of-00041.safetensors +3 -0
model-00005-of-00041.safetensors +3 -0
model-00006-of-00041.safetensors +3 -0
model-00007-of-00041.safetensors +3 -0
model-00008-of-00041.safetensors +3 -0
model-00009-of-00041.safetensors +3 -0
model-00010-of-00041.safetensors +3 -0
model-00015-of-00041.safetensors +3 -0
model-00040-of-00041.safetensors +3 -0
model-00041-of-00041.safetensors +3 -0
model.safetensors.index.json +3 -0
tokenizer_config.json +495 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+model.safetensors.index.json filter=lfs diff=lfs merge=lfs -text
+figures/Bench.png filter=lfs diff=lfs merge=lfs -text

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,159 @@

+{# ----------‑‑‑ special token variables ‑‑‑---------- #}
+{%- set toolcall_begin_token   = '<minimax:tool_call>'         -%}
+{%- set toolcall_end_token     = '</minimax:tool_call>'        -%}
+{#- Tool Rendering Functions ============================================== -#}
+{%- macro render_tool_namespace(namespace_name, tool_list) -%}
+{%- for tool in tool_list -%}
+<tool>{{ tool.function | tojson(ensure_ascii=False) }}</tool>
+{% endfor -%}
+{%- endmacro -%}
+{%- macro visible_text(content) -%}
+    {%- if content is string -%}
+        {{ content }}
+    {%- elif content is iterable and content is not mapping -%}
+        {%- for item in content -%}
+            {%- if item is mapping and item.type == 'text' -%}
+                {{- item.text }}
+            {%- elif item is string -%}
+                {{- item }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- else -%}
+        {{- content }}
+    {%- endif -%}
+{%- endmacro -%}
+{#- System Message Construction ============================================ -#}
+{%- macro build_system_message(system_message) -%}
+    {%- if system_message and system_message.content -%}
+        {{- visible_text(system_message.content) }}
+    {%- else -%}
+        {%- if model_identity is not defined -%}
+            {%- set model_identity = "You are a helpful assistant." -%}
+        {%- endif -%}
+        {{- model_identity }}
+    {%- endif -%}
+    {#- Handle current_date -#}
+    {%- if system_message and system_message.current_date -%}
+        {{- '\n' ~ 'Current date: ' + system_message.current_date }}
+    {%- endif -%}
+    {#- Handle current_location -#}
+    {%- if system_message and system_message.current_location -%}
+        {{- '\n' ~ 'Current location: ' + system_message.current_location }}
+    {%- endif -%}
+{%- endmacro -%}
+{#- Main Template Logic ================================================= -#}
+{#- Extract system message (only first message if it's system) -#}
+{%- set system_message = none -%}
+{%- set conversation_messages = messages -%}
+{%- if messages and messages[0].role == "system" -%}
+    {%- set system_message = messages[0] -%}
+    {%- set conversation_messages = messages[1:] -%}
+{%- endif -%}
+{#- Get the last user message turn, for interleaved thinking -#}
+{%- set ns = namespace(last_user_index=-1) %}
+{% for m in conversation_messages %}
+    {%- if m.role == 'user' %}
+        {% set ns.last_user_index = loop.index0 -%}
+    {%- endif %}
+{%- endfor %}
+{#- Render system message -#}
+{{- ']~!b[' ~ ']~b]system' ~ '\n' }}
+{{- build_system_message(system_message) }}
+{#- Render tools if available -#}
+{%- if tools -%}
+    {{- '\n\n' ~ '# Tools' ~ '\n' ~ 'You may call one or more tools to assist with the user query.\nHere are the tools available in JSONSchema format:' ~ '\n' }}
+    {{- '\n' ~ '<tools>' ~ '\n' }}
+    {{- render_tool_namespace("functions", tools) }}
+    {{- '</tools>' ~ '\n\n' }}
+{{- 'When making tool calls, use XML format to invoke tools and pass parameters:' ~ '\n' }}
+{{- '\n' ~ toolcall_begin_token }}
+<invoke name="tool-name-1">
+<parameter name="param-key-1">param-value-1</parameter>
+<parameter name="param-key-2">param-value-2</parameter>
+...
+</invoke>
+{{- '\n' ~ toolcall_end_token }}
+{%- endif -%}
+{{- '[e~[\n' }}
+{#- Render messages -#}
+{%- set last_tool_call = namespace(name=none) -%}
+{%- for message in conversation_messages -%}
+    {%- if message.role == 'assistant' -%}
+        {#- Only render reasoning_content if no user message follows -#}
+        {{- ']~b]ai' ~ '\n' }}
+        {%- set reasoning_content = '' %}
+        {%- set content = visible_text(message.content) %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].strip('\n').split('<think>')[-1].strip('\n') %}
+                {%- set content = content.split('</think>')[-1].strip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- if reasoning_content and loop.index0 > ns.last_user_index -%}
+            {{- '<think>' ~ '\n' ~ reasoning_content ~ '\n' ~ '</think>' ~ '\n\n' }}
+        {%- endif -%}
+        {%- if content -%}
+            {{- content }}
+        {%- endif -%}
+        {%- if message.tool_calls -%}
+            {{- '\n' ~ toolcall_begin_token ~ '\n' }}
+            {%- for tool_call in message.tool_calls -%}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<invoke name="' + tool_call.name + '">' }}
+                {% set _args = tool_call.arguments %}
+                {%- for k, v in _args.items() %}
+                {{- '<parameter name="' + k + '">' }}
+                {{- v | tojson(ensure_ascii=False) if v is not string else v }}
+                {{- '</parameter>' }}
+                {% endfor %}
+                {{- '</invoke>' ~ '\n' }}
+            {%- endfor -%}
+            {{- toolcall_end_token}}
+            {%- set last_tool_call.name = message.tool_calls[-1].name -%}
+        {%- else -%}
+            {%- set last_tool_call.name = none -%}
+        {%- endif -%}
+        {{- '[e~[' ~ '\n' }}
+    {%- elif message.role == 'tool' -%}
+    {%- if last_tool_call.name is none -%}
+        {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }}
+    {%- endif -%}
+    {%- if loop.first or (conversation_messages[loop.index0 - 1].role != 'tool') -%}
+        {{- ']~b]tool' }}
+    {%- endif -%}
+    {%- if message.content is string -%}
+        {{- '\n<response>' }}
+        {{- message.content }}
+        {{- '</response>' }}
+    {%- else -%}
+        {%- for tr in message.content -%}
+            {{- '\n<response>' }}
+            {{- tr.output if tr.output is defined else (tr.text if tr.type == 'text' and tr.text is defined else tr) }}
+            {{- '\n</response>' }}
+        {%- endfor -%}
+    {%- endif -%}
+    {%- if loop.last or (conversation_messages[loop.index0 + 1].role != 'tool') -%}
+        {{- '[e~[\n' -}}
+    {%- endif -%}
+    {%- elif message.role == 'user' -%}
+        {{- ']~b]user' ~ '\n' }}
+        {{- visible_text(message.content) }}
+        {{- '[e~[' ~ '\n' }}
+    {%- endif -%}
+{%- endfor -%}
+{#- Generation prompt -#}
+{%- if add_generation_prompt -%}
+{{- ']~b]ai' ~ '\n' ~ '<think>' ~ '\n' }}
+{%- endif -%}

config.json ADDED Viewed

	@@ -0,0 +1,117 @@

+{
+  "name_or_path": "tclf90/MiniMax-M2-AWQ",
+  "architectures": [
+    "MiniMaxM2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "attn_type_list": [
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1
+  ],
+  "bos_token_id": null,
+  "eos_token_id": null,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 3072,
+  "initializer_range": 0.02,
+  "intermediate_size": 1536,
+  "layernorm_full_attention_beta": 1.0,
+  "layernorm_linear_attention_beta": 1.0,
+  "layernorm_mlp_beta": 1.0,
+  "max_position_embeddings": 196608,
+  "mlp_intermediate_size": 8192,
+  "model_type": "mixtral",
+  "mtp_transformer_layers": 1,
+  "num_attention_heads": 48,
+  "num_experts_per_tok": 8,
+  "num_hidden_layers": 62,
+  "num_key_value_heads": 8,
+  "num_local_experts": 256,
+  "num_mtp_modules": 3,
+  "output_router_logits": false,
+  "qk_norm_type": "per_layer",
+  "quantization_config": {
+    "quant_method": "awq",
+    "bits": 4,
+    "group_size": 128,
+    "version": "gemm",
+    "zero_point": true
+  },
+  "rms_norm_eps": 1e-06,
+  "rope_theta": 5000000,
+  "rotary_dim": 64,
+  "router_aux_loss_coef": 0.001,
+  "router_jitter_noise": 0.0,
+  "scoring_func": "sigmoid",
+  "shared_intermediate_size": 0,
+  "shared_moe_mode": "sigmoid",
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "transformers_version": "4.46.1",
+  "use_cache": true,
+  "use_mtp": true,
+  "use_qk_norm": true,
+  "use_routing_bias": true,
+  "vocab_size": 200064,
+  "torch_dtype": "float16"
+}

docs/function_call_guide.md ADDED Viewed

	@@ -0,0 +1,482 @@

+# MiniMax-M2 Function Call Guide
+## Introduction
+The MiniMax-M2 model supports function calling capabilities, enabling the model to identify when external functions need to be called and output function call parameters in a structured format. This document provides detailed instructions on how to use the function calling features of MiniMax-M2.
+## Basic Example
+The following Python script implements a weather query function call example based on the OpenAI SDK:
+```python
+from openai import OpenAI
+import json
+client = OpenAI(base_url="http://localhost:8000/v1", api_key="dummy")
+def get_weather(location: str, unit: str):
+    return f"Getting the weather for {location} in {unit}..."
+tool_functions = {"get_weather": get_weather}
+tools = [{
+    "type": "function",
+    "function": {
+        "name": "get_weather",
+        "description": "Get the current weather in a given location",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "location": {"type": "string", "description": "City and state, e.g., 'San Francisco, CA'"},
+                "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}
+            },
+            "required": ["location", "unit"]
+        }
+    }
+}]
+response = client.chat.completions.create(
+    model=client.models.list().data[0].id,
+    messages=[{"role": "user", "content": "What's the weather like in San Francisco? use celsius."}],
+    tools=tools,
+    tool_choice="auto"
+)
+print(response)
+tool_call = response.choices[0].message.tool_calls[0].function
+print(f"Function called: {tool_call.name}")
+print(f"Arguments: {tool_call.arguments}")
+print(f"Result: {get_weather(**json.loads(tool_call.arguments))}")
+```
+**Output Example:**
+```
+Function called: get_weather
+Arguments: {"location": "San Francisco, CA", "unit": "celsius"}
+Result: Getting the weather for San Francisco, CA in celsius...
+```
+## Manually Parsing Model Output
+If you cannot use the built-in parser of inference engines that support MiniMax-M2, or need to use other inference frameworks (such as transformers, TGI, etc.), you can manually parse the model's raw output using the following method. This approach requires you to parse the XML tag format of the model output yourself.
+### Example Using Transformers
+Here is a complete example using the transformers library:
+```python
+from transformers import AutoTokenizer
+def get_default_tools():
+    return [
+        {
+          "name": "get_current_weather",
+          "description": "Get the latest weather for a location",
+          "parameters": {
+              "type": "object",
+              "properties": {
+                  "location": {
+                      "type": "string",
+                      "description": "A certain city, such as Beijing, Shanghai"
+                  }
+              },
+          }
+          "required": ["location"],
+          "type": "object"
+        }
+    ]
+# Load model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+prompt = "What's the weather like in Shanghai today?"
+messages = [
+    {"role": "system", "content": "You are a helpful assistant."},
+    {"role": "user", "content": prompt},
+]
+# Enable function calling tools
+tools = get_default_tools()
+# Apply chat template and include tool definitions
+text = tokenizer.apply_chat_template(
+    messages,
+    tokenize=False,
+    add_generation_prompt=True,
+    tools=tools
+)
+# Send request (using any inference service)
+import requests
+payload = {
+    "model": "MiniMaxAI/MiniMax-M2",
+    "prompt": text,
+    "max_tokens": 4096
+}
+response = requests.post(
+    "http://localhost:8000/v1/completions",
+    headers={"Content-Type": "application/json"},
+    json=payload,
+    stream=False,
+)
+# Model output needs manual parsing
+raw_output = response.json()["choices"][0]["text"]
+print("Raw output:", raw_output)
+# Use the parsing function below to process the output
+function_calls = parse_tool_calls(raw_output, tools)
+```
+## 🛠️ Function Call Definition
+### Function Structure
+Function calls need to define the `tools` field in the request body. Each function consists of the following parts:
+```json
+{
+  "tools": [
+    {
+      "name": "search_web",
+      "description": "Search function.",
+      "parameters": {
+        "properties": {
+          "query_list": {
+            "description": "Keywords for search, list should contain 1 element.",
+            "items": { "type": "string" },
+            "type": "array"
+          },
+          "query_tag": {
+            "description": "Category of query",
+            "items": { "type": "string" },
+            "type": "array"
+          }
+        },
+        "required": [ "query_list", "query_tag" ],
+        "type": "object"
+      }
+    }
+  ]
+}
+```
+**Field Descriptions:**
+- `name`: Function name
+- `description`: Function description
+- `parameters`: Function parameter definition
+  - `properties`: Parameter property definition, where key is the parameter name and value contains detailed parameter description
+  - `required`: List of required parameters
+  - `type`: Parameter type (usually "object")
+### Internal Processing Format
+When processing within the MiniMax-M2 model, function definitions are converted to a special format and concatenated to the input text. Here is a complete example:
+```
+]~!b[]~b]system
+You are a helpful assistant.
+# Tools
+You may call one or more tools to assist with the user query.
+Here are the tools available in JSONSchema format:
+<tools>
+<tool>{"name": "search_web", "description": "Search function.", "parameters": {"type": "object", "properties": {"query_list": {"type": "array", "items": {"type": "string"}, "description": "Keywords for search, list should contain 1 element."}, "query_tag": {"type": "array", "items": {"type": "string"}, "description": "Category of query"}}, "required": ["query_list", "query_tag"]}}</tool>
+</tools>
+When making tool calls, use XML format to invoke tools and pass parameters:
+<minimax:tool_call>
+<invoke name="tool-name-1">
+<parameter name="param-key-1">param-value-1</parameter>
+<parameter name="param-key-2">param-value-2</parameter>
+...
+</invoke>
+</minimax:tool_call>[e~[
+]~b]user
+When were the latest announcements from OpenAI and Gemini?[e~[
+]~b]ai
+<think>
+```
+**Format Description:**
+- `]~!b[]~b]system`: System message start marker
+- `[e~[`: Message end marker
+- `]~b]user`: User message start marker
+- `]~b]ai`: Assistant message start marker
+- `]~b]tool`: Tool result message start marker
+- `<tools>...</tools>`: Tool definition area, each tool is wrapped with `<tool>` tag, content is JSON Schema
+- `<minimax:tool_call>...</minimax:tool_call>`: Tool call area
+- `<think>`: Thinking process marker during generation (optional)
+### Model Output Format
+MiniMax-M2 uses structured XML tag format:
+```xml
+<minimax:tool_call>
+<invoke name="search_web">
+<parameter name="query_tag">["technology", "events"]</parameter>
+<parameter name="query_list">["\"OpenAI\" \"latest\" \"release\""]</parameter>
+</invoke>
+<invoke name="search_web">
+<parameter name="query_tag">["technology", "events"]</parameter>
+<parameter name="query_list">["\"Gemini\" \"latest\" \"release\""]</parameter>
+</invoke>
+</minimax:tool_call>
+```
+Each function call uses the `<invoke name="function_name">` tag, and parameters use the `<parameter name="parameter_name">` tag wrapper.
+## Manually Parsing Function Call Results
+### Parsing Function Calls
+MiniMax-M2 uses structured XML tags, which require a different parsing approach. The core function is as follows:
+```python
+import re
+import json
+from typing import Any, Optional, List, Dict
+def extract_name(name_str: str) -> str:
+    """Extract name from quoted string"""
+    name_str = name_str.strip()
+    if name_str.startswith('"') and name_str.endswith('"'):
+        return name_str[1:-1]
+    elif name_str.startswith("'") and name_str.endswith("'"):
+        return name_str[1:-1]
+    return name_str
+def convert_param_value(value: str, param_type: str) -> Any:
+    """Convert parameter value based on parameter type"""
+    if value.lower() == "null":
+        return None
+    param_type = param_type.lower()
+    if param_type in ["string", "str", "text"]:
+        return value
+    elif param_type in ["integer", "int"]:
+        try:
+            return int(value)
+        except (ValueError, TypeError):
+            return value
+    elif param_type in ["number", "float"]:
+        try:
+            val = float(value)
+            return val if val != int(val) else int(val)
+        except (ValueError, TypeError):
+            return value
+    elif param_type in ["boolean", "bool"]:
+        return value.lower() in ["true", "1"]
+    elif param_type in ["object", "array"]:
+        try:
+            return json.loads(value)
+        except json.JSONDecodeError:
+            return value
+    else:
+        # Try JSON parsing, return string if failed
+        try:
+            return json.loads(value)
+        except json.JSONDecodeError:
+            return value
+def parse_tool_calls(model_output: str, tools: Optional[List[Dict]] = None) -> List[Dict]:
+    """
+    Extract all tool calls from model output
+    Args:
+        model_output: Complete output text from the model
+        tools: Tool definition list for getting parameter type information, format can be:
+               - [{"name": "...", "parameters": {...}}]
+               - [{"type": "function", "function": {"name": "...", "parameters": {...}}}]
+    Returns:
+        Parsed tool call list, each element contains name and arguments fields
+    Example:
+        >>> tools = [{
+        ...     "name": "get_weather",
+        ...     "parameters": {
+        ...         "type": "object",
+        ...         "properties": {
+        ...             "location": {"type": "string"},
+        ...             "unit": {"type": "string"}
+        ...         }
+        ...     }
+        ... }]
+        >>> output = '''<minimax:tool_call>
+        ... <invoke name="get_weather">
+        ... <parameter name="location">San Francisco</parameter>
+        ... <parameter name="unit">celsius</parameter>
+        ... </invoke>
+        ... </minimax:tool_call>'''
+        >>> result = parse_tool_calls(output, tools)
+        >>> print(result)
+        [{'name': 'get_weather', 'arguments': {'location': 'San Francisco', 'unit': 'celsius'}}]
+    """
+    # Quick check if tool call marker is present
+    if "<minimax:tool_call>" not in model_output:
+        return []
+    tool_calls = []
+    try:
+        # Match all <minimax:tool_call> blocks
+        tool_call_regex = re.compile(r"<minimax:tool_call>(.*?)</minimax:tool_call>", re.DOTALL)
+        invoke_regex = re.compile(r"<invoke name=(.*?)</invoke>", re.DOTALL)
+        parameter_regex = re.compile(r"<parameter name=(.*?)</parameter>", re.DOTALL)
+        # Iterate through all tool_call blocks
+        for tool_call_match in tool_call_regex.findall(model_output):
+            # Iterate through all invokes in this block
+            for invoke_match in invoke_regex.findall(tool_call_match):
+                # Extract function name
+                name_match = re.search(r'^([^>]+)', invoke_match)
+                if not name_match:
+                    continue
+                function_name = extract_name(name_match.group(1))
+                # Get parameter configuration
+                param_config = {}
+                if tools:
+                    for tool in tools:
+                        tool_name = tool.get("name") or tool.get("function", {}).get("name")
+                        if tool_name == function_name:
+                            params = tool.get("parameters") or tool.get("function", {}).get("parameters")
+                            if isinstance(params, dict) and "properties" in params:
+                                param_config = params["properties"]
+                            break
+                # Extract parameters
+                param_dict = {}
+                for match in parameter_regex.findall(invoke_match):
+                    param_match = re.search(r'^([^>]+)>(.*)', match, re.DOTALL)
+                    if param_match:
+                        param_name = extract_name(param_match.group(1))
+                        param_value = param_match.group(2).strip()
+                        # Remove leading and trailing newlines
+                        if param_value.startswith('\n'):
+                            param_value = param_value[1:]
+                        if param_value.endswith('\n'):
+                            param_value = param_value[:-1]
+                        # Get parameter type and convert
+                        param_type = "string"
+                        if param_name in param_config:
+                            if isinstance(param_config[param_name], dict) and "type" in param_config[param_name]:
+                                param_type = param_config[param_name]["type"]
+                        param_dict[param_name] = convert_param_value(param_value, param_type)
+                tool_calls.append({
+                    "name": function_name,
+                    "arguments": param_dict
+                })
+    except Exception as e:
+        print(f"Failed to parse tool calls: {e}")
+        return []
+    return tool_calls
+```
+**Usage Example:**
+```python
+# Define tools
+tools = [
+    {
+        "name": "get_weather",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "location": {"type": "string"},
+                "unit": {"type": "string"}
+            },
+            "required": ["location", "unit"]
+        }
+    }
+]
+# Model output
+model_output = """Let me help you query the weather.
+<minimax:tool_call>
+<invoke name="get_weather">
+<parameter name="location">San Francisco</parameter>
+<parameter name="unit">celsius</parameter>
+</invoke>
+</minimax:tool_call>"""
+# Parse tool calls
+tool_calls = parse_tool_calls(model_output, tools)
+# Output results
+for call in tool_calls:
+    print(f"Function called: {call['name']}")
+    print(f"Arguments: {call['arguments']}")
+    # Output: Function called: get_weather
+    #         Arguments: {'location': 'San Francisco', 'unit': 'celsius'}
+```
+### Executing Function Calls
+After parsing is complete, you can execute the corresponding function and construct the return result:
+```python
+def execute_function_call(function_name: str, arguments: dict):
+    """Execute function call and return result"""
+    if function_name == "get_weather":
+        location = arguments.get("location", "Unknown location")
+        unit = arguments.get("unit", "celsius")
+        # Build function execution result
+        return {
+            "role": "tool",
+            "content": [
+              {
+                "name": function_name,
+                "type": "text",
+                "text": json.dumps({
+                    "location": location,
+                    "temperature": "25",
+                    "unit": unit,
+                    "weather": "Sunny"
+                }, ensure_ascii=False)
+              }
+            ]
+          }
+    elif function_name == "search_web":
+        query_list = arguments.get("query_list", [])
+        query_tag = arguments.get("query_tag", [])
+        # Simulate search results
+        return {
+            "role": "tool",
+            "content": [
+              {
+                "name": function_name,
+                "type": "text",
+                "text": f"Search keywords: {query_list}, Category: {query_tag}\nSearch results: Relevant information found"
+              }
+            ]
+          }
+    return None
+```
+### Returning Function Execution Results to the Model
+After successfully parsing function calls, you should add the function execution results to the conversation history so that the model can access and utilize this information in subsequent interactions. Refer to chat_template.jinja for concatenation format.
+## References
+- [MiniMax-M2 Model Repository](https://github.com/MiniMax-AI/MiniMax-M2)
+- [vLLM Project Homepage](https://github.com/vllm-project/vllm)
+- [OpenAI Python SDK](https://github.com/openai/openai-python)

docs/vllm_deploy_guide.md ADDED Viewed

	@@ -0,0 +1,88 @@

+# MiniMax M2 Model vLLM Deployment Guide
+We recommend using [vLLM](https://docs.vllm.ai/en/stable/) to deploy the [MiniMax-M2](https://huggingface.co/MiniMaxAI/MiniMax-M2) model. vLLM is a high-performance inference engine with excellent serving throughput, efficient and intelligent memory management, powerful batch request processing capabilities, and deeply optimized underlying performance. We recommend reviewing vLLM's official documentation to check hardware compatibility before deployment.
+## System Requirements
+- OS: Linux
+- Python: 3.9 - 3.12
+- GPU:
+  - compute capability 7.0 or higher
+  - Memory requirements: 220 GB for weights, 60 GB per 1M context tokens
+The following are recommended configurations; actual requirements should be adjusted based on your use case:
+- 4x 96GB GPUs: Supports context input of up to 400K tokens.
+- 8x 144GB GPUs: Supports context input of up to 3M tokens.
+## Deployment with Python
+It is recommended to use a virtual environment (such as venv, conda, or uv) to avoid dependency conflicts. We recommend installing vLLM in a fresh Python environment:
+```bash
+# Not yet released, please install nightly build
+uv pip install -U vllm \
+    --torch-backend=auto \
+    --extra-index-url https://wheels.vllm.ai/nightly
+# If released, install using uv
+uv pip install "vllm" --torch-backend=auto
+```
+Run the following command to start the vLLM server. vLLM will automatically download and cache the MiniMax-M2 model from Hugging Face.
+4-GPU deployment command:
+```bash
+SAFETENSORS_FAST_GPU=1 VLLM_USE_V1=0 vllm serve \
+    MiniMaxAI/MiniMax-M2 \
+    --trust-remote-code \
+    --enable-expert-parallel --tensor-parallel-size 4 \
+    --enable-auto-tool-choice --tool-call-parser minimax_m2 \
+    --reasoning-parser minimax_m2
+```
+## Testing Deployment
+After startup, you can test the vLLM OpenAI-compatible API with the following command:
+```bash
+curl http://localhost:8000/v1/chat/completions \
+    -H "Content-Type: application/json" \
+    -d '{
+        "model": "MiniMaxAI/MiniMax-M2",
+        "messages": [
+            {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
+            {"role": "user", "content": [{"type": "text", "text": "Who won the world series in 2020?"}]}
+        ]
+    }'
+```
+## Common Issues
+### Hugging Face Network Issues
+If you encounter network issues, you can set up a proxy before pulling the model.
+```bash
+export HF_ENDPOINT=https://hf-mirror.com
+```
+### MiniMax-M2 model is not currently supported
+This vLLM version is outdated. Please upgrade to the latest version.
+## Getting Support
+If you encounter any issues while deploying the MiniMax model:
+- Contact our technical support team through official channels such as email at api@minimaxi.com
+- Submit an issue on our [GitHub](https://github.com/MiniMax-AI) repository
+We continuously optimize the deployment experience for our models. Feedback is welcome!

figures/Bench.png ADDED Viewed

Git LFS Details

SHA256: 5ca5e3f1bc81738c76de3e7b86f5e329334ef38d74690dfb73e046fe13324322
Pointer size: 131 Bytes
Size of remote file: 162 kB

model-00001-of-00041.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b1bb5393a7818d3160927d830676cbfb166032ec18fd92d13b3f27c4fead0987
+size 2998507472

model-00002-of-00041.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c0548c74136e47f31d99f1ec36e2f7ba26af4607dba634f0d53aad4d2542d26e
+size 3000140360

model-00003-of-00041.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:501907424df6366c8347ab5563c87be221fc943425f9b63beda480e5cefcd6e2
+size 2999224328

model-00004-of-00041.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9ad6ba2e48b9f3694285125fc8f775477aa5d9f8c6f2fb722622d0b77483efbd
+size 3000139080

model-00005-of-00041.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fbad54e6e5b5e458182b0ae33394a7bff87f02be2709913d7d0e3bd6f9464338
+size 2999224664

model-00006-of-00041.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:559f18c233a7a3b23270cb5078cdb2a0d459a4096006bc86bcdaca31a611476c
+size 2999223312

model-00007-of-00041.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bff913bf65ab332dfb0ea6eaf76cc4c7b57389c6df7beb80e631cca298d3e1cd
+size 3000141216

model-00008-of-00041.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:353643fc6a9a90c7e2b098d3f272e7f312fc5a3d367a5255bd9fd2b0bea52a8c
+size 2999226896

model-00009-of-00041.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9b8c84b2ee3746464971bf11e3ef07ba26ae0b8845882602c5ca2cbad2e09ca1
+size 3000144016

model-00010-of-00041.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a2a546c16ae0aefe42d03d3812473b1baf3286d7b7a8b12082731a9c083ea0c
+size 2999227968

model-00015-of-00041.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:24fba6f5010a7d2aff61906259532594faddf86cce972a90af6971f7fd96232b
+size 2999226896

model-00040-of-00041.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c2bddb39d77fe9b52719bd5e4404da0709846b34c941cb012544fde683d453a
+size 2606931696

model-00041-of-00041.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e9a14e18c736c0fadff1985dd1826447c2e593564d68ab10b844177325c854e5
+size 1229199576

model.safetensors.index.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9734599c8e3e00dbb5612bfe264edae70e2f994696b54ad57f70a1faf7beb251
+size 14057058

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,495 @@

+{
+  "added_tokens_decoder": {
+  "200000": {
+      "content": "]!p~[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200001": {
+      "content": "<fim_prefix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200002": {
+      "content": "<fim_middle>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200003": {
+      "content": "<fim_suffix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200004": {
+      "content": "<fim_pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200005": {
+      "content": "<reponame>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200006": {
+      "content": "<filename>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200007": {
+      "content": "<gh_stars>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200008": {
+      "content": "<issue_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200009": {
+      "content": "<issue_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200010": {
+      "content": "<issue_closed>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200011": {
+    "content": "<jupyter_start>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false,
+    "special": true
+  },
+  "200012": {
+      "content": "<jupyter_text>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200013": {
+      "content": "<jupyter_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200014": {
+      "content": "<jupyter_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200015": {
+    "content": "<empty_output>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false,
+    "special": true
+  },
+  "200016": {
+      "content": "<commit_before>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200017": {
+      "content": "<commit_msg>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200018": {
+      "content": "<commit_after>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200019": {
+      "content": "]~b]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200020": {
+      "content": "[e~[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200021": {
+      "content": "]!d~[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200022": {
+      "content": "<function_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200023": {
+      "content": "<code_interpreter>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200024": {
+      "content": "]<]speech[>[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200025": {
+      "content": "]<]image[>[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200026": {
+      "content": "]<]video[>[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200027": {
+      "content": "]<]start of speech[>[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200028": {
+      "content": "]<]end of speech[>[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200029": {
+      "content": "]<]start of image[>[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200030": {
+      "content": "]<]end of image[>[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200031": {
+      "content": "]<]start of video[>[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200032": {
+      "content": "]<]end of video[>[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200033": {
+      "content": "]<]vision pad[>[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200034": {
+      "content": "]~!b[",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200035": {
+      "content": "<jupyter_error>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200036": {
+      "content": "<add_file>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+  },
+  "200037": {
+      "content": "<delete_file>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200038": {
+      "content": "<rename_file>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200039": {
+      "content": "<edit_file>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200040": {
+      "content": "<commit_message>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200041": {
+      "content": "<empty_source_file>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200042": {
+      "content": "<repo_struct>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+  },
+  "200043": {
+    "content": "<code_context>",
+    "single_word": false,
+    "lstrip": false,
+    "rstrip": false,
+    "normalized": false,
+    "special": true
+  },
+  "200044": {
+    "content": "<file_content>",
+    "single_word": false,
+    "lstrip": false,
+    "rstrip": false,
+    "normalized": false,
+    "special": true
+  },
+  "200045": {
+    "content": "<source_files>",
+    "single_word": false,
+    "lstrip": false,
+    "rstrip": false,
+    "normalized": false,
+    "special": true
+  },
+  "200046": {
+    "content": "<pr_start>",
+    "single_word": false,
+    "lstrip": false,
+    "rstrip": false,
+    "normalized": false,
+    "special": true
+  },
+  "200047": {
+    "content": "<review_comment>",
+    "single_word": false,
+    "lstrip": false,
+    "rstrip": false,
+    "normalized": false,
+    "special": true
+  },
+  "200048": {
+    "content": "<filepath>",
+    "single_word": false,
+    "lstrip": false,
+    "rstrip": false,
+    "normalized": false,
+    "special": true
+  },
+  "200049": {
+    "content": "<file_sep>",
+    "single_word": false,
+    "lstrip": false,
+    "rstrip": false,
+    "normalized": false,
+    "special": true
+  },
+  "200050": {
+    "content": "<think>",
+    "single_word": false,
+    "lstrip": false,
+    "rstrip": false,
+    "normalized": false,
+    "special": false
+  },
+  "200051": {
+    "content": "</think>",
+    "single_word": false,
+    "lstrip": false,
+    "rstrip": false,
+    "normalized": false,
+    "special": false
+  },
+  "200052": {
+    "content": "<minimax:tool_call>",
+    "single_word": false,
+    "lstrip": false,
+    "rstrip": false,
+    "normalized": false,
+    "special": false
+  },
+  "200053": {
+    "content": "</minimax:tool_call>",
+    "single_word": false,
+    "lstrip": false,
+    "rstrip": false,
+    "normalized": false,
+    "special": false
+  }
+  },
+  "additional_special_tokens": [
+        "<code_interpreter>",
+        "<commit_after>",
+        "<commit_before>",
+        "<commit_msg>",
+        "<empty_output>",
+        "<filename>",
+        "<fim_middle>",
+        "<fim_pad>",
+        "<fim_prefix>",
+        "<fim_suffix>",
+        "<function_call>",
+        "<gh_stars>",
+        "]<]speech[>[",
+        "]<]image[>[",
+        "]<]video[>[",
+        "]<]start of speech[>[",
+        "]<]end of speech[>[",
+        "]<]start of image[>[",
+        "]<]end of image[>[",
+        "]<]start of video[>[",
+        "]<]end of video[>[",
+        "]<]vision pad[>[",
+        "]~!b[",
+        "<issue_closed>",
+        "<issue_comment>",
+        "<issue_start>",
+        "<jupyter_code>",
+        "<jupyter_output>",
+        "<jupyter_start>",
+        "<jupyter_text>",
+        "<reponame>",
+        "[e~[",
+        "]!d~[",
+        "]!p~[",
+        "]~b]",
+        "<jupyter_error>",
+        "<add_file>",
+        "<delete_file>",
+        "<rename_file>",
+        "<edit_file>",
+        "<commit_message>",
+        "<empty_source_file>",
+        "<repo_struct>",
+        "<code_context>",
+        "<file_content>",
+        "<source_files>",
+        "<pr_start>",
+        "<review_comment>",
+        "<filepath>",
+        "<file_sep>"
+    ],
+  "add_prefix_space": false,
+  "bos_token": "]~!b[",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "[e~[",
+  "model_max_length": 40960000,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "]!d~["
+}