kimi-k2-thinking can't use a tool with 2 required arguments if it has 2 or more tools.

#30
by Yerkhat - opened

This is a strange problem. When kimi-k2-thinking had only one tool with two required arguments, it worked fine. After I added a second tool to test its interleaved reasoning ability, kimi-k2-thinking stopped calling the first tool correctly — it now omits one of the required arguments.

I am running the model using vLLM; perhaps its tool-call parser is not implemented correctly.

error:

TypeError: get_current_weather() missing 1 required positional argument: 'temperature_metric'

full code (sorry for excessive prints):

import os
import json
from dotenv import load_dotenv
from openai import OpenAI

# -----------------------------
# Load environment variables
# -----------------------------
load_dotenv()
# Endpoint and key for the OpenAI-compatible server (e.g. a vLLM deployment).
# Both come from the environment / .env file; None if unset.
API_URL = os.getenv("API_URL")
API_KEY = os.getenv("API_KEY")

# -----------------------------
# OpenAI client (compatible with vLLM/Kimi K2)
# -----------------------------
client = OpenAI(
    base_url=API_URL,
    api_key=API_KEY
)

# Model name as registered on the serving backend.
MODEL_NAME = "kimi-k2-thinking"

# -----------------------------
# Define your tools
# -----------------------------
def get_current_weather(location: str, temperature_metric: str):
    """Get the current weather in a given location.

    Simulated: returns a fixed temperature string in the requested unit.
    """
    # Celsius only when explicitly requested (case-insensitive);
    # any other value falls back to Fahrenheit.
    wants_celsius = temperature_metric.lower() == "celsius"
    if wants_celsius:
        return f"The current temperature in {location} is 22°C."
    return f"The current temperature in {location} is 72°F."
    
def get_local_time(location: str):
    """Get the current local time in a given location"""
    # Simulate different times for different locations
    time_offsets = {
        "San Francisco": -8,
        "New York": -5,
        "London": 0,
        "Tokyo": 9,
        "Sydney": 11
    }
    
    # Find matching location (case-insensitive partial match)
    offset = 0
    for city, hrs in time_offsets.items():
        if city.lower() in location.lower():
            offset = hrs
            break
    
    # Calculate local time (simplified - not accounting for DST)
    from datetime import datetime, timedelta, UTC
    utc_now = datetime.now(UTC)
    local_time = utc_now + timedelta(hours=offset)
    
    return f"The current local time in {location} is {local_time.strftime('%I:%M %p')} (UTC{offset:+d})"

# Tool definitions in OpenAI format.
# NOTE: the schema name "get_weather" differs from the Python function name
# get_current_weather; the available_tools mapping bridges the two.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "City and state, e.g., 'San Francisco, CA'",
                    },
                    "temperature_metric": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        # A missing description makes it easier for the model
                        # to drop this argument; spell out that it is required.
                        "description": "Temperature unit to report in. Required.",
                    },
                },
                "required": ["location", "temperature_metric"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "get_local_time",
            "description": "Get the current local time in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "City and state, e.g., 'San Francisco, CA'",
                    },
                },
                "required": ["location"],
            },
        },
    }
]

# Map tool names to functions.
# Keys must match the "name" fields declared in `tools`; note that
# "get_weather" deliberately maps to the Python function get_current_weather.
available_tools = {"get_weather": get_current_weather, "get_local_time": get_local_time}
# -----------------------------
# Agent execution with reasoning
# -----------------------------
def run_agent_with_reasoning(query: str, max_iterations: int = 100):
    """
    Run an agent loop with interleaved reasoning support.

    Repeatedly calls the model, executes any requested tool calls, feeds the
    results back, and stops when the model answers without tool calls.

    Args:
        query: The user's question.
        max_iterations: Safety cap on model round-trips.

    Returns:
        dict with keys "answer" (final text), "reasoning" (final reasoning
        trace, if the server exposes one), and "messages" (full history,
        including the final assistant turn).

    Raises:
        RuntimeError: if no final answer is produced within max_iterations.
    """
    messages = [{"role": "user", "content": query}]

    for iteration in range(max_iterations):
        print(f"\n--- Iteration {iteration + 1} ---")

        # Call LLM with tools
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            tools=tools,
            tool_choice="auto",
        )
        response_message = response.choices[0].message

        # The reasoning field name varies by server ("reasoning" vs
        # "reasoning_content"); read it defensively instead of raising
        # AttributeError on servers that use the other spelling.
        reasoning = getattr(response_message, "reasoning", None) or getattr(
            response_message, "reasoning_content", None
        )

        # Check if there are tool calls
        if response_message.tool_calls:
            print(f"Reasoning: {reasoning}")
            print(f"Tool calls: {[tc.function.name for tc in response_message.tool_calls]}")

            # Serialize tool calls to plain dicts so the history round-trips
            # cleanly when sent back to the OpenAI-compatible server (raw SDK
            # objects may not serialize identically).
            messages.append({
                "role": "assistant",
                "content": response_message.content,
                "tool_calls": [tc.model_dump() for tc in response_message.tool_calls],
                "reasoning": reasoning,
            })

            # Execute each tool call
            for tool_call in response_message.tool_calls:
                tool_name = tool_call.function.name
                tool_args = json.loads(tool_call.function.arguments)

                print(f"DEBUG - Tool: {tool_name}, Args: {tool_args}")

                if tool_name in available_tools:
                    try:
                        tool_result = available_tools[tool_name](**tool_args)
                    except TypeError as e:
                        # The model omitted or invented arguments; report the
                        # error back so it can retry instead of crashing.
                        tool_result = f"Error calling {tool_name}: {e}"
                else:
                    tool_result = f"Error: Tool {tool_name} not found"

                print(f"Tool {tool_name} result: {tool_result}")

                # Append tool result to messages
                messages.append({
                    "role": "tool",
                    "content": tool_result,
                    "tool_call_id": tool_call.id,
                    "name": tool_name,
                })

            # Continue to next iteration to get the follow-up response
            continue

        # No tool calls - this is the final answer
        print(f"\nFinal reasoning: {reasoning}")
        print(f"Final answer: {response_message.content}")
        # Keep the final assistant turn in the returned history.
        messages.append({
            "role": "assistant",
            "content": response_message.content,
            "reasoning": reasoning,
        })
        return {
            "answer": response_message.content,
            "reasoning": reasoning,
            "messages": messages,
        }

    raise RuntimeError(f"Max iterations ({max_iterations}) exceeded without final answer")

# -----------------------------
# Run the agent
# -----------------------------
if __name__ == "__main__":
    # Demo run: the query should trigger both the weather and time tools.
    result = run_agent_with_reasoning(
        "What's the weather in San Francisco in celcius? (I need strictly in celcius, do not forget it when calling tools) Also, what's the local time there?"
    )

    separator = "=" * 50

    print("\n" + separator)
    print("FINAL RESULT:")
    print(separator)
    print(f"Answer: {result['answer']}")
    print(f"\nReasoning: {result['reasoning']}")

    print("\n" + separator)
    print("COMPLETE MESSAGE HISTORY:")
    print(separator)
    # default=str lets any non-JSON-native objects in the history print.
    for idx, msg in enumerate(result['messages'], start=1):
        print(f"\nMessage {idx}:")
        print(json.dumps(msg, indent=2, default=str))

Sign up or log in to comment