kimi-k2-thinking can't call a tool with 2 required arguments once 2 or more tools are defined
#30 opened by Yerkhat
This is a strange problem. When kimi-k2-thinking had only one tool, one with two required arguments, everything worked fine. After I added a second tool to test its interleaved reasoning ability, the model stopped calling the first tool correctly: it now omits one of the required arguments.
I am running the model with vLLM, so the issue may be in vLLM's tool-call parser rather than in the model itself.
error:
TypeError: get_current_weather() missing 1 required positional argument: 'temperature_metric'
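For now I guard against the crash by validating the parsed arguments against each tool schema's "required" list before dispatching, so a dropped argument goes back to the model as a tool-role error message instead of killing the loop with a TypeError. A minimal sketch (validate_tool_args is my own hypothetical helper, not part of the original repro):

import json

def validate_tool_args(tool_schema: dict, arguments_json: str):
    """Return (args, error); error is None when all required keys are present."""
    args = json.loads(arguments_json)
    required = tool_schema["function"]["parameters"].get("required", [])
    missing = [key for key in required if key not in args]
    if missing:
        return args, f"Error: missing required argument(s): {', '.join(missing)}"
    return args, None

Feeding that error string back as the tool result at least gives the model a chance to retry; it is a workaround, not a fix for the parser.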
full code (sorry for excessive prints):
import os
import json
from dotenv import load_dotenv
from openai import OpenAI
# -----------------------------
# Load environment variables
# -----------------------------
load_dotenv()
API_URL = os.getenv("API_URL")
API_KEY = os.getenv("API_KEY")
# -----------------------------
# OpenAI client (compatible with vLLM/Kimi K2)
# -----------------------------
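# Note: API_URL should be the server's OpenAI-compatible endpoint;
# for vLLM that is typically http://<host>:8000/v1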
client = OpenAI(
    base_url=API_URL,
    api_key=API_KEY
)
MODEL_NAME = "kimi-k2-thinking"
# -----------------------------
# Define your tools
# -----------------------------
def get_current_weather(location: str, temperature_metric: str):
    """Get the current weather in a given location"""
    if temperature_metric.lower() == "celsius":
        return f"The current temperature in {location} is 22°C."
    else:
        return f"The current temperature in {location} is 72°F."
def get_local_time(location: str):
    """Get the current local time in a given location"""
    # Simulate different times for different locations
    time_offsets = {
        "San Francisco": -8,
        "New York": -5,
        "London": 0,
        "Tokyo": 9,
        "Sydney": 11
    }
    # Find matching location (case-insensitive partial match)
    offset = 0
    for city, hrs in time_offsets.items():
        if city.lower() in location.lower():
            offset = hrs
            break
    # Calculate local time (simplified - not accounting for DST)
    # datetime.UTC requires Python 3.11+
    from datetime import datetime, timedelta, UTC
    utc_now = datetime.now(UTC)
    local_time = utc_now + timedelta(hours=offset)
    return f"The current local time in {location} is {local_time.strftime('%I:%M %p')} (UTC{offset:+d})"
# Tool definitions in OpenAI format
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "City and state, e.g., 'San Francisco, CA'",
                    },
                    "temperature_metric": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"]
                    },
                },
                "required": ["location", "temperature_metric"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "get_local_time",
            "description": "Get the current local time in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "City and state, e.g., 'San Francisco, CA'",
                    },
                },
                "required": ["location"],
            },
        },
    }
]
# Map tool names to functions
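# (the schema advertises "get_weather" while the Python function is
# get_current_weather; this mapping reconciles the two names)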
available_tools = {"get_weather": get_current_weather, "get_local_time": get_local_time}
# -----------------------------
# Agent execution with reasoning
# -----------------------------
def run_agent_with_reasoning(query: str, max_iterations: int = 100):
    """
    Run agent with interleaved reasoning support.
    Properly handles tool calls and reasoning traces.
    """
    messages = [{"role": "user", "content": query}]
    for iteration in range(max_iterations):
        print(f"\n--- Iteration {iteration + 1} ---")
        # Call LLM with tools
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            tools=tools,
            tool_choice="auto",
        )
        response_message = response.choices[0].message
        # Store the complete response for history
        # Convert response_message to dict to preserve all fields
        assistant_message = {
            "role": "assistant",
            "content": response_message.content,
            "reasoning": response_message.reasoning,
        }
        # Check if there are tool calls
        if response_message.tool_calls:
            assistant_message["tool_calls"] = response_message.tool_calls
            print(f"Reasoning: {response_message.reasoning}")
            print(f"Tool calls: {[tc.function.name for tc in response_message.tool_calls]}")
            # Append assistant message with content, reasoning, and tool calls
            messages.append(assistant_message)
            # Execute each tool call
            for tool_call in response_message.tool_calls:
                tool_name = tool_call.function.name
                tool_args = json.loads(tool_call.function.arguments)
                print(f"DEBUG - Tool: {tool_name}, Args: {tool_args}")
                # Execute the tool
                if tool_name in available_tools:
                    tool_result = available_tools[tool_name](**tool_args)
                else:
                    tool_result = f"Error: Tool {tool_name} not found"
                print(f"Tool {tool_name} result: {tool_result}")
                # Append tool result to messages
                messages.append({
                    "role": "tool",
                    "content": tool_result,
                    "tool_call_id": tool_call.id,
                    "name": tool_name,
                })
            # Continue to next iteration to get final response
            continue
        else:
            # No tool calls - this is the final answer
            print(f"\nFinal reasoning: {response_message.reasoning}")
            print(f"Final answer: {response_message.content}")
            return {
                "answer": response_message.content,
                "reasoning": response_message.reasoning,
                "messages": messages
            }
    raise RuntimeError(f"Max iterations ({max_iterations}) exceeded without final answer")
# -----------------------------
# Run the agent
# -----------------------------
if __name__ == "__main__":
    result = run_agent_with_reasoning(
        "What's the weather in San Francisco in celsius? (I need strictly in celsius, do not forget it when calling tools) Also, what's the local time there?"
    )
print("\n" + "="*50)
print("FINAL RESULT:")
print("="*50)
print(f"Answer: {result['answer']}")
print(f"\nReasoning: {result['reasoning']}")
print("\n" + "="*50)
print("COMPLETE MESSAGE HISTORY:")
print("="*50)
for i, msg in enumerate(result['messages']):
print(f"\nMessage {i + 1}:")
print(json.dumps(msg, indent=2, default=str))
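One more thing worth ruling out (an assumption on my part, not something I have confirmed fixes this): the loop appends the SDK's ChatCompletionMessageToolCall objects straight back into messages. Some OpenAI-compatible servers are stricter about the wire format than the SDK, so converting them to plain dicts before resending is a cheap precaution:

def tool_calls_to_dicts(tool_calls):
    """Convert SDK tool-call objects to plain OpenAI-format dicts."""
    return [
        {
            "id": tc.id,
            "type": "function",
            "function": {
                "name": tc.function.name,
                "arguments": tc.function.arguments,
            },
        }
        for tc in tool_calls
    ]

That is, replace assistant_message["tool_calls"] = response_message.tool_calls with assistant_message["tool_calls"] = tool_calls_to_dicts(response_message.tool_calls).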