chat_bot_server

Running

chat_bot_server / app.py

Update app.py

d86b208 verified 2 days ago

1.1 kB

	from fastapi import FastAPI
	from pydantic import BaseModel
	from llama_cpp import Llama

	# Application object that the route decorator further down registers against.
	app = FastAPI()

	# Model handle, created once when this module is imported and reused by every
	# request. The arguments name a model repository and the specific GGUF file
	# to load from it (presumably resolved via the Hugging Face Hub -- confirm
	# against the llama-cpp-python docs).
	qwen3_gguf_llm = Llama.from_pretrained(
	    repo_id="unsloth/Qwen3-0.6B-GGUF",
	    filename="Qwen3-0.6B-UD-Q8_K_XL.gguf",
	)

	# Request body accepted by the generation endpoint.
	class PromptRequest(BaseModel):
	    # Text of the user's message for this turn.
	    prompt: str

	# Response body returned by the generation endpoint.
	class GenerateResponse(BaseModel):
	    # The assistant's reply text for this turn.
	    generated_text: str

	# Simple in-memory conversation memory: one module-level list of chat
	# messages (dicts with "role" and "content" keys) in the order exchanged.
	# NOTE(review): nothing visible here keys this by client or ever trims it, so
	# all callers appear to share a single transcript that only grows and is
	# lost when the process restarts -- confirm this is intended.
	conversation_history = []

	@app.post("/generate/qwen3-0.6b-gguf", response_model=GenerateResponse)
	async def generate_qwen3_gguf_endpoint(request: PromptRequest):
	    """Generate the assistant's next reply for ``request.prompt``.

	    The prompt is sent to the model together with every message already in
	    the module-level ``conversation_history``. Once a reply has been
	    obtained, the user message and the reply are recorded in that history
	    so later requests see them.

	    Args:
	        request: Request body carrying the user's prompt text.

	    Returns:
	        A ``GenerateResponse`` whose ``generated_text`` is the reply.

	    Raises:
	        Any exception raised by ``create_chat_completion`` propagates
	        unchanged; in that case ``conversation_history`` is left exactly
	        as it was before the request.
	    """
	    user_message = {"role": "user", "content": request.prompt}

	    # Build the outgoing transcript without touching the shared history yet.
	    # If the model call fails, no orphan user turn (one with no assistant
	    # reply) is left behind to be replayed on every later request.
	    messages = [*conversation_history, user_message]

	    # NOTE(review): this call is not awaited, so it runs synchronously inside
	    # the async handler and other requests wait until it returns. That also
	    # means nothing else touches the history between the snapshot above and
	    # the commit below. Revisit both points together if this is ever moved
	    # to a worker thread.
	    response = qwen3_gguf_llm.create_chat_completion(messages=messages)

	    # The reply text lives in the first choice's message.
	    assistant_message = response["choices"][0]["message"]["content"]

	    # Commit both halves of the turn together, only after success.
	    conversation_history.extend(
	        (user_message, {"role": "assistant", "content": assistant_message})
	    )

	    return GenerateResponse(generated_text=assistant_message)