"""Run a single Q/A completion against a local Llama 3 GGUF model and print it."""

from llama_cpp import Llama


def main() -> None:
    """Load the quantized model, generate an answer, and print the raw text.

    Side effects: reads the GGUF model file from disk (path relative to the
    current working directory) and writes the completion to stdout.
    """
    # Q4_K_M-quantized Llama-3-8B-Instruct; Llama() loads the weights eagerly,
    # which is why construction lives inside main() rather than at import time.
    llm = Llama(model_path="models/llama-3-8b-instruct.Q4_K_M.gguf")

    # Plain completion-style prompt; max_tokens caps the generated answer length.
    output = llm("Q: What is the capital of Kenya? A:", max_tokens=32)

    # llama-cpp-python returns an OpenAI-style dict; the generated text lives
    # under choices[0]["text"].
    print(output["choices"][0]["text"])


if __name__ == "__main__":
    main()