# Minimal llama-cpp-python text-completion demo.
#
# Dependency setup (notebook shell syntax, intentionally left as a comment):
# !pip install llama-cpp-python

# NOTE(review): `gr` and `InferenceClient` are not used by the statements
# below; they are kept because other parts of the project may rely on them.
import gradio as gr
from huggingface_hub import InferenceClient
from llama_cpp import Llama

# Load the quantized GGUF model identified by repo id and file name.
# Presumably this fetches the file from the Hugging Face Hub on first use --
# confirm against the llama-cpp-python documentation.
llm = Llama.from_pretrained(
    repo_id="gultar/OpenHermes-Llama-3b-GGUF",
    filename="openhermes-llama-3b-q5_0.gguf",
)

# Run one completion for a fixed prompt, capped at 512 new tokens.
# echo=True asks for the prompt to be included in the returned text.
output = llm(
    "Once upon a time,",
    max_tokens=512,
    echo=True,
)

# Print the raw completion result exactly as returned by the model call.
print(output)