import os

import gradio as gr
from huggingface_hub import InferenceClient

# Model and token configuration. The original script pointed MODEL_NAME at
# meta-llama/Llama-2-7b-chat while the UI advertised Mistral-7B-Instruct;
# it is aligned here to the model the title and description name.
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"
HF_TOKEN = os.getenv("API_TOKEN_2")


def query_model(prompt):
    if not prompt or not prompt.strip():
        return "Please enter a prompt."
    try:
        client = InferenceClient(model=MODEL_NAME, token=HF_TOKEN)
        # Mistral-Instruct expects user turns wrapped in [INST] ... [/INST] tags.
        formatted_prompt = f"[INST] {prompt.strip()} [/INST]"
        response = client.text_generation(
            formatted_prompt,
            max_new_tokens=300,
            temperature=0.6,
            top_p=0.9,
            repetition_penalty=1.1,
            do_sample=True,
            return_full_text=False,
        )
        return response if response else "(No response from model.)"
    except Exception as e:
        return (
            f"Error: {e}\n\n"
            "This can happen if the model is gated, requires a Hugging Face "
            "token, or you need to accept its terms of use on the Hugging "
            "Face website."
        )


gr.Interface(
    fn=query_model,
    inputs=gr.Textbox(lines=4, label="Enter your prompt:"),
    outputs=gr.Textbox(lines=10, label="Model Response"),
    title="Simple Mistral-7B-Instruct Demo",
    description="Enter a prompt and get a response from mistralai/Mistral-7B-Instruct-v0.1.",
).launch()
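
# --- Hedged alternative (not part of the demo above) ----------------------
# huggingface_hub also exposes InferenceClient.chat_completion, which applies
# the model's own chat template server-side, so the manual [INST] ... [/INST]
# wrapping becomes unnecessary. This is an untested sketch, assuming a
# huggingface_hub version recent enough (>= 0.22) to ship chat_completion;
# to try it, define the function above gr.Interface and pass fn=query_model_chat.
#
# def query_model_chat(prompt):
#     client = InferenceClient(model=MODEL_NAME, token=HF_TOKEN)
#     completion = client.chat_completion(
#         messages=[{"role": "user", "content": prompt.strip()}],
#         max_tokens=300,  # chat_completion uses max_tokens, not max_new_tokens
#         temperature=0.6,
#         top_p=0.9,
#     )
#     return completion.choices[0].message.content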