import os

import gradio as gr
from huggingface_hub import InferenceClient

# Model and token configuration. The original script pointed MODEL_NAME at
# meta-llama/Llama-2-7b-chat while the UI advertised Mistral-7B-Instruct;
# it is aligned here to the model the title and description name.
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"
HF_TOKEN = os.getenv("API_TOKEN_2")


def query_model(prompt):
    if not prompt or not prompt.strip():
        return "Please enter a prompt."
    try:
        client = InferenceClient(model=MODEL_NAME, token=HF_TOKEN)
        # Mistral-Instruct expects user turns wrapped in [INST] ... [/INST] tags.
        formatted_prompt = f"[INST] {prompt.strip()} [/INST]"
        response = client.text_generation(
            formatted_prompt,
            max_new_tokens=300,
            temperature=0.6,
            top_p=0.9,
            repetition_penalty=1.1,
            do_sample=True,
            return_full_text=False,
        )
        return response if response else "(No response from model.)"
    except Exception as e:
        return (
            f"Error: {e}\n\n"
            "This can happen if the model is gated, requires a Hugging Face "
            "token, or you need to accept its terms of use on the Hugging "
            "Face website."
        )


gr.Interface(
    fn=query_model,
    inputs=gr.Textbox(lines=4, label="Enter your prompt:"),
    outputs=gr.Textbox(lines=10, label="Model Response"),
    title="Simple Mistral-7B-Instruct Demo",
    description="Enter a prompt and get a response from mistralai/Mistral-7B-Instruct-v0.1.",
).launch()
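
# --- Hedged alternative (not part of the demo above) ----------------------
# huggingface_hub also exposes InferenceClient.chat_completion, which applies
# the model's own chat template server-side, so the manual [INST] ... [/INST]
# wrapping becomes unnecessary. This is an untested sketch, assuming a
# huggingface_hub version recent enough (>= 0.22) to ship chat_completion;
# to try it, define the function above gr.Interface and pass fn=query_model_chat.
#
# def query_model_chat(prompt):
#     client = InferenceClient(model=MODEL_NAME, token=HF_TOKEN)
#     completion = client.chat_completion(
#         messages=[{"role": "user", "content": prompt.strip()}],
#         max_tokens=300,  # chat_completion uses max_tokens, not max_new_tokens
#         temperature=0.6,
#         top_p=0.9,
#     )
#     return completion.choices[0].message.content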