"""Gradio chat demo that streams replies from a Hugging Face hosted model."""

import html

import gradio as gr
from huggingface_hub import InferenceClient

# -- 1) DEFINE YOUR MODELS HERE --
# Each entry needs: "name" and "description" (shown in the picker), "id" (the
# model ID handed to InferenceClient) and "enabled" (False renders the entry
# as a non-selectable option).
models = [
    {
        "name": "Tiny Model",
        "description": "A small chat model.",
        "id": "amusktweewt/tiny-model-500M-chat-v2",
        "enabled": True,
    },
    {
        "name": "Another Model",
        "description": "A bigger chat model (disabled).",
        "id": "another-model",
        "enabled": False,
    },
]

# Build the custom HTML for a disabled-capable <select>.
#
# NOTE(review): the original markup of this section was lost when the file was
# flattened; only the "{dropdown_options}" placeholder and the closing triple
# quote survived. What follows is a minimal reconstruction (a plain <select>
# whose change handler copies the chosen model ID into the hidden textbox
# below). Confirm it against the original template, including any styling.

# Inline change handler: `this` is the <select>. It writes the chosen value
# into the hidden Gradio textbox and fires an "input" event so the front end
# notices the change.
# NOTE(review): assumes the textbox renders as a <textarea> or <input> under
# #hidden_model and is present in the DOM while hidden -- verify against the
# Gradio version in use.
_SYNC_MODEL_JS = (
    "var box = document.querySelector("
    "'#hidden_model textarea, #hidden_model input');"
    " if (box) {"
    " box.value = this.value;"
    " box.dispatchEvent(new Event('input', {bubbles: true}));"
    " }"
)


def _build_dropdown_options(model_specs):
    """Return one ``<option>`` tag per model, newline-joined.

    Models whose ``"enabled"`` flag is false get the ``disabled`` attribute.
    All text taken from the model entries is HTML-escaped.
    """
    tags = []
    for spec in model_specs:
        disabled_attr = "" if spec["enabled"] else " disabled"
        value = html.escape(spec["id"], quote=True)
        label = html.escape(f'{spec["name"]}: {spec["description"]}')
        tags.append(f'<option value="{value}"{disabled_attr}>{label}</option>')
    return "\n".join(tags)


dropdown_options = _build_dropdown_options(models)
dropdown_html = f"""
<label for="model_select">Model</label>
<select id="model_select" onchange="{_SYNC_MODEL_JS}">
{dropdown_options}
</select>
"""


# -- 2) CHAT HANDLER --
def respond(
    message,
    history: list[tuple[str, str]],
    model_id,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Build a chat prompt and stream the response from the model.

    Args:
        message: The new user message.
        history: Earlier ``(user, assistant)`` turns; may be empty or None.
        model_id: Model ID passed to ``InferenceClient``.
        system_message: Optional system prompt; skipped when empty.
        max_tokens: Forwarded to ``chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``chat_completion`` as ``temperature``.
        top_p: Forwarded to ``chat_completion`` as ``top_p``.

    Yields:
        The response text accumulated so far, once per streamed chunk.
    """
    client = InferenceClient(model_id)

    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})
    for user_msg, bot_msg in history or []:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    # Kept from the original: the prompt ends with an empty assistant turn.
    # NOTE(review): presumably this primes the model's reply -- confirm it is
    # still wanted.
    messages.append({"role": "assistant", "content": ""})

    response_text = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # A streamed chunk may carry no choices or a None delta (for example
        # the final chunk); appending None to a str would raise TypeError.
        token = chunk.choices[0].delta.content if chunk.choices else None
        if token:
            response_text += token
        yield response_text


# -- 3) BUILD THE UI IN A BLOCKS CONTEXT --
with gr.Blocks() as demo:
    # Custom HTML dropdown for model selection.
    gr.HTML(value=dropdown_html)

    # Hidden textbox to store the current model ID.
    hidden_model = gr.Textbox(
        value=models[0]["id"],  # Default to the first model
        visible=False,
        elem_id="hidden_model",
    )

    # ChatInterface with an element ID for styling.
    chat = gr.ChatInterface(
        respond,
        additional_inputs=[
            hidden_model,
            gr.Textbox(
                value="You are a friendly Chatbot.",
                label="System message",
            ),
            gr.Slider(
                minimum=1,
                maximum=2048,
                value=512,
                step=1,
                label="Max new tokens",
            ),
            gr.Slider(
                minimum=0.1,
                maximum=4.0,
                value=0.7,
                step=0.1,
                label="Temperature",
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p (nucleus sampling)",
            ),
        ],
        elem_id="chat_interface",
    )

if __name__ == "__main__":
    demo.launch()