import gradio as gr
from huggingface_hub import InferenceClient

# Remote inference client for the Kimi-Dev-72B model (HF serverless inference).
client = InferenceClient("moonshotai/Kimi-Dev-72B")


def chat(message, system_prompt="", max_tokens=1024, temperature=0.9, top_p=0.95, top_k=40, repetition_penalty=1.0):
    """Generate a single-turn completion from Kimi-Dev-72B.

    Args:
        message: The user prompt to complete.
        system_prompt: Optional system text prepended (newline-separated) to the message.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        top_k: Top-k sampling cutoff.
        repetition_penalty: Penalty applied to repeated tokens.

    Returns:
        The generated text only (the prompt is not echoed back).
    """
    # Prepend the system prompt only when one was actually provided.
    prompt = f"{system_prompt}\n{message}" if system_prompt else message
    result = client.text_generation(
        prompt=prompt,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,  # fix: was accepted (and exposed in the UI) but never forwarded
        repetition_penalty=repetition_penalty,
        do_sample=True,
        return_full_text=False,
    )
    return result


demo = gr.Interface(
    fn=chat,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Textbox(label="System Prompt", value=""),
        gr.Slider(1, 2048, value=1024, label="Max Tokens"),
        gr.Slider(0.1, 1.0, value=0.9, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, label="Top-p"),
        gr.Slider(1, 100, value=40, label="Top-k"),
        gr.Slider(1.0, 2.0, value=1.0, label="Repetition Penalty"),
    ],
    outputs=gr.Textbox(label="Response"),
)

# Guard the server launch so importing this module has no side effects.
if __name__ == "__main__":
    demo.launch()