|
import gradio as gr |
|
from huggingface_hub import InferenceClient |
|
|
|
# Hosted inference endpoint for the Kimi-Dev-72B model.
MODEL_ID = "moonshotai/Kimi-Dev-72B"
client = InferenceClient(MODEL_ID)
|
|
|
def chat(message, system_prompt="", max_tokens=1024, temperature=0.9, top_p=0.95, top_k=40, repetition_penalty=1.0):
    """Generate a completion for *message* using the hosted model.

    Args:
        message: The user prompt text.
        system_prompt: Optional instructions; when non-empty it is prepended
            to the prompt, separated by a newline.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        top_k: Top-k sampling cutoff.
        repetition_penalty: Penalty applied to repeated tokens.

    Returns:
        The generated text only (``return_full_text=False`` strips the
        echoed prompt).
    """
    prompt = f"{system_prompt}\n{message}" if system_prompt else message
    result = client.text_generation(
        prompt=prompt,
        # int(...) guards: Gradio sliders may deliver floats, and these
        # parameters must be integers.
        max_new_tokens=int(max_tokens),
        temperature=temperature,
        top_p=top_p,
        # BUG FIX: top_k was accepted (and exposed as a UI slider) but was
        # never forwarded to the API, so the slider had no effect.
        top_k=int(top_k),
        repetition_penalty=repetition_penalty,
        do_sample=True,
        return_full_text=False,
    )
    return result
|
|
|
# Web UI: one prompt in, generated text out, with the sampling
# parameters exposed as sliders in the same order `chat` accepts them.
_input_components = [
    gr.Textbox(label="Prompt"),
    gr.Textbox(label="System Prompt", value=""),
    gr.Slider(1, 2048, value=1024, label="Max Tokens"),
    gr.Slider(0.1, 1.0, value=0.9, label="Temperature"),
    gr.Slider(0.1, 1.0, value=0.95, label="Top-p"),
    gr.Slider(1, 100, value=40, label="Top-k"),
    gr.Slider(1.0, 2.0, value=1.0, label="Repetition Penalty"),
]

demo = gr.Interface(
    fn=chat,
    inputs=_input_components,
    outputs=gr.Textbox(label="Response"),
)

demo.launch()
|
|