import gradio as gr
from huggingface_hub import InferenceClient
# ←–– set this to the exact name of your HF repo
HF_MODEL_ID = "rieon/DeepCoder-14B-Preview-Suger"
# the client routes this model's requests through the Hugging Face Inference API;
# the text-generation task is selected by calling client.text_generation() below
client = InferenceClient(model=HF_MODEL_ID)
def respond(
message: str,
history: list[dict],
system_message: str,
max_tokens: int,
temperature: float,
top_p: float,
):
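    """Streaming chat callback for gr.ChatInterface.

    With type="messages", `history` arrives as a list of
    {"role": "user" | "assistant", "content": ...} dicts; partial completions
    are yielded so the UI can render tokens as they stream in.
    """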
    # assemble a single prompt from the system message, prior turns, and the new user turn
    prompt = system_message.strip() + "\n"
    for msg in history:
        if msg["role"] == "user":
            prompt += f"User: {msg['content']}\n"
        elif msg["role"] == "assistant":
            prompt += f"Assistant: {msg['content']}\n"
    prompt += f"User: {message}\nAssistant:"
# stream back tokens
generated = ""
for chunk in client.text_generation(
prompt,
max_new_tokens=max_tokens,
temperature=temperature,
top_p=top_p,
stream=True,
):
        # with stream=True (and default details=False) each chunk is a plain text token
        generated += chunk
yield generated
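# note: a non-streaming variant (minimal sketch, same parameters) would drop
# stream=True and return the full completion in one call:
#   return client.text_generation(prompt, max_new_tokens=max_tokens,
#                                 temperature=temperature, top_p=top_p)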
demo = gr.ChatInterface(
fn=respond,
type="messages",
title="DeepCoder with Suger",
description="Upload any text or pdf files and ask questions about them!",
additional_inputs=[
gr.Textbox(value="You are a helpful coding assistant.", label="System message"),
gr.Slider(1, 2048, value=512, step=1, label="Max new tokens"),
gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
],
)
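# gr.ChatInterface passes the additional_inputs values to respond() after
# (message, history), in the order listed: system_message, max_tokens, temperature, top_p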
if __name__ == "__main__":
demo.launch()