import gradio as gr
from huggingface_hub import InferenceClient

# Set this to the exact name of your Hugging Face model repo
HF_MODEL_ID = "rieon/DeepCoder-14B-Preview-Suger"

# Client for the Hugging Face Inference API; text_generation() is called on it below
client = InferenceClient(model=HF_MODEL_ID)
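# Note (assumption, only needed for gated/private repos): the client can take an access
# token, e.g. InferenceClient(model=HF_MODEL_ID, token=os.environ["HF_TOKEN"]) with
# `import os` added above. Public models work without it.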

def respond(
    message: str,
    history: list[dict],        # [{"role": "user"/"assistant", "content": ...}, ...]
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    # 1. Build one raw-text prompt from the system message, the chat history,
    #    and the new user turn
    prompt = system_message.strip() + "\n"
    for msg in history:
        role = msg["role"]
        content = msg["content"]
        if role == "user":
            prompt += f"User: {content}\n"
        elif role == "assistant":
            prompt += f"Assistant: {content}\n"
    prompt += f"User: {message}\nAssistant:"

    # 2. Stream tokens from the text-generation endpoint.
    #    With stream=True (and details left at its default), text_generation yields
    #    plain string chunks, so each chunk is appended directly.
    generated = ""
    for chunk in client.text_generation(
        prompt,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        generated += chunk
        yield generated
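# Quick local sanity check (a sketch; it assumes network access to the Inference API and
# that the model above is deployed there). Uncomment to print the growing reply as it streams:
# for partial in respond(
#     "Write a Python one-liner that reverses a string.",
#     history=[],
#     system_message="You are a helpful coding assistant.",
#     max_tokens=64,
#     temperature=0.7,
#     top_p=0.95,
# ):
#     print(partial)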

demo = gr.ChatInterface(
    fn=respond,
    type="messages",
    title="DeepCoder with Suger",
    description="Ask coding questions and get streamed answers from DeepCoder-14B-Preview-Suger.",
    additional_inputs=[
        gr.Textbox(value="You are a helpful coding assistant.", label="System message"),
        gr.Slider(1, 2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)
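
# The additional_inputs above appear in ChatInterface's "Additional Inputs" accordion and
# are passed to respond() positionally after (message, history), matching its signature.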

if __name__ == "__main__":
    demo.launch()
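
# Run locally with `python app.py`; Gradio serves the UI at http://127.0.0.1:7860 by default.
# In a Hugging Face Space this file is picked up automatically as app.py.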