Spaces:
Sleeping
Sleeping
from huggingface_hub import InferenceClient

# Shared inference client for the Zephyr-7B chat model; created once at
# module import so every call to respond() reuses the same connection config.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
def respond(message, history):
    """Stream a chat completion for *message*, given prior *history*.

    Args:
        message: The latest user message (plain string).
        history: Sequence of (user, assistant) string pairs from earlier
            turns; either element may be falsy and is then skipped.

    Yields:
        The accumulated assistant response so far, once per streamed token
        (Gradio-style streaming: each yield replaces the previous text).
    """
    system_message = "Your default system message"
    max_tokens = 150
    temperature = 0.7
    top_p = 0.9

    # Rebuild the full conversation in OpenAI-style message format.
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    response = ""
    # NOTE: loop variable renamed from the original `message`, which
    # shadowed the function parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # Streaming deltas can carry content=None (e.g. role-only or final
        # chunks); the original `response += token` would raise TypeError.
        if token:
            response += token
            yield response