Spaces:
Runtime error
Runtime error
Update space
Browse files
app.py
CHANGED
@@ -31,6 +31,9 @@ def respond(message, history, max_tokens, temperature, top_p):
|
|
31 |
# Tokenize and generate response
|
32 |
inputs = tokenizer.apply_chat_template(messages, tokenize=False)
|
33 |
input_tokens = tokenizer(inputs, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
|
|
|
34 |
|
35 |
output_tokens = model.generate(
|
36 |
**input_tokens,
|
@@ -40,8 +43,16 @@ def respond(message, history, max_tokens, temperature, top_p):
|
|
40 |
pad_token_id=tokenizer.pad_token_id,
|
41 |
eos_token_id=tokenizer.eos_token_id,
|
42 |
)
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
return response
|
46 |
|
47 |
# Define Gradio Chat Interface
|
|
|
31 |
# Tokenize and generate response
|
32 |
inputs = tokenizer.apply_chat_template(messages, tokenize=False)
|
33 |
input_tokens = tokenizer(inputs, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
|
34 |
+
|
35 |
+
# Record the prompt length in tokens so the newly generated response can be sliced off the output
|
36 |
+
input_length = input_tokens.input_ids.shape[1]
|
37 |
|
38 |
output_tokens = model.generate(
|
39 |
**input_tokens,
|
|
|
43 |
pad_token_id=tokenizer.pad_token_id,
|
44 |
eos_token_id=tokenizer.eos_token_id,
|
45 |
)
|
46 |
+
|
47 |
+
# Extract only the new tokens (the model's response)
|
48 |
+
new_tokens = output_tokens[0][input_length:]
|
49 |
+
response = tokenizer.decode(new_tokens, skip_special_tokens=True)
|
50 |
+
|
51 |
+
# Trim surrounding whitespace and drop a leading "assistant:" role label if the decoded text starts with one
|
52 |
+
response = response.strip()
|
53 |
+
if response.startswith("assistant:"):
|
54 |
+
response = response[len("assistant:"):].strip()
|
55 |
+
|
56 |
return response
|
57 |
|
58 |
# Define Gradio Chat Interface
|