Priyanka6 committed
Commit 7d51297 · 1 Parent(s): e89dcde

Update space

Files changed (1): app.py (+13 -2)
app.py CHANGED
@@ -31,6 +31,9 @@ def respond(message, history, max_tokens, temperature, top_p):
     # Tokenize and generate response
     inputs = tokenizer.apply_chat_template(messages, tokenize=False)
     input_tokens = tokenizer(inputs, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
+
+    # Get the length of input tokens to separate new response
+    input_length = input_tokens.input_ids.shape[1]
 
     output_tokens = model.generate(
         **input_tokens,
@@ -40,8 +43,16 @@ def respond(message, history, max_tokens, temperature, top_p):
         pad_token_id=tokenizer.pad_token_id,
         eos_token_id=tokenizer.eos_token_id,
     )
-
-    response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
+
+    # Extract only the new tokens (the model's response)
+    new_tokens = output_tokens[0][input_length:]
+    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
+
+    # Clean up any remaining system prompt or formatting artifacts
+    response = response.strip()
+    if response.startswith("assistant:"):
+        response = response[len("assistant:"):].strip()
+
     return response
 
 # Define Gradio Chat Interface
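
For context, below is a minimal self-contained sketch of respond() as it reads after this commit. The checkpoint (a TinyLlama chat model as a stand-in for the Space's actual model), the tuple-style history format, and the generation kwargs that sit in the unchanged lines between the two hunks (max_new_tokens, do_sample) are assumptions for illustration; the tokenization, token-slicing, and cleanup steps mirror the diff. The fix matters because tokenizer.decode(output_tokens[0], ...) previously decoded the whole sequence, prompt included, so the chat template and earlier turns were echoed into every reply; slicing at input_length keeps only the newly generated tokens.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Stand-in checkpoint: the Space's actual model is loaded elsewhere in app.py.
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID).to(device)

def respond(message, history, max_tokens=256, temperature=0.7, top_p=0.9):
    # Rebuild the conversation as role/content dicts (assumed tuple-style history).
    messages = []
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    # Tokenize and generate response
    inputs = tokenizer.apply_chat_template(messages, tokenize=False)
    input_tokens = tokenizer(inputs, return_tensors="pt").to(device)

    # Get the length of input tokens to separate new response
    input_length = input_tokens.input_ids.shape[1]

    output_tokens = model.generate(
        **input_tokens,
        max_new_tokens=max_tokens,  # assumed; hidden in the unchanged lines
        do_sample=True,             # assumed; needed for temperature/top_p to take effect
        temperature=temperature,
        top_p=top_p,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Extract only the new tokens (the model's response)
    new_tokens = output_tokens[0][input_length:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)

    # Clean up any remaining system prompt or formatting artifacts
    response = response.strip()
    if response.startswith("assistant:"):
        response = response[len("assistant:"):].strip()

    return response

In app.py this function is then passed to the Gradio chat interface defined in the section visible at the end of both hunks ("# Define Gradio Chat Interface").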