OrangeEye committed on
Commit c0519e0 · 1 Parent(s): 6a2e657

update final model

Files changed (2)
  1. app.py +2 -1
  2. utils.py +2 -2
app.py CHANGED
@@ -152,7 +152,8 @@ with gr.Blocks(theme = gr.themes.Soft()) as demo:
         }
     ]
     # llama guard check for it
-    prompt_safety = moderate(chat_round, llama_guard, llama_guard_tokenizer, UNSAFE_TOKEN_ID)['generated_text']
+    # prompt_safety = moderate(chat_round, llama_guard, llama_guard_tokenizer, UNSAFE_TOKEN_ID)['generated_text']
+    prompt_safety = "safe"
 
     if prompt_safety == "safe":
         docs = []
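For context, the line being disabled follows the usual Llama Guard moderation pattern: render the conversation through the guard model's chat template, generate a short verdict, and compare it against "safe". A minimal sketch of what a moderate() helper with this signature might look like (the signature is taken from the call site above; the body is an assumption, not this repository's implementation):

# Hypothetical sketch of the moderate() helper that this commit comments out.
# Assumes llama_guard / llama_guard_tokenizer are a Hugging Face causal LM and
# tokenizer for a Llama Guard checkpoint; unsafe_token_id is accepted to match
# the call site but is not used in this sketch.
import torch

def moderate(chat, model, tokenizer, unsafe_token_id):
    # Llama Guard expects the conversation rendered through its chat template.
    input_ids = tokenizer.apply_chat_template(chat, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(input_ids=input_ids, max_new_tokens=20, pad_token_id=0)
    # The verdict ("safe", or "unsafe" plus a category line) follows the prompt tokens.
    verdict = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True).strip()
    return {"generated_text": verdict}

With the call commented out and prompt_safety hard-coded to "safe", every request takes the if prompt_safety == "safe": branch, so the Llama Guard gate is effectively bypassed in this build.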
utils.py CHANGED
@@ -267,7 +267,7 @@ def load_vllm(model_name_or_path, dtype=torch.bfloat16):
         max_seq_len_to_capture=2048,
         max_model_len=8192,
     )
-    sampling_params = SamplingParams(temperature=0.1, top_p=1.00, max_tokens=300)
+    sampling_params = SamplingParams(temperature=0.1, top_p=0.95, max_tokens=300)
     logger.info("Finish loading in %.2f sec." % (time.time() - start_time))
 
     # Load the tokenizer
@@ -316,7 +316,7 @@ class LLM:
         inputs = self.tokenizer.apply_chat_template([{"role": "user", "content": prompt}], add_generation_prompt=True, return_dict=True, return_tensors="pt").to(self.chat_llm.device)
         outputs = self.chat_llm.generate(
             **inputs,
-            do_sample=True, temperature=0.1, top_p=1.0,
+            do_sample=True, temperature=0.1, top_p=0.95,
             max_new_tokens=max_tokens,
             num_return_sequences=1,
             eos_token_id=[self.chat_llm.config.eos_token_id]
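The first utils.py hunk only tightens nucleus sampling on the vLLM path, dropping top_p from 1.00 (no truncation) to 0.95. As a point of reference, a minimal sketch of how a SamplingParams object like this one is typically consumed; the model name and prompt below are placeholders, and load_vllm's actual return values may differ:

from vllm import LLM, SamplingParams

# Placeholder model; the repo loads its own checkpoint via load_vllm().
llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct", dtype="bfloat16", max_model_len=8192)
sampling_params = SamplingParams(temperature=0.1, top_p=0.95, max_tokens=300)

# With top_p=0.95 the sampler draws only from the smallest set of tokens whose
# cumulative probability reaches 95%, cutting off the low-probability tail.
outputs = llm.generate(["Summarize the retrieved documents."], sampling_params)
print(outputs[0].outputs[0].text)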
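The second hunk applies the same change to the Hugging Face generate() path: with do_sample=True, top_p=0.95 switches from sampling over the full vocabulary to nucleus sampling, which matters little at temperature=0.1 (the distribution is already sharply peaked) but guards against rare tail tokens. A minimal sketch of how the returned ids are typically decoded after a call like the one in this hunk, assuming inputs came from apply_chat_template(..., return_dict=True) as shown; the repository's actual post-processing may differ:

# Decode only the newly generated tokens, skipping the prompt portion.
prompt_len = inputs["input_ids"].shape[1]
response = self.tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)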