update final model
app.py CHANGED
@@ -152,7 +152,8 @@ with gr.Blocks(theme = gr.themes.Soft()) as demo:
             }
         ]
         # llama guard check for it
-        prompt_safety = moderate(chat_round, llama_guard, llama_guard_tokenizer, UNSAFE_TOKEN_ID)['generated_text']
+        # prompt_safety = moderate(chat_round, llama_guard, llama_guard_tokenizer, UNSAFE_TOKEN_ID)['generated_text']
+        prompt_safety = "safe"
 
         if prompt_safety == "safe":
             docs = []
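This change disables the Llama Guard pre-check: the `moderate(...)` call is commented out and `prompt_safety` is hard-coded to "safe", so the `if prompt_safety == "safe":` branch always runs. For reference, a minimal sketch of what a `moderate` helper like the one at this call site typically does, assuming Llama Guard is loaded as a Hugging Face causal LM and the helper returns a pipeline-style dict with a "generated_text" key (the implementation below is an assumption for illustration, not this repo's code):

import torch

def moderate(chat, model, tokenizer, unsafe_token_id):
    # Hypothetical sketch: run one chat round through Llama Guard and
    # report whether it judges the content safe.
    # `chat` is assumed to be a list of {"role": ..., "content": ...} messages.
    input_ids = tokenizer.apply_chat_template(chat, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(input_ids=input_ids, max_new_tokens=20, do_sample=False)
    # Llama Guard replies with "safe" or "unsafe" plus the violated categories;
    # `unsafe_token_id` could alternatively be compared against the first
    # generated token id instead of decoding the text.
    verdict = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True).strip()
    label = "unsafe" if verdict.startswith("unsafe") else "safe"
    return {"generated_text": label}

With the check bypassed, prompts are no longer filtered before retrieval and generation.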
utils.py CHANGED
@@ -267,7 +267,7 @@ def load_vllm(model_name_or_path, dtype=torch.bfloat16):
         max_seq_len_to_capture=2048,
         max_model_len=8192,
     )
-    sampling_params = SamplingParams(temperature=0.1, top_p=
+    sampling_params = SamplingParams(temperature=0.1, top_p=0.95, max_tokens=300)
     logger.info("Finish loading in %.2f sec." % (time.time() - start_time))
 
     # Load the tokenizer
@@ -316,7 +316,7 @@ class LLM:
         inputs = self.tokenizer.apply_chat_template([{"role": "user", "content": prompt}], add_generation_prompt=True, return_dict=True, return_tensors="pt").to(self.chat_llm.device)
         outputs = self.chat_llm.generate(
             **inputs,
-            do_sample=True, temperature=0.1, top_p=
+            do_sample=True, temperature=0.1, top_p=0.95,
             max_new_tokens=max_tokens,
             num_return_sequences=1,
             eos_token_id=[self.chat_llm.config.eos_token_id]
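Both utils.py edits complete previously truncated sampling settings: the vLLM `SamplingParams` in `load_vllm` and the sampling arguments in the Hugging Face `generate` call inside `class LLM`. A minimal sketch of how the completed vLLM configuration is used end to end (the model name and prompt below are placeholders, not values from this repo):

from vllm import LLM as VLLM, SamplingParams

# Same settings as the updated load_vllm: low temperature, nucleus sampling,
# and at most 300 new tokens per completion.
sampling_params = SamplingParams(temperature=0.1, top_p=0.95, max_tokens=300)

llm = VLLM(
    model="meta-llama/Meta-Llama-3-8B-Instruct",  # placeholder model name
    dtype="bfloat16",
    max_model_len=8192,
    max_seq_len_to_capture=2048,
)

outputs = llm.generate(["Give a one-sentence summary of nucleus sampling."], sampling_params)
print(outputs[0].outputs[0].text)

The `do_sample=True, temperature=0.1, top_p=0.95` arguments added to the Hugging Face `generate` call apply the same decoding strategy on the non-vLLM path.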