Luigi committed
Commit 76a0b57 · 1 Parent(s): 65b3c3a

reduce ctx and max tokens for performance

Files changed (1): app.py +2 -2
app.py CHANGED
@@ -75,7 +75,7 @@ def load_llm():
     llm = Llama(
         model_path=MODEL_FILE,
         chat_handler=handler,
-        n_ctx=8192,
+        n_ctx=1024,
         verbose=False,
     )
     logging.info("Llama model loaded successfully.")
@@ -129,7 +129,7 @@ def caption_frame(frame):
     logging.debug("Sending chat completion request...")
     resp = llm.create_chat_completion(
         messages=messages,
-        max_tokens=256,
+        max_tokens=128,
         temperature=0.1,
         stop=["<end_of_utterance>"],
     )
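
For reference, a minimal sketch of how the two reduced settings fit together in llama-cpp-python. This is not the app's full code: MODEL_FILE and the message content below are placeholders, and the chat_handler wiring from app.py is elided.

# Sketch only: MODEL_FILE and the message are placeholders, and the
# app's chat_handler setup is omitted for brevity.
from llama_cpp import Llama

MODEL_FILE = "model.gguf"  # placeholder path, not the app's actual file

llm = Llama(
    model_path=MODEL_FILE,
    n_ctx=1024,      # smaller context window -> smaller KV cache to allocate and attend over
    verbose=False,
)

resp = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Describe this frame."}],
    max_tokens=128,  # bounds completion length, so each call returns sooner
    temperature=0.1,
    stop=["<end_of_utterance>"],
)
print(resp["choices"][0]["message"]["content"])

Both knobs trade headroom for speed: a smaller n_ctx shrinks the KV cache the model allocates and scans, and a lower max_tokens caps how long each generation can run. Since the prompt and the completion share the context window, n_ctx=1024 with max_tokens=128 leaves roughly 896 tokens for the prompt, including whatever the chat handler injects for the image.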