gouravbhadraDev committed on
Commit
f138f18
·
verified ·
1 Parent(s): f73717a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -1
app.py CHANGED
@@ -253,7 +253,11 @@ def generate_qwen3_gguf(prompt: str) -> (str, str):
253
  messages = [
254
  {"role": "user", "content": prompt}
255
  ]
256
- response = qwen3_gguf_llm.create_chat_completion(messages=messages)
 
 
 
 
257
  generated_text = response['choices'][0]['message']['content']
258
 
259
  if "</think>" in generated_text:
@@ -264,6 +268,7 @@ def generate_qwen3_gguf(prompt: str) -> (str, str):
264
 
265
 
266
 
 
267
  @app.post("/generate/{model_name}", response_model=GenerateResponse)
268
  async def generate(
269
  request: PromptRequest,
 
253
  messages = [
254
  {"role": "user", "content": prompt}
255
  ]
256
+ # Set max_tokens to cap the generated completion at 512 tokens (this limits output length only; prompt tokens are not counted)
257
+ response = qwen3_gguf_llm.create_chat_completion(
258
+ messages=messages,
259
+ max_tokens=512 # or smaller, adjust to fit your use case
260
+ )
261
  generated_text = response['choices'][0]['message']['content']
262
 
263
  if "</think>" in generated_text:
 
268
 
269
 
270
 
271
+
272
  @app.post("/generate/{model_name}", response_model=GenerateResponse)
273
  async def generate(
274
  request: PromptRequest,