gouravbhadraDev committed (verified)
Commit ad67d60 · 1 Parent(s): a0b62ab

Update app.py

Files changed (1):
  1. app.py +24 -3
app.py CHANGED

@@ -154,6 +154,11 @@ qwen3_tokenizer = AutoTokenizer.from_pretrained(qwen3_model_name)
 qwen3_model = AutoModelForCausalLM.from_pretrained(qwen3_model_name)
 qwen3_model = qwen3_model.to(device)
 
+qwen3_gguf_llm = Llama.from_pretrained(
+    repo_id="unsloth/Qwen3-0.6B-GGUF",
+    filename="Qwen3-0.6B-BF16.gguf",
+)
+
 
 # --- Generation Functions ---
 
@@ -243,11 +248,25 @@ def generate_qwen3(prompt: str) -> (str, str):
     else:
         return "", generated_text.strip()
 
+def generate_qwen3_gguf(prompt: str) -> (str, str):
+    messages = [
+        {"role": "user", "content": prompt}
+    ]
+    response = qwen3_gguf_llm.create_chat_completion(messages=messages)
+    generated_text = response['choices'][0]['message']['content']
+
+    if "</think>" in generated_text:
+        reasoning_content, content = generated_text.split("</think>", 1)
+        return reasoning_content.strip() + "</think>", content.strip()
+    else:
+        return "", generated_text.strip()
+
+
 
 @app.post("/generate/{model_name}", response_model=GenerateResponse)
 async def generate(
     request: PromptRequest,
-    model_name: str = Path(..., description="Model to use: 'deepseekr1-qwen', 't5-large', 'pegasus-large', or 'qwen3-0.6b'")
+    model_name: str = Path(..., description="Model to use: 'deepseekr1-qwen', 't5-large', 'pegasus-large', 'qwen3-0.6b-hf', or 'qwen3-0.6b-gguf'")
 ):
     if model_name == "deepseekr1-qwen":
         reasoning, text = generate_deepseek(request.prompt)
@@ -255,8 +274,10 @@ async def generate(
         reasoning, text = generate_t5(request.prompt)
     elif model_name == "pegasus-large":
         reasoning, text = generate_pegasus(request.prompt)
-    elif model_name == "qwen3-0.6b":
-        reasoning, text = generate_qwen3(request.prompt)
+    elif model_name == "qwen3-0.6b-hf":
+        reasoning, text = generate_qwen3_hf(request.prompt)
+    elif model_name == "qwen3-0.6b-gguf":
+        reasoning, text = generate_qwen3_gguf(request.prompt)
     else:
         return GenerateResponse(reasoning_content="", generated_text=f"Error: Unknown model '{model_name}'.")
 
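
After this commit the endpoint serves the same Qwen3-0.6B model through two backends: the Transformers weights ('qwen3-0.6b-hf') and a GGUF build loaded with llama-cpp-python ('qwen3-0.6b-gguf'), whose Llama.from_pretrained call downloads the .gguf file from the Hugging Face Hub on first use. Two caveats: the hunks shown add no import for Llama, so app.py presumably already contains `from llama_cpp import Llama`; and the new routing calls generate_qwen3_hf while the function in the surrounding context is still named generate_qwen3, so unless it was renamed elsewhere the 'qwen3-0.6b-hf' branch would raise a NameError. Below is a minimal client sketch for exercising the new GGUF route; the host and port are assumptions (say, `uvicorn app:app --port 8000`), while the "prompt" field and the response keys follow the PromptRequest and GenerateResponse models referenced in the diff.

# Hypothetical client for the new route; localhost:8000 is an assumption,
# not something established by this commit.
import requests

resp = requests.post(
    "http://localhost:8000/generate/qwen3-0.6b-gguf",
    json={"prompt": "Summarize what GGUF is in one sentence."},
)
resp.raise_for_status()
body = resp.json()
print(body["reasoning_content"])  # text up to and including "</think>", empty if the tag is absent
print(body["generated_text"])     # the model's final answer

The create_chat_completion call in the diff returns an OpenAI-style completion dict, which is why the server indexes response['choices'][0]['message']['content'] before splitting on the "</think>" marker.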