gouravbhadraDev committed on
Commit
e237568
·
verified ·
1 Parent(s): f138f18

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -4
app.py CHANGED
@@ -249,23 +249,62 @@ def generate_qwen3(prompt: str) -> (str, str):
249
  else:
250
  return "", generated_text.strip()
251
 
252
def generate_qwen3_gguf(prompt: str, max_tokens: int = 512) -> tuple[str, str]:
    """Run a single-turn chat completion against the Qwen3 GGUF model.

    Args:
        prompt: The user message sent to the model.
        max_tokens: Upper bound on generated tokens. Defaults to 512,
            the previously hard-coded cap, so existing callers are
            unaffected.

    Returns:
        A ``(reasoning, answer)`` pair. When the model output contains a
        ``</think>`` marker, everything up to and including the marker is
        returned as the reasoning and the remainder as the answer;
        otherwise the reasoning is empty and the whole stripped output is
        the answer.
    """
    messages = [{"role": "user", "content": prompt}]

    # NOTE(review): assumes qwen3_gguf_llm is a llama-cpp-python Llama
    # instance created elsewhere in this module — confirm.
    response = qwen3_gguf_llm.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
    )
    generated_text = response['choices'][0]['message']['content']

    if "</think>" in generated_text:
        # split(..., 1) keeps any later occurrences inside the answer.
        reasoning_content, content = generated_text.split("</think>", 1)
        return reasoning_content.strip() + "</think>", content.strip()
    return "", generated_text.strip()
268
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
 
270
 
271
 
 
249
  else:
250
  return "", generated_text.strip()
251
 
252
def generate_qwen3_gguf(prompt: str, max_tokens: int = 256) -> tuple[str, str]:
    """Run a single-turn chat completion against the Qwen3 GGUF model.

    Args:
        prompt: The user message sent to the model.
        max_tokens: Upper bound on generated tokens (default 256).

    Returns:
        A ``(reasoning, answer)`` pair. When the model output contains a
        ``</think>`` marker, everything up to and including the marker is
        returned as the reasoning and the remainder as the answer;
        otherwise the reasoning is empty and the whole stripped output is
        the answer.
    """
    messages = [{"role": "user", "content": prompt}]

    # NOTE(review): assumes qwen3_gguf_llm is a llama-cpp-python Llama
    # instance created elsewhere in this module — confirm.
    response = qwen3_gguf_llm.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
    )
    generated_text = response['choices'][0]['message']['content']

    if "</think>" in generated_text:
        # split(..., 1) keeps any later occurrences inside the answer.
        reasoning_content, content = generated_text.split("</think>", 1)
        return reasoning_content.strip() + "</think>", content.strip()
    return "", generated_text.strip()
266
 
267
# --- New summarization endpoint ---

@app.post("/summarize_thread", response_model=SummarizeResponse)
async def summarize_thread(request: SummarizeRequest):
    """Summarize each reply of a thread, then summarize the summaries.

    First pass: each reply in ``request.replies`` is summarized
    individually. Second pass: the per-reply summaries are concatenated
    and summarized once more to produce the final combined summary.

    Returns:
        A ``SummarizeResponse`` with per-reply summaries (keyed by reply
        index), all collected reasoning traces joined by blank lines, and
        the final combined summary. Returns a 400 ``JSONResponse`` when
        ``request.task`` is not 'summarisation'.
    """
    if request.task.lower() != "summarisation":
        return JSONResponse(
            status_code=400,
            content={"error": "Unsupported task. Only 'summarisation' is supported."}
        )

    individual_summaries = {}
    combined_reasonings = []
    combined_summaries = []

    # First pass: summarize each reply on its own.
    # NOTE(review): generate_qwen3_gguf is a blocking call inside an
    # async handler; consider run_in_executor if latency matters.
    for idx, reply in enumerate(request.replies):
        reasoning, summary = generate_qwen3_gguf(reply, max_tokens=256)
        individual_summaries[idx] = {
            "reasoning": reasoning,
            "summary": summary
        }
        if reasoning:
            combined_reasonings.append(reasoning)
        combined_summaries.append(summary)

    # Combine all individual summaries into one text.
    combined_summary_text = " ".join(combined_summaries)

    # Guard: with no replies (or only empty summaries) there is nothing
    # to re-summarize — avoid invoking the model on an empty prompt.
    if not combined_summary_text.strip():
        return SummarizeResponse(
            individual_summaries=individual_summaries,
            combined_reasoning="",
            combined_summary=""
        )

    # Second pass: condense the concatenated summaries into the final one.
    final_reasoning, final_summary = generate_qwen3_gguf(combined_summary_text, max_tokens=256)

    if final_reasoning:
        combined_reasonings.append(final_reasoning)

    return SummarizeResponse(
        individual_summaries=individual_summaries,
        combined_reasoning="\n\n".join(combined_reasonings).strip(),
        combined_summary=final_summary.strip()
    )
308
 
309
 
310