Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -249,23 +249,62 @@ def generate_qwen3(prompt: str) -> (str, str):
|
|
249 |
else:
|
250 |
return "", generated_text.strip()
|
251 |
|
252 |
-
def generate_qwen3_gguf(prompt: str, max_tokens: int = 256) -> (str, str):
    """Run a single-turn chat completion against the GGUF Qwen3 model.

    Parameters
    ----------
    prompt : str
        The user message to send to the model.
    max_tokens : int, optional
        Cap on the number of generated tokens (default 256). Previously
        this kwarg was left hard-coded (and truncated in the diff) with
        the intent of keeping total tokens <= 512; it is now a
        caller-tunable parameter with a safe default.

    Returns
    -------
    (str, str)
        ``(reasoning, answer)`` — the model's ``<think>`` segment
        (with the closing ``</think>`` tag re-attached) and the final
        answer text. ``reasoning`` is ``""`` when the model emitted no
        ``</think>`` marker.
    """
    messages = [
        {"role": "user", "content": prompt}
    ]
    # NOTE(review): qwen3_gguf_llm is a module-level llama-cpp-python
    # Llama instance created elsewhere in this file.
    response = qwen3_gguf_llm.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
    )
    generated_text = response['choices'][0]['message']['content']

    # Split the chain-of-thought from the answer, if present.
    if "</think>" in generated_text:
        reasoning_content, content = generated_text.split("</think>", 1)
        return reasoning_content.strip() + "</think>", content.strip()
    else:
        return "", generated_text.strip()
|
268 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
269 |
|
270 |
|
271 |
|
|
|
249 |
else:
|
250 |
return "", generated_text.strip()
|
251 |
|
252 |
+
def generate_qwen3_gguf(prompt: str, max_tokens: int = 256) -> (str, str):
    """Chat-complete *prompt* with the GGUF Qwen3 model.

    Returns a ``(reasoning, answer)`` pair: the ``<think>`` segment
    (closing tag included) and the remaining answer text, both stripped.
    When the model produced no ``</think>`` marker, reasoning is ``""``.
    ``max_tokens`` bounds the generation length (default 256).
    """
    # NOTE(review): qwen3_gguf_llm is a module-level model instance
    # created elsewhere in this file.
    response = qwen3_gguf_llm.create_chat_completion(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
    )
    generated_text = response['choices'][0]['message']['content']

    # Separate the model's chain-of-thought from the answer, if present.
    reasoning_part, marker, answer_part = generated_text.partition("</think>")
    if marker:
        return reasoning_part.strip() + "</think>", answer_part.strip()
    return "", generated_text.strip()
|
266 |
|
267 |
+
# --- New summarization endpoint ---


@app.post("/summarize_thread", response_model=SummarizeResponse)
async def summarize_thread(request: SummarizeRequest):
    """Summarize each reply in a thread, then summarize the combination.

    Two-pass map/reduce: every reply is summarized individually, then
    the concatenated per-reply summaries are summarized once more to
    produce the final combined summary. Rejects any task other than
    'summarisation' with a 400 response.
    NOTE(review): SummarizeRequest/SummarizeResponse and ``app`` are
    declared elsewhere in this file.
    """
    # Guard clause: this endpoint serves exactly one task.
    if request.task.lower() != "summarisation":
        return JSONResponse(
            status_code=400,
            content={"error": "Unsupported task. Only 'summarisation' is supported."}
        )

    per_reply = {}
    reasoning_chunks = []
    summary_chunks = []

    # Pass 1: summarize every reply on its own, keyed by position.
    for pos, reply_text in enumerate(request.replies):
        reasoning, summary = generate_qwen3_gguf(reply_text, max_tokens=256)
        per_reply[pos] = {
            "reasoning": reasoning,
            "summary": summary
        }
        if reasoning:
            reasoning_chunks.append(reasoning)
        summary_chunks.append(summary)

    # Pass 2: condense the concatenated per-reply summaries into one.
    merged_text = " ".join(summary_chunks)
    final_reasoning, final_summary = generate_qwen3_gguf(merged_text, max_tokens=256)

    # The final pass's reasoning joins the per-reply reasonings.
    if final_reasoning:
        reasoning_chunks.append(final_reasoning)

    return SummarizeResponse(
        individual_summaries=per_reply,
        combined_reasoning="\n\n".join(reasoning_chunks).strip(),
        combined_summary=final_summary.strip()
    )
|
308 |
|
309 |
|
310 |
|