gouravbhadraDev committed on
Commit 80c3a84 · verified · 1 Parent(s): b99cebf

Update app.py

Files changed (1)
  1. app.py +11 -343
app.py CHANGED
@@ -1,356 +1,24 @@
- '''
- from fastapi import FastAPI, Query
  from pydantic import BaseModel
- import cloudscraper
- from bs4 import BeautifulSoup
- from transformers import pipeline
- import torch
- import re
- import os
-
- #os.environ["HF_HOME"] = "/home/user/huggingface"
- #os.environ["TRANSFORMERS_CACHE"] = "/home/user/huggingface"
-
- app = FastAPI()
-
- class ThreadResponse(BaseModel):
-     question: str
-     replies: list[str]
-
- def clean_text(text: str) -> str:
-     text = text.strip()
-     text = re.sub(r"\b\d+\s*likes?,?\s*\d*\s*replies?$", "", text, flags=re.IGNORECASE).strip()
-     return text
-
- @app.get("/scrape", response_model=ThreadResponse)
- def scrape(url: str = Query(...)):
-     scraper = cloudscraper.create_scraper()
-     response = scraper.get(url)
-
-     if response.status_code == 200:
-         soup = BeautifulSoup(response.content, 'html.parser')
-         comment_containers = soup.find_all('div', class_='post__content')
-
-         if comment_containers:
-             question = clean_text(comment_containers[0].get_text(strip=True, separator="\n"))
-             replies = [clean_text(comment.get_text(strip=True, separator="\n")) for comment in comment_containers[1:]]
-             return ThreadResponse(question=question, replies=replies)
-     return ThreadResponse(question="", replies=[])
-
- MODEL_NAME = "microsoft/phi-2"
-
- # Load the text-generation pipeline once at startup
- text_generator = pipeline(
-     "text-generation",
-     model=MODEL_NAME,
-     trust_remote_code=True,
-     device=0 if torch.cuda.is_available() else -1, # GPU if available, else CPU
- )
-
- class PromptRequest(BaseModel):
-     prompt: str
-
- @app.post("/generate")
- async def generate_text(request: PromptRequest):
-     # The model expects a string prompt, so pass request.prompt directly
-     outputs = text_generator(
-         request.prompt,
-         max_new_tokens=512,
-         temperature=0.7,
-         top_p=0.9,
-         do_sample=True,
-         num_return_sequences=1,
-     )
-
-     generated_text = outputs[0]['generated_text']
-
-     # Optional: parse reasoning and content if your model uses special tags like </think>
-     if "</think>" in generated_text:
-         reasoning_content = generated_text.split("</think>")[0].strip()
-         content = generated_text.split("</think>")[1].strip()
-     else:
-         reasoning_content = ""
-         content = generated_text.strip()
-
-     return {
-         "reasoning_content": reasoning_content,
-         "generated_text": content
-     }
-
- '''
-
- from fastapi import FastAPI, Query, Path
- from pydantic import BaseModel
- import cloudscraper
- from bs4 import BeautifulSoup
- from transformers import AutoTokenizer, AutoModelForCausalLM, T5Tokenizer, T5ForConditionalGeneration, PegasusTokenizer, PegasusForConditionalGeneration
- import torch
- import re
- from fastapi.responses import JSONResponse
- from fastapi.requests import Request
- from fastapi import status
- from typing import List, Dict, Optional
  from llama_cpp import Llama

  app = FastAPI()

- # --- Data Models ---
-
- class ThreadResponse(BaseModel):
-     question: str
-     replies: list[str]
-
- class PromptRequest(BaseModel):
-     prompt: str
-
- class GenerateResponse(BaseModel):
-     reasoning_content: str
-     generated_text: str
-
- # New model for summarization request
- class SummarizeRequest(BaseModel):
-     replies: List[str]
-     task: str # expecting "summarisation"
-
- # New model for summarization response
- class SummarizeResponse(BaseModel):
-     individual_summaries: Dict[int, Dict[str, str]] # {index: {"reasoning": str, "summary": str}}
-     combined_reasoning: str
-     combined_summary: str
-
- # --- Utility Functions ---
-
- def clean_text(text: str) -> str:
-     text = text.strip()
-     text = re.sub(r"\b\d+\s*likes?,?\s*\d*\s*replies?$", "", text, flags=re.IGNORECASE).strip()
-     return text
-
- # --- Scraping Endpoint ---
-
- @app.get("/scrape", response_model=ThreadResponse)
- def scrape(url: str):
-     scraper = cloudscraper.create_scraper()
-     response = scraper.get(url)
-
-     if response.status_code == 200:
-         soup = BeautifulSoup(response.content, "html.parser")
-         comment_containers = soup.find_all("div", class_="post__content")
-
-         if comment_containers:
-             question = clean_text(comment_containers[0].get_text(strip=True, separator="\n"))
-             replies = [clean_text(comment.get_text(strip=True, separator="\n")) for comment in comment_containers[1:]]
-             return ThreadResponse(question=question, replies=replies)
-     return ThreadResponse(question="", replies=[])
-
- # --- Load DeepSeek-R1-Distill-Qwen-1.5B Model & Tokenizer ---
-
- deepseek_model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
- deepseek_tokenizer = AutoTokenizer.from_pretrained(deepseek_model_name)
- deepseek_model = AutoModelForCausalLM.from_pretrained(deepseek_model_name)
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- deepseek_model = deepseek_model.to(device)
-
- # --- Load T5-Large Model & Tokenizer ---
-
- t5_model_name = "google-t5/t5-large"
- t5_tokenizer = T5Tokenizer.from_pretrained(t5_model_name)
- t5_model = T5ForConditionalGeneration.from_pretrained(t5_model_name)
- t5_model = t5_model.to(device)
-
- pegasus_model_name = "google/pegasus-large"
- pegasus_tokenizer = PegasusTokenizer.from_pretrained(pegasus_model_name)
- pegasus_model = PegasusForConditionalGeneration.from_pretrained(pegasus_model_name)
- pegasus_model = pegasus_model.to(device)
-
- qwen3_model_name = "Qwen/Qwen3-0.6B"
- qwen3_tokenizer = AutoTokenizer.from_pretrained(qwen3_model_name)
- qwen3_model = AutoModelForCausalLM.from_pretrained(qwen3_model_name)
- qwen3_model = qwen3_model.to(device)
-
  qwen3_gguf_llm = Llama.from_pretrained(
      repo_id="unsloth/Qwen3-0.6B-GGUF",
      filename="Qwen3-0.6B-BF16.gguf",
  )

-
- # --- Generation Functions ---
-
- def generate_deepseek(prompt: str) -> (str, str):
-     inputs = deepseek_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(device)
-     outputs = deepseek_model.generate(
-         **inputs,
-         max_new_tokens=512,
-         temperature=0.7,
-         top_p=0.9,
-         do_sample=True,
-         num_return_sequences=1,
-         pad_token_id=deepseek_tokenizer.eos_token_id,
-     )
-     generated_text = deepseek_tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-     if "</think>" in generated_text:
-         reasoning_content, content = generated_text.split("</think>", 1)
-         return reasoning_content.strip(), content.strip()
-     else:
-         return "", generated_text.strip()
-
- def generate_t5(prompt: str) -> (str, str):
-     inputs = t5_tokenizer.encode(prompt, return_tensors="pt", max_length=512, truncation=True).to(device)
-     outputs = t5_model.generate(
-         inputs,
-         max_length=512,
-         num_beams=4,
-         repetition_penalty=2.5,
-         length_penalty=1.0,
-         early_stopping=True,
-     )
-     generated_text = t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-     if "</think>" in generated_text:
-         reasoning_content, content = generated_text.split("</think>", 1)
-         return reasoning_content.strip(), content.strip()
-     else:
-         return "", generated_text.strip()
-
- # --- API Endpoints ---
-
- def generate_pegasus(prompt: str) -> (str, str):
-     # Pegasus expects raw text input (no prefix needed)
-     inputs = pegasus_tokenizer(
-         prompt,
-         return_tensors="pt",
-         truncation=True,
-         max_length=1024,
-     ).to(device)
-
-     outputs = pegasus_model.generate(
-         **inputs,
-         max_new_tokens=150,
-         num_beams=4,
-         length_penalty=2.0,
-         early_stopping=True,
-     )
-     generated_text = pegasus_tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-     # Pegasus does not use <think> tags, so no reasoning extraction
-     return "", generated_text.strip()
-
- def generate_qwen3(prompt: str) -> (str, str):
-     inputs = qwen3_tokenizer(
-         prompt,
-         return_tensors="pt",
-         truncation=True,
-         max_length=1024,
-     ).to(device)
-
-     outputs = qwen3_model.generate(
-         **inputs,
-         max_new_tokens=512,
-         temperature=0.7,
-         top_p=0.9,
-         do_sample=True,
-         num_return_sequences=1,
-         pad_token_id=qwen3_tokenizer.eos_token_id,
-     )
-
-     generated_text = qwen3_tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-     if "</think>" in generated_text:
-         reasoning_content, content = generated_text.split("</think>", 1)
-         return reasoning_content.strip(), content.strip()
-     else:
-         return "", generated_text.strip()

- def generate_qwen3_gguf(prompt: str, max_tokens: int = 256) -> (str, str):
-     messages = [
-         {"role": "user", "content": prompt}
-     ]
-     response = qwen3_gguf_llm.create_chat_completion(
-         messages=messages,
-         max_tokens=max_tokens,
-     )
      generated_text = response['choices'][0]['message']['content']
-     if "</think>" in generated_text:
-         reasoning_content, content = generated_text.split("</think>", 1)
-         return reasoning_content.strip() + "</think>", content.strip()
-     else:
-         return "", generated_text.strip()
-
- # --- New summarization endpoint ---
-
- @app.post("/summarize_thread", response_model=SummarizeResponse)
- async def summarize_thread(request: SummarizeRequest):
-     if request.task.lower() != "summarisation":
-         return JSONResponse(
-             status_code=400,
-             content={"error": "Unsupported task. Only 'summarisation' is supported."}
-         )
-
-     individual_summaries = {}
-     combined_reasonings = []
-     combined_summaries = []
-
-     # Summarize each reply individually
-     for idx, reply in enumerate(request.replies):
-         reasoning, summary = generate_qwen3_gguf(reply, max_tokens=256)
-         individual_summaries[idx] = {
-             "reasoning": reasoning,
-             "summary": summary
-         }
-         if reasoning:
-             combined_reasonings.append(reasoning)
-         combined_summaries.append(summary)
-
-     # Combine all individual summaries into one text
-     combined_summary_text = " ".join(combined_summaries)
-
-     # Recursively summarize combined summary if too long (optional)
-     # Here, we summarize combined summary to get final reasoning and summary
-     final_reasoning, final_summary = generate_qwen3_gguf(combined_summary_text, max_tokens=256)
-
-     # Append final reasoning to combined reasonings
-     if final_reasoning:
-         combined_reasonings.append(final_reasoning)
-
-     return SummarizeResponse(
-         individual_summaries=individual_summaries,
-         combined_reasoning="\n\n".join(combined_reasonings).strip(),
-         combined_summary=final_summary.strip()
-     )
-
-
-
- @app.post("/generate/{model_name}", response_model=GenerateResponse)
- async def generate(
-     request: PromptRequest,
-     model_name: str = Path(..., description="Model to use: 'deepseekr1-qwen', 't5-large', 'pegasus-large', 'qwen3-0.6b-hf', or 'qwen3-0.6b-gguf'")
- ):
-     if model_name == "deepseekr1-qwen":
-         reasoning, text = generate_deepseek(request.prompt)
-     elif model_name == "t5-large":
-         reasoning, text = generate_t5(request.prompt)
-     elif model_name == "pegasus-large":
-         reasoning, text = generate_pegasus(request.prompt)
-     elif model_name == "qwen3-0.6b-hf":
-         reasoning, text = generate_qwen3_hf(request.prompt)
-     elif model_name == "qwen3-0.6b-gguf":
-         reasoning, text = generate_qwen3_gguf(request.prompt)
-     else:
-         return GenerateResponse(reasoning_content="", generated_text=f"Error: Unknown model '{model_name}'.")
-
-     return GenerateResponse(reasoning_content=reasoning, generated_text=text)
-
-
-
- # --- Global Exception Handler ---
-
- @app.exception_handler(Exception)
- async def global_exception_handler(request: Request, exc: Exception):
-     print(f"Exception: {exc}")
-     return JSONResponse(
-         status_code=status.HTTP_200_OK,
-         content={
-             "reasoning_content": "",
-             "generated_text": f"Error: {str(exc)}"
-         }
-     )
+ from fastapi import FastAPI
  from pydantic import BaseModel
  from llama_cpp import Llama

  app = FastAPI()

  qwen3_gguf_llm = Llama.from_pretrained(
      repo_id="unsloth/Qwen3-0.6B-GGUF",
      filename="Qwen3-0.6B-BF16.gguf",
  )

+ class PromptRequest(BaseModel):
+     prompt: str

+ class GenerateResponse(BaseModel):
+     reasoning_content: str = ""
+     generated_text: str

+ @app.post("/generate/qwen3-0.6b-gguf", response_model=GenerateResponse)
+ async def generate_qwen3_gguf_endpoint(request: PromptRequest):
+     messages = [{"role": "user", "content": request.prompt}]
+     response = qwen3_gguf_llm.create_chat_completion(messages=messages, max_tokens=256)
      generated_text = response['choices'][0]['message']['content']
+     return GenerateResponse(generated_text=generated_text)
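
For quick verification, a minimal client-side sketch of how the remaining endpoint could be exercised. The uvicorn invocation, host, and port below are assumptions for local testing, not part of this commit; only the route path and the generated_text field come from app.py above.

# Assumed local run (not in the commit): uvicorn app:app --host 0.0.0.0 --port 7860
import requests

resp = requests.post(
    "http://localhost:7860/generate/qwen3-0.6b-gguf",  # route defined in app.py
    json={"prompt": "Give a one-sentence summary of what FastAPI is."},
)
resp.raise_for_status()
print(resp.json()["generated_text"])  # GenerateResponse.generated_text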