'''
from fastapi import FastAPI, Query
from pydantic import BaseModel
import cloudscraper
from bs4 import BeautifulSoup
from transformers import pipeline
import torch
import re
import os

# os.environ["HF_HOME"] = "/home/user/huggingface"
# os.environ["TRANSFORMERS_CACHE"] = "/home/user/huggingface"

app = FastAPI()


class ThreadResponse(BaseModel):
    question: str
    replies: list[str]


def clean_text(text: str) -> str:
    text = text.strip()
    text = re.sub(r"\b\d+\s*likes?,?\s*\d*\s*replies?$", "", text, flags=re.IGNORECASE).strip()
    return text


@app.get("/scrape", response_model=ThreadResponse)
def scrape(url: str = Query(...)):
    scraper = cloudscraper.create_scraper()
    response = scraper.get(url)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        comment_containers = soup.find_all('div', class_='post__content')
        if comment_containers:
            question = clean_text(comment_containers[0].get_text(strip=True, separator="\n"))
            replies = [clean_text(comment.get_text(strip=True, separator="\n")) for comment in comment_containers[1:]]
            return ThreadResponse(question=question, replies=replies)
    return ThreadResponse(question="", replies=[])


MODEL_NAME = "microsoft/phi-2"

# Load the text-generation pipeline once at startup
text_generator = pipeline(
    "text-generation",
    model=MODEL_NAME,
    trust_remote_code=True,
    device=0 if torch.cuda.is_available() else -1,  # GPU if available, else CPU
)


class PromptRequest(BaseModel):
    prompt: str


@app.post("/generate")
async def generate_text(request: PromptRequest):
    # The model expects a string prompt, so pass request.prompt directly
    outputs = text_generator(
        request.prompt,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        num_return_sequences=1,
    )
    generated_text = outputs[0]['generated_text']

    # Optional: parse reasoning and content if your model uses special tags like </think>
    if "</think>" in generated_text:
        reasoning_content = generated_text.split("</think>")[0].strip()
        content = generated_text.split("</think>")[1].strip()
    else:
        reasoning_content = ""
        content = generated_text.strip()

    return {
        "reasoning_content": reasoning_content,
        "generated_text": content
    }
'''

from fastapi import FastAPI, HTTPException, Path
from pydantic import BaseModel
import cloudscraper
from bs4 import BeautifulSoup
from transformers import AutoTokenizer, AutoModelForCausalLM, T5Tokenizer, T5ForConditionalGeneration
import torch
import re

app = FastAPI()


# --- Data Models ---
class ThreadResponse(BaseModel):
    question: str
    replies: list[str]


class PromptRequest(BaseModel):
    prompt: str


class GenerateResponse(BaseModel):
    reasoning_content: str
    generated_text: str


# --- Utility Functions ---
def clean_text(text: str) -> str:
    text = text.strip()
    # Strip trailing engagement footers such as "12 likes, 3 replies"
    text = re.sub(r"\b\d+\s*likes?,?\s*\d*\s*replies?$", "", text, flags=re.IGNORECASE).strip()
    return text


# --- Scraping Endpoint ---
@app.get("/scrape", response_model=ThreadResponse)
def scrape(url: str):
    scraper = cloudscraper.create_scraper()
    response = scraper.get(url)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, "html.parser")
        comment_containers = soup.find_all("div", class_="post__content")
        if comment_containers:
            # The first matching container is treated as the question; the rest are replies
            question = clean_text(comment_containers[0].get_text(strip=True, separator="\n"))
            replies = [clean_text(comment.get_text(strip=True, separator="\n")) for comment in comment_containers[1:]]
            return ThreadResponse(question=question, replies=replies)
    return ThreadResponse(question="", replies=[])
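# Example usage of the scraping endpoint, a minimal sketch: the thread URL is
# hypothetical, and the scraper assumes the target forum wraps each post in
# <div class="post__content">.
#
#   curl "http://127.0.0.1:8000/scrape?url=https://forum.example.com/thread/123"
#
# clean_text() strips the trailing engagement footer from each post, e.g.:
#   clean_text("Great answer!\n3 likes, 2 replies")  ->  "Great answer!"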
# --- Load DeepSeek-R1-Distill-Qwen-1.5B Model & Tokenizer ---
deepseek_model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
deepseek_tokenizer = AutoTokenizer.from_pretrained(deepseek_model_name)
deepseek_model = AutoModelForCausalLM.from_pretrained(deepseek_model_name)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
deepseek_model = deepseek_model.to(device)

# --- Load T5-Large Model & Tokenizer ---
t5_model_name = "google-t5/t5-large"
t5_tokenizer = T5Tokenizer.from_pretrained(t5_model_name)
t5_model = T5ForConditionalGeneration.from_pretrained(t5_model_name)
t5_model = t5_model.to(device)


# --- Generation Functions ---
def generate_deepseek(prompt: str) -> tuple[str, str]:
    inputs = deepseek_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(device)
    outputs = deepseek_model.generate(
        **inputs,
        max_new_tokens=512,  # budget for new tokens; max_length would count the prompt too
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        num_return_sequences=1,
        pad_token_id=deepseek_tokenizer.eos_token_id,
    )
    generated_text = deepseek_tokenizer.decode(outputs[0], skip_special_tokens=True)
    # DeepSeek-R1 distill models emit their chain of thought inside
    # <think>...</think> tags, so split the reasoning from the final answer
    if "</think>" in generated_text:
        reasoning_content, content = generated_text.split("</think>", 1)
        return reasoning_content.replace("<think>", "").strip(), content.strip()
    return "", generated_text.strip()


def generate_t5(prompt: str) -> tuple[str, str]:
    # T5 expects the prompt to carry a task prefix, e.g. "summarize: ..."
    inputs = t5_tokenizer.encode(prompt, return_tensors="pt", max_length=512, truncation=True).to(device)
    outputs = t5_model.generate(
        inputs,
        max_length=512,
        num_beams=4,
        repetition_penalty=2.5,
        length_penalty=1.0,
        early_stopping=True,
    )
    generated_text = t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
    # T5 is a plain seq2seq model with no <think> reasoning delimiter,
    # so the reasoning field is always empty
    return "", generated_text.strip()


# --- API Endpoints ---
@app.post("/generate/{model_name}", response_model=GenerateResponse)
async def generate(
    request: PromptRequest,
    model_name: str = Path(..., description="Model to use: 'deepseekr1-qwen' or 't5-large'"),
):
    if model_name == "deepseekr1-qwen":
        reasoning, text = generate_deepseek(request.prompt)
    elif model_name == "t5-large":
        reasoning, text = generate_t5(request.prompt)
    else:
        raise HTTPException(status_code=400, detail=f"Unknown model '{model_name}'. Use 'deepseekr1-qwen' or 't5-large'.")
    return GenerateResponse(reasoning_content=reasoning, generated_text=text)
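# --- Local Entry Point ---
# Minimal sketch for running and exercising the API locally; assumes uvicorn is
# installed, and the host/port values are illustrative defaults. Example call
# once the server is up:
#
#   curl -X POST "http://127.0.0.1:8000/generate/t5-large" \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "summarize: FastAPI is a modern Python web framework ..."}'
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="127.0.0.1", port=8000)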