"""FastAPI service with a forum-thread scraper and a text-generation endpoint.

Endpoints:
    GET  /scrape    -- fetch a page and return the first post plus its replies.
    POST /generate  -- run a prompt through a text2text-generation pipeline.
"""

import os
import re

# The Hugging Face libraries resolve their cache directory while they are being
# imported, so the overrides must be in the environment *before* `transformers`
# is imported; setting them afterwards has no effect on the cache location.
os.environ["HF_HOME"] = "/home/user/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/home/user/huggingface"

import cloudscraper  # noqa: E402
import torch  # noqa: E402
from bs4 import BeautifulSoup  # noqa: E402
from fastapi import FastAPI, Query  # noqa: E402
from pydantic import BaseModel  # noqa: E402
from transformers import pipeline  # noqa: E402

app = FastAPI()

MODEL_NAME = "google/flan-t5-small"

# Upper bound, in seconds, for the outbound page fetch so that an unresponsive
# host cannot hold a worker forever.
SCRAPE_TIMEOUT_SECONDS = 30

# Trailing "<N> like(s), <M> reply/replies" counter at the very end of a post.
_LIKES_REPLIES_SUFFIX = re.compile(
    r"\b\d+\s*likes?,?\s*\d*\s*repl(?:y|ies)$",
    flags=re.IGNORECASE,
)

# Delimiters of an optional reasoning section in the model output.
# NOTE(review): the original source had empty string literals where these tags
# belong; the `<think>` ... `</think>` convention is assumed -- confirm against
# the output format of the model actually deployed.
_THINK_OPEN = "<think>"
_THINK_CLOSE = "</think>"


class ThreadResponse(BaseModel):
    """Response body of ``/scrape``."""

    # Text of the first post found on the page ("" when nothing was found).
    question: str
    # Text of every subsequent post, in page order.
    replies: list[str]


class PromptRequest(BaseModel):
    """Request body of ``/generate``."""

    # Prompt passed to the generation pipeline as-is.
    prompt: str


def clean_text(text: str) -> str:
    """Return *text* stripped of whitespace and of a trailing like/reply counter.

    A suffix such as ``"3 likes, 12 replies"`` or ``"1 like, 1 reply"`` at the
    end of the text is removed (case-insensitively); anything else is kept.
    """
    stripped = text.strip()
    return _LIKES_REPLIES_SUFFIX.sub("", stripped).strip()


@app.get("/scrape", response_model=ThreadResponse)
def scrape(url: str = Query(...)):
    """Fetch *url* and extract the thread's question and replies.

    Every ``<div class="post__content">`` on the page is treated as one post:
    the first is the question, the rest are replies. An empty
    ``ThreadResponse`` is returned when the server does not answer with
    HTTP 200 or when the page contains no such element.

    NOTE(review): *url* is caller-supplied and fetched server-side; consider
    restricting the allowed hosts if this service is reachable by untrusted
    clients.
    """
    empty = ThreadResponse(question="", replies=[])

    scraper = cloudscraper.create_scraper()
    response = scraper.get(url, timeout=SCRAPE_TIMEOUT_SECONDS)
    if response.status_code != 200:
        return empty

    soup = BeautifulSoup(response.content, "html.parser")
    posts = soup.find_all("div", class_="post__content")
    if not posts:
        return empty

    texts = [clean_text(post.get_text(strip=True, separator="\n")) for post in posts]
    return ThreadResponse(question=texts[0], replies=texts[1:])


# Load the pipeline once at import time; use the first CUDA device when one is
# available, otherwise run on the CPU.
# NOTE(review): `temperature` normally only influences generation when
# sampling is enabled -- confirm whether `do_sample=True` was intended.
text_generator = pipeline(
    "text2text-generation",
    model=MODEL_NAME,
    device=0 if torch.cuda.is_available() else -1,
    max_new_tokens=512,
    temperature=0.5,
)


def _split_reasoning(output: str) -> tuple[str, str]:
    """Split raw model output into ``(reasoning, answer)``.

    When the closing reasoning tag is present, everything before its first
    occurrence (minus an optional opening tag) is the reasoning and everything
    after it is the answer. Without the tag the reasoning is empty and the
    whole output is the answer.
    """
    reasoning, closing_tag, answer = output.partition(_THINK_CLOSE)
    if not closing_tag:
        return "", output.strip()
    return reasoning.strip().removeprefix(_THINK_OPEN).strip(), answer.strip()


@app.post("/generate")
async def generate_text(request: PromptRequest):
    """Generate text for ``request.prompt``.

    Returns a dict with ``reasoning_content`` (the model's reasoning section,
    or ``""`` when there is none) and ``generated_text`` (the final answer).

    NOTE(review): the pipeline call is synchronous, so the event loop is
    blocked for the duration of the inference.
    """
    output = text_generator(request.prompt)[0]["generated_text"]

    # The previous code tested for and split on an empty string, which makes
    # `str.split` raise ValueError on every request; split on the tag instead.
    reasoning_content, content = _split_reasoning(output)

    return {
        "reasoning_content": reasoning_content,
        "generated_text": content,
    }