"""FastAPI service exposing a forum-thread scraper and a text-generation endpoint."""

import os
import re

import cloudscraper
import torch
from bs4 import BeautifulSoup
from fastapi import FastAPI, Query
from pydantic import BaseModel
from transformers import pipeline

# Optional Hugging Face cache overrides, left disabled as in the original file.
#os.environ["HF_HOME"] = "/home/user/huggingface"
#os.environ["TRANSFORMERS_CACHE"] = "/home/user/huggingface"

app = FastAPI()

# Trailing "<n> like(s), <m> replies" counter found at the end of a post's text.
_TRAILING_COUNTS_RE = re.compile(
    r"\b\d+\s*likes?,?\s*\d*\s*replies?$", flags=re.IGNORECASE
)

# Upper bound (seconds) for the upstream HTTP request so a stalled site cannot
# hold a worker thread forever.
_REQUEST_TIMEOUT_SECONDS = 30

# Delimiter that separates a model's reasoning from its final answer.
# NOTE(review): the original literal was lost (it had become an empty string,
# which makes str.split raise ValueError); "</think>" is assumed -- confirm it
# matches the tag your model actually emits.
_REASONING_END_TAG = "</think>"


class ThreadResponse(BaseModel):
    """Scraped thread: the opening post plus every following reply."""

    question: str
    replies: list[str]


def clean_text(text: str) -> str:
    """Return *text* stripped of surrounding whitespace and of a trailing
    "N likes, M replies" counter, if one is present."""
    text = text.strip()
    return _TRAILING_COUNTS_RE.sub("", text).strip()


@app.get("/scrape", response_model=ThreadResponse)
def scrape(url: str = Query(...)):
    """Fetch *url* and return its posts as a question plus replies.

    Every ``<div class="post__content">`` is treated as one post: the first
    is the question, the rest are replies. An empty ``ThreadResponse`` is
    returned when the page does not answer with HTTP 200 or contains no
    such element. Network errors (including the request timeout) propagate.
    """
    # NOTE(review): `url` is caller-supplied and fetched as-is; consider
    # restricting allowed hosts if this endpoint is publicly reachable.
    empty = ThreadResponse(question="", replies=[])

    scraper = cloudscraper.create_scraper()
    response = scraper.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
    if response.status_code != 200:
        return empty

    soup = BeautifulSoup(response.content, 'html.parser')
    posts = soup.find_all('div', class_='post__content')
    if not posts:
        return empty

    question, *replies = (
        clean_text(post.get_text(strip=True, separator="\n")) for post in posts
    )
    return ThreadResponse(question=question, replies=replies)


MODEL_NAME = "microsoft/phi-2"

# Load the pipeline once at import time: on the first CUDA device when one is
# available, otherwise on the CPU.
text_generator = pipeline(
    "text-generation",
    model=MODEL_NAME,
    trust_remote_code=True,
    device=0 if torch.cuda.is_available() else -1,
)


class PromptRequest(BaseModel):
    """Request body for ``/generate``."""

    prompt: str


def _extract_generated_text(generated) -> str:
    """Normalise the pipeline's ``generated_text`` value to a plain string.

    A plain string is returned unchanged. A list is assumed to be a chat
    transcript of ``{"role", "content"}`` dicts whose last entry is the
    model's reply -- presumably what the pipeline returns for chat-style
    input; verify against the installed transformers version.
    """
    if isinstance(generated, str):
        return generated
    if isinstance(generated, list):
        last = generated[-1] if generated else {}
        if isinstance(last, dict):
            return str(last.get("content") or "")
    return str(generated)


def _split_reasoning(text: str) -> tuple[str, str]:
    """Split *text* at the first reasoning end tag into (reasoning, answer).

    When the tag is absent the reasoning part is empty and the whole text is
    the answer. Both parts are whitespace-stripped.
    """
    if _REASONING_END_TAG not in text:
        return "", text.strip()
    reasoning, _, answer = text.partition(_REASONING_END_TAG)
    return reasoning.strip(), answer.strip()


@app.post("/generate")
async def generate_text(request: PromptRequest):
    """Generate a completion for ``request.prompt``.

    Returns a dict with ``reasoning_content`` (text before the reasoning end
    tag, or ``""``) and ``generated_text`` (the remaining answer).
    """
    # Prepare messages as expected by the model pipeline.
    # NOTE(review): chat-style input relies on the tokenizer providing a chat
    # template -- confirm that MODEL_NAME ships one.
    messages = [{"role": "user", "content": request.prompt}]

    # NOTE(review): this call is blocking inside an async handler, so other
    # requests wait while the model runs.
    outputs = text_generator(messages)

    # The pipeline returns a list of dicts with 'generated_text'.
    generated_text = _extract_generated_text(outputs[0]['generated_text'])

    reasoning_content, content = _split_reasoning(generated_text)
    return {
        "reasoning_content": reasoning_content,
        "generated_text": content
    }