# Spaces: Sleeping
from fastapi import FastAPI, HTTPException | |
from pydantic import BaseModel | |
from playwright.async_api import async_playwright | |
import asyncio | |
import base64 | |
from typing import List, Optional | |
# ASGI application object for this module.
app = FastAPI()
class ScrapeRequest(BaseModel):
    """Options for a single page-scrape request.

    Every flag defaults to ``True``, so a request that supplies only
    ``url`` asks for the page content, a screenshot, and the link list.
    """

    # Address the browser page is navigated to.
    url: str
    # Include a base64-encoded screenshot in the response.
    screenshot: bool = True
    # Include the text/href pairs of the page's anchor elements.
    get_links: bool = True
    # Include the page content string in the response.
    get_content: bool = True
class LinkInfo(BaseModel):
    """One anchor element found on a scraped page."""

    # The anchor's ``innerText`` with surrounding whitespace trimmed.
    text: str
    # The anchor's ``href`` property as reported by the browser.
    href: str
class ScrapeResponse(BaseModel):
    """Result of a scrape; each field stays ``None`` unless it was requested."""

    # Page content string; set only when ``get_content`` is true.
    content: Optional[str] = None
    # Screenshot bytes, base64-encoded as a UTF-8 string; set only when
    # ``screenshot`` is true.
    screenshot: Optional[str] = None
    # Anchors found on the page; set only when ``get_links`` is true.
    links: Optional[List[LinkInfo]] = None
async def scrape_page(request: ScrapeRequest) -> ScrapeResponse:
    """Load ``request.url`` in Chromium and collect the requested artifacts.

    Args:
        request: The URL to visit plus flags selecting which of content,
            screenshot, and links to gather.

    Returns:
        A ``ScrapeResponse`` whose fields are populated only for the
        artifacts that were requested; the rest remain ``None``.

    Raises:
        HTTPException: With status 500 and the stringified error as
            ``detail`` when page creation, navigation, or any extraction
            step fails.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        try:
            # Creating the page inside the ``try`` means a failure here is
            # also mapped to a 500 and the browser is still closed.
            page = await browser.new_page()

            # NOTE(review): the URL is handed to the browser exactly as
            # received; no scheme/host restriction is visible in this
            # function. Confirm that callers validate untrusted input.
            await page.goto(request.url, wait_until="networkidle")

            response = ScrapeResponse()

            # Get page content
            if request.get_content:
                response.content = await page.content()

            # Get screenshot
            if request.screenshot:
                screenshot_bytes = await page.screenshot()
                # Base64 so the binary image can travel as a string field.
                response.screenshot = base64.b64encode(screenshot_bytes).decode('utf-8')

            # Get links
            if request.get_links:
                links = await page.evaluate("""
                    () => {
                        return Array.from(document.querySelectorAll('a')).map(a => {
                            return {
                                text: a.innerText.trim(),
                                href: a.href
                            }
                        });
                    }
                """)
                response.links = [LinkInfo(**link) for link in links]

            return response
        except Exception as e:
            # Chain the cause so the original traceback is preserved.
            raise HTTPException(status_code=500, detail=str(e)) from e
        finally:
            # Single cleanup point for both the success and failure paths.
            await browser.close()