import asyncio
import base64
import logging
from typing import List, Optional
from urllib.parse import urlparse

from fastapi import FastAPI, HTTPException, Query, Response
from playwright.async_api import async_playwright
from pydantic import BaseModel
|
|
|
# Application object; the route and lifecycle decorators in this module
# register their handlers on it.
app = FastAPI(title="BrowserAutomation API", version="1.0")

# Module-wide logger used by the request handlers to report failures.
logger = logging.getLogger("browser-api")
|
|
|
|
|
class ScreenshotRequest(BaseModel):
    # Request body for POST /screenshot.
    # (Documented with comments rather than a docstring so the generated
    # schema description for this model is left untouched.)

    # Address of the page to capture; the handler checks it with valid_url().
    url: str

    # Forwarded to page.screenshot(full_page=...).
    full_page: bool = True

    # Key into DEVICES; unknown or missing names fall back to "desktop".
    device: Optional[str] = "desktop"

    # "pdf" selects PDF output; otherwise the value is the screenshot image
    # type handed to Playwright (e.g. "png").
    format: str = "png"

    # Time to wait after navigation before capturing, in milliseconds.
    delay_ms: int = 2000
|
|
|
class ScrapeRequest(BaseModel):
    # Request body for POST /scrape.
    # NOTE(review): no handler visible in this file reads these fields yet,
    # so the descriptions below are inferred from the names - confirm once
    # the scrape endpoint is implemented.

    # Address of the page to scrape.
    url: str

    # Presumably JavaScript snippets to evaluate in the page - TODO confirm.
    # The literal [] default relies on pydantic copying field defaults per
    # instance (unlike a plain function default).
    extract_scripts: List[str] = []

    # Presumably CSS selectors whose matches should be extracted - TODO confirm.
    css_selectors: List[str] = []
|
|
|
|
|
# Emulation presets keyed by the device name accepted in ScreenshotRequest.
# Each entry holds the viewport size in pixels and whether the page should be
# treated as a mobile device.
# NOTE: these key names are this module's own; Playwright's
# Browser.new_context() expects viewport={"width": ..., "height": ...} and
# is_mobile=..., so an entry has to be translated before being passed to it.
DEVICES = {
    "mobile": {"width": 375, "height": 812, "mobile": True},
    "tablet": {"width": 768, "height": 1024, "mobile": True},
    "desktop": {"width": 1366, "height": 768, "mobile": False},
}
|
|
|
@app.on_event("startup")
async def init_browser():
    """Start Playwright and launch the shared Chromium browser.

    Both objects are stored on ``app.state`` (``playwright`` and ``browser``)
    so request handlers can reuse a single browser process.

    Raises:
        Exception: whatever Playwright raises if the driver cannot start or
            Chromium cannot be launched; the application then fails to start.
    """
    playwright = await async_playwright().start()
    try:
        browser = await playwright.chromium.launch()
    except Exception:
        # Do not leave the Playwright driver running when the browser itself
        # could not be launched.
        await playwright.stop()
        raise

    app.state.playwright = playwright
    app.state.browser = browser


@app.on_event("shutdown")
async def close_browser():
    """Close the shared browser and stop Playwright when the app shuts down."""
    # getattr with a default: startup may have failed before these were set.
    browser = getattr(app.state, "browser", None)
    if browser is not None:
        await browser.close()

    playwright = getattr(app.state, "playwright", None)
    if playwright is not None:
        await playwright.stop()
|
|
|
@app.post("/screenshot")
async def capture_screenshot(req: ScreenshotRequest):
    """Capture a website screenshot (or PDF) with device emulation.

    Returns the raw PDF as ``application/pdf`` when ``format`` is "pdf";
    otherwise a JSON object ``{"image": "<base64-encoded image>"}``.

    Responds with 400 for an invalid URL or unsupported format and with 500
    when the page could not be loaded or captured.
    """
    if not valid_url(req.url):
        raise HTTPException(400, "Invalid URL")

    # Playwright only produces "png" and "jpeg" screenshots; reject anything
    # else as a client error instead of letting it surface as a 500.
    if req.format not in ("png", "jpeg", "pdf"):
        raise HTTPException(400, "Unsupported format")

    # Unknown (or null) device names fall back to the desktop preset.
    device = DEVICES.get(req.device, DEVICES["desktop"])
    browser = app.state.browser

    context = None
    try:
        # Translate the preset into the keyword arguments new_context()
        # actually accepts; passing width/height/mobile directly is a
        # TypeError.
        context = await browser.new_context(
            viewport={"width": device["width"], "height": device["height"]},
            is_mobile=device.get("mobile", False),
        )
        page = await context.new_page()
        await page.goto(req.url)

        # Give late-loading content time to render before capturing.
        await asyncio.sleep(req.delay_ms / 1000)

        if req.format == "pdf":
            pdf = await page.pdf()
            return Response(content=pdf, media_type="application/pdf")

        screenshot = await page.screenshot(
            full_page=req.full_page, type=req.format
        )
        return {"image": base64.b64encode(screenshot).decode()}

    except Exception as e:
        # Same message as before, now with the traceback attached.
        logger.exception("Screenshot failed: %s", e)
        raise HTTPException(500, "Capture failed") from e

    finally:
        # Every request opens its own context; close it (and its pages) so
        # the shared browser does not accumulate them.
        if context is not None:
            try:
                await context.close()
            except Exception:
                # A cleanup failure must not replace the real outcome.
                logger.warning(
                    "Failed to close browser context", exc_info=True
                )
|
|
|
@app.post("/scrape")
async def scrape_page(req: ScrapeRequest):
    """Execute JS and extract page content (not implemented yet)."""
    # The handler body was an empty stub, which made the endpoint answer
    # 200 with a null body and silently ignore the request. Until the
    # scraping logic and its response shape are defined, report that
    # explicitly so clients do not mistake the empty reply for a result.
    # TODO: implement using req.url, req.extract_scripts, req.css_selectors.
    raise HTTPException(501, "Scrape endpoint is not implemented")
|
|
|
|
|
def valid_url(url: str) -> bool:
    """Return True if *url* is an absolute http(s) URL with a hostname.

    The URL comes from untrusted request data and is handed to a real
    browser, so only web schemes are accepted: ``file://``, ``ftp://``,
    ``chrome://`` and similar are rejected even when they carry a host part.

    Note that this does not stop requests to internal or private network
    addresses; that needs a separate allow/deny policy.

    Args:
        url: The candidate URL string.

    Returns:
        True for a well-formed ``http``/``https`` URL with a non-empty host,
        False otherwise (including strings urlparse cannot parse).
    """
    try:
        parsed = urlparse(url)
        # .hostname is None for host-less netlocs such as ":80".
        host = parsed.hostname
    except ValueError:
        # urlparse raises on malformed input such as an unbalanced IPv6
        # bracket; treat that as simply invalid.
        return False

    # urlparse lower-cases the scheme, so "HTTP://..." is accepted too.
    return parsed.scheme in ("http", "https") and bool(host)