from fastapi import FastAPI, HTTPException, Response
from pydantic import BaseModel
from playwright.async_api import async_playwright
import asyncio
import base64
import logging
from typing import List, Optional
from urllib.parse import urlparse

app = FastAPI(title="BrowserAutomation API", version="1.0")
logger = logging.getLogger("browser-api")

# Pydantic Models
class ScreenshotRequest(BaseModel):
    url: str
    full_page: bool = True
    device: Optional[str] = "desktop"  # mobile/tablet/desktop
    format: str = "png"  # png/jpeg/pdf
    delay_ms: int = 2000  # wait after load

class ScrapeRequest(BaseModel):
    url: str
    extract_scripts: List[str] = []  # JS to execute
    css_selectors: List[str] = []    # Elements to extract

# Device presets: keyword arguments passed straight to browser.new_context()
DEVICES = {
    "mobile": {"viewport": {"width": 375, "height": 812}, "is_mobile": True},
    "tablet": {"viewport": {"width": 768, "height": 1024}, "is_mobile": True},
    "desktop": {"viewport": {"width": 1366, "height": 768}, "is_mobile": False},
}

@app.on_event("startup")
async def init_browser():
    app.state.playwright = await async_playwright().start()
    app.state.browser = await app.state.playwright.chromium.launch()
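
# Assumed cleanup hook (mirrors init_browser above): closes Chromium and
# stops the Playwright driver when the app shuts down.
@app.on_event("shutdown")
async def close_browser():
    await app.state.browser.close()
    await app.state.playwright.stop()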

@app.post("/screenshot")
async def capture_screenshot(req: ScreenshotRequest):
    """Capture website screenshot with device emulation"""
    if not valid_url(req.url):
        raise HTTPException(400, "Invalid URL")
    
    device = DEVICES.get(req.device, DEVICES["desktop"])
    browser = app.state.browser
    
    try:
        context = await browser.new_context(**device)
        page = await context.new_page()
        await page.goto(req.url)
        await asyncio.sleep(req.delay_ms / 1000)
        
        if req.format == "pdf":
            pdf = await page.pdf()
            return Response(content=pdf, media_type="application/pdf")
        else:
            screenshot = await page.screenshot(full_page=req.full_page, type=req.format)
            return {"image": base64.b64encode(screenshot).decode()}
            
    except Exception as e:
        logger.error(f"Screenshot failed: {str(e)}")
        raise HTTPException(500, "Capture failed")

@app.post("/scrape")
async def scrape_page(req: ScrapeRequest):
    """Execute JS and extract page content"""
    # Implementation similar to screenshot but:
    # 1. Execute provided JS scripts
    # 2. Extract DOM elements by CSS selectors
    # 3. Return structured JSON data
    pass

# Helper function
def valid_url(url: str) -> bool:
    parsed = urlparse(url)
    return all([parsed.scheme, parsed.netloc])
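
# Example usage (assumptions: module saved as main.py, uvicorn installed,
# and `playwright install chromium` has been run):
#   uvicorn main:app --port 8000
#   curl -X POST localhost:8000/screenshot \
#        -H "Content-Type: application/json" \
#        -d '{"url": "https://example.com", "device": "mobile"}'
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)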