|
from __future__ import annotations |
|
|
|
import asyncio |
|
import os |
|
import re |
|
|
|
from ..datastructures import Headers |
|
from ..exceptions import SecurityError |
|
|
|
|
|
# Names exported by ``from <module> import *``: only the two coroutines that
# parse a full request or response head. The other helpers defined in this
# module are not listed.
__all__ = ["read_request", "read_response"]
|
|
|
# Maximum number of header lines accepted in one HTTP message. Read once at
# import time; override with the WEBSOCKETS_MAX_NUM_HEADERS environment variable.
MAX_NUM_HEADERS = int(os.getenv("WEBSOCKETS_MAX_NUM_HEADERS", "128"))

# Maximum length of a single line as returned by the stream, line terminator
# included. Read once at import time; override with the
# WEBSOCKETS_MAX_LINE_LENGTH environment variable.
MAX_LINE_LENGTH = int(os.getenv("WEBSOCKETS_MAX_LINE_LENGTH", "8192"))
|
|
|
|
|
def d(value: bytes) -> str:
    """
    Turn raw bytes into text suitable for embedding in an error message.

    The bytes are decoded as UTF-8. Anything that isn't valid UTF-8 is
    rendered as a backslash escape rather than raising, so formatting an
    error message can never fail because of malformed input.

    """
    printable = value.decode("utf-8", errors="backslashreplace")
    return printable
|
|
|
|
|
|
|
|
|
|
|
|
|
# Matches one or more HTTP "token" bytes: ASCII letters, digits, and the
# punctuation listed in the character class. It is applied with ``fullmatch``
# to header names, so a name containing any other byte (space, colon, control
# or non-ASCII characters) is rejected, as is an empty name.
_token_re = re.compile(
    rb"[-!#$%&\'*+.^_`|~0-9a-zA-Z]+",
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Matches a possibly empty run of bytes allowed in a header value or reason
# phrase: horizontal tab, printable ASCII (0x20-0x7E), and high bytes
# (0x80-0xFF). It is applied with ``fullmatch``, so any other control byte,
# including CR, LF, NUL and DEL, makes the value invalid.
_value_re = re.compile(
    rb"[\x09\x20-\x7e\x80-\xff]*",
)
|
|
|
|
|
async def read_request(stream: asyncio.StreamReader) -> tuple[str, Headers]:
    """
    Parse the request line and headers of an HTTP/1.1 GET request.

    The result is ``(path, headers)``. ``path`` is handed back as received:
    it is neither URL-decoded nor validated.

    ``path`` and ``headers`` should consist of ASCII characters only. Any
    other byte is carried through as a surrogate escape.

    The request body is never consumed, since WebSocket handshake requests
    don't carry one. Should a body be present, it is still available on
    ``stream`` once this coroutine has returned.

    Args:
        stream: Input to read the request from.

    Raises:
        EOFError: If the connection is closed without a full HTTP request.
        SecurityError: If the request exceeds a security limit.
        ValueError: If the request isn't well formatted.

    """
    try:
        start_line = await read_line(stream)
    except EOFError as exc:
        raise EOFError("connection closed while reading HTTP request line") from exc

    # A request line has exactly three space-separated fields. Splitting at
    # most twice leaves any extra space inside the last field, where the
    # version comparison below rejects it.
    fields = start_line.split(b" ", 2)
    if len(fields) != 3:
        raise ValueError(f"invalid HTTP request line: {d(start_line)}") from None
    method, target, version = fields

    # Only "GET <target> HTTP/1.1" is accepted.
    if method != b"GET":
        raise ValueError(f"unsupported HTTP method: {d(method)}")
    if version != b"HTTP/1.1":
        raise ValueError(f"unsupported HTTP version: {d(version)}")

    # surrogateescape keeps non-ASCII bytes instead of failing on them.
    path = target.decode("ascii", "surrogateescape")

    return path, await read_headers(stream)
|
|
|
|
|
async def read_response(stream: asyncio.StreamReader) -> tuple[int, str, Headers]:
    """
    Read an HTTP/1.1 response and return ``(status_code, reason, headers)``.

    ``reason`` and ``headers`` are expected to contain only ASCII characters.
    Other characters are represented with surrogate escapes.

    :func:`read_response` doesn't attempt to read the response body because
    WebSocket handshake responses don't have one. If the response contains a
    body, it may be read from ``stream`` after this coroutine returns.

    Args:
        stream: Input to read the response from.

    Raises:
        EOFError: If the connection is closed without a full HTTP response.
        SecurityError: If the response exceeds a security limit.
        ValueError: If the response isn't well formatted.

    """
    try:
        status_line = await read_line(stream)
    except EOFError as exc:
        raise EOFError("connection closed while reading HTTP status line") from exc

    # A status line is "<version> <status code> <reason>". Splitting at most
    # twice keeps spaces inside the reason phrase; fewer than three fields
    # makes the unpacking fail with ValueError.
    try:
        version, raw_status_code, raw_reason = status_line.split(b" ", 2)
    except ValueError:
        raise ValueError(f"invalid HTTP status line: {d(status_line)}") from None

    if version != b"HTTP/1.1":
        raise ValueError(f"unsupported HTTP version: {d(version)}")

    try:
        status_code = int(raw_status_code)
    except ValueError:
        raise ValueError(f"invalid HTTP status code: {d(raw_status_code)}") from None
    # Only three-digit status codes are supported.
    if not 100 <= status_code < 1000:
        raise ValueError(f"unsupported HTTP status code: {d(raw_status_code)}")

    if not _value_re.fullmatch(raw_reason):
        raise ValueError(f"invalid HTTP reason phrase: {d(raw_reason)}")
    # The pattern above accepts bytes 0x80-0xFF, so a strict decode could raise
    # UnicodeDecodeError on a reason phrase that has just been validated.
    # Decoding as ASCII with surrogate escapes never fails and matches both
    # the documented contract and the way header values are decoded.
    reason = raw_reason.decode("ascii", "surrogateescape")

    headers = await read_headers(stream)

    return status_code, reason, headers
|
|
|
|
|
async def read_headers(stream: asyncio.StreamReader) -> Headers:
    """
    Collect HTTP header lines from ``stream`` up to the terminating blank line.

    Header names must be ASCII tokens. In header values, non-ASCII bytes are
    kept as surrogate escapes.

    Raises:
        EOFError: If the connection is closed before the blank line.
        SecurityError: If there are too many header lines.
        ValueError: If a header line, name, or value is malformed.

    """
    collected = Headers()

    # One line more than the header limit may be read: the last one has to be
    # the empty line that ends the header section.
    lines_left = MAX_NUM_HEADERS + 1
    while lines_left > 0:
        lines_left -= 1

        try:
            raw_line = await read_line(stream)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP headers") from exc

        if not raw_line:
            return collected

        # Name and value are separated by the first colon; later colons
        # belong to the value.
        raw_name, colon, raw_value = raw_line.partition(b":")
        if not colon:
            raise ValueError(f"invalid HTTP header line: {d(raw_line)}") from None

        if _token_re.fullmatch(raw_name) is None:
            raise ValueError(f"invalid HTTP header name: {d(raw_name)}")

        # Spaces and tabs surrounding the value aren't part of it.
        raw_value = raw_value.strip(b" \t")
        if _value_re.fullmatch(raw_value) is None:
            raise ValueError(f"invalid HTTP header value: {d(raw_value)}")

        # The name was validated as ASCII above, so strict decoding is safe.
        # The value may contain high bytes, hence surrogateescape.
        header_name = raw_name.decode("ascii")
        header_value = raw_value.decode("ascii", "surrogateescape")
        collected[header_name] = header_value

    # The line budget ran out before the blank line was seen.
    raise SecurityError("too many HTTP headers")
|
|
|
|
|
async def read_line(stream: asyncio.StreamReader) -> bytes:
    """
    Fetch the next line from ``stream`` and return it without its CRLF.

    Raises:
        SecurityError: If the line, terminator included, is longer than
            ``MAX_LINE_LENGTH``.
        EOFError: If the data read doesn't end with CRLF.

    """
    raw = await stream.readline()

    # The length check runs first, so an oversized line is reported as a
    # security violation even when it also lacks a proper terminator.
    if len(raw) > MAX_LINE_LENGTH:
        raise SecurityError("line too long")

    content, terminator = raw[:-2], raw[-2:]
    # Anything other than CRLF at the end (including a bare LF or no
    # terminator at all) is reported as EOFError.
    if terminator != b"\r\n":
        raise EOFError("line without CRLF")
    return content
|
|