|
from __future__ import annotations |
|
|
|
import dataclasses |
|
import os |
|
import re |
|
import sys |
|
import warnings |
|
from collections.abc import Generator |
|
from typing import Callable |
|
|
|
from .datastructures import Headers |
|
from .exceptions import SecurityError |
|
from .version import version as websockets_version |
|
|
|
|
|
# Public names exported by a star-import of this module.
__all__ = [
    "SERVER",
    "USER_AGENT",
    "Request",
    "Response",
]
|
|
|
|
|
# "major.minor" version of the running interpreter, e.g. "3.12".
PYTHON_VERSION = f"{sys.version_info.major}.{sys.version_info.minor}"

# Identification string for the client side. The WEBSOCKETS_USER_AGENT
# environment variable, when set, replaces the default value.
# NOTE(review): the name suggests the User-Agent header; usage isn't visible here.
USER_AGENT = os.getenv(
    "WEBSOCKETS_USER_AGENT",
    f"Python/{PYTHON_VERSION} websockets/{websockets_version}",
)

# Identification string for the server side. The WEBSOCKETS_SERVER
# environment variable, when set, replaces the default value.
# NOTE(review): the name suggests the Server header; usage isn't visible here.
SERVER = os.getenv(
    "WEBSOCKETS_SERVER",
    f"Python/{PYTHON_VERSION} websockets/{websockets_version}",
)
|
|
|
|
|
# Security limits, each overridable through an environment variable that is
# read once, when this module is imported.

# Maximum number of header lines accepted by parse_headers().
MAX_NUM_HEADERS = int(os.getenv("WEBSOCKETS_MAX_NUM_HEADERS", "128"))

# Maximum length passed to read_line() by parse_line(); it bounds the
# request line, the status line, and each header line.
MAX_LINE_LENGTH = int(os.getenv("WEBSOCKETS_MAX_LINE_LENGTH", "8192"))

# Maximum size of a response body accepted by read_body() (default: 1 MiB).
MAX_BODY_SIZE = int(os.getenv("WEBSOCKETS_MAX_BODY_SIZE", "1_048_576"))
|
|
|
|
|
def d(value: bytes) -> str:
    """
    Turn a bytestring into text that is safe to embed in an error message.

    Bytes that aren't valid UTF-8 are rendered as backslash escapes instead
    of raising :exc:`UnicodeDecodeError`.

    """
    text = value.decode("utf-8", errors="backslashreplace")
    return text
|
|
|
|
|
|
|
|
|
|
|
|
|
# Matches one or more "token" characters (ASCII letters, digits, and the
# punctuation listed in the character class), i.e. the tchar set of RFC 7230.
# parse_headers() applies it with fullmatch() to validate header names.
_token_re = re.compile(rb"[-!#$%&\'*+.^_`|~0-9a-zA-Z]+")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Matches zero or more bytes among HTAB (\x09), SP and printable ASCII
# (\x20-\x7e), and high bytes (\x80-\xff). Other control characters,
# including CR and LF, are excluded. It is applied with fullmatch() to
# validate header values and the reason phrase of the status line.
_value_re = re.compile(rb"[\x09\x20-\x7e\x80-\xff]*")
|
|
|
|
|
@dataclasses.dataclass
class Request:
    """
    WebSocket handshake request.

    Attributes:
        path: Request path, including optional query.
        headers: Request headers.

    """

    path: str
    headers: Headers

    # Backing storage for the deprecated ``exception`` property.
    _exception: Exception | None = None

    @property
    def exception(self) -> Exception | None:
        """
        Deprecated accessor for the exception stored on this request.

        Emits a :exc:`DeprecationWarning` attributed to the caller.

        """
        warnings.warn(
            "Request.exception is deprecated; use ServerProtocol.handshake_exc instead",
            DeprecationWarning,
            # Point the warning at the code reading the attribute rather than
            # at this module, so that default warning filters can show it.
            stacklevel=2,
        )
        return self._exception

    @classmethod
    def parse(
        cls,
        read_line: Callable[[int], Generator[None, None, bytes]],
    ) -> Generator[None, None, Request]:
        """
        Parse a WebSocket handshake request.

        This is a generator-based coroutine.

        The request path isn't URL-decoded or validated in any way.

        The request path and headers are expected to contain only ASCII
        characters. Other characters are represented with surrogate escapes.

        :meth:`parse` doesn't attempt to read the request body because
        WebSocket handshake requests don't have one. If the request contains a
        body, it may be read from the data stream after :meth:`parse` returns.

        Args:
            read_line: Generator-based coroutine that reads a LF-terminated
                line or raises an exception if there isn't enough data.

        Raises:
            EOFError: If the connection is closed without a full HTTP request.
            SecurityError: If the request exceeds a security limit.
            ValueError: If the request isn't well formatted, if the method
                isn't GET, if the protocol isn't HTTP/1.1, or if a
                Content-Length header is present.
            NotImplementedError: If a Transfer-Encoding header is present.

        """
        try:
            request_line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP request line") from exc

        # The request line has exactly three space-separated parts; only fixed
        # values are accepted for the method and the protocol version.
        try:
            method, raw_path, protocol = request_line.split(b" ", 2)
        except ValueError:  # not enough values to unpack (expected 3, got 1-2)
            raise ValueError(f"invalid HTTP request line: {d(request_line)}") from None
        if protocol != b"HTTP/1.1":
            raise ValueError(
                f"unsupported protocol; expected HTTP/1.1: {d(request_line)}"
            )
        if method != b"GET":
            raise ValueError(f"unsupported HTTP method; expected GET; got {d(method)}")
        path = raw_path.decode("ascii", "surrogateescape")

        headers = yield from parse_headers(read_line)

        # Either header announces a request body, which isn't supported.
        if "Transfer-Encoding" in headers:
            raise NotImplementedError("transfer codings aren't supported")

        if "Content-Length" in headers:
            raise ValueError("unsupported request body")

        return cls(path, headers)

    def serialize(self) -> bytes:
        """
        Serialize a WebSocket handshake request.

        Returns:
            The request line followed by the serialized headers.

        """
        request = f"GET {self.path} HTTP/1.1\r\n".encode()
        request += self.headers.serialize()
        return request
|
|
|
|
|
@dataclasses.dataclass
class Response:
    """
    WebSocket handshake response.

    Attributes:
        status_code: Response code.
        reason_phrase: Response reason.
        headers: Response headers.
        body: Response body.

    """

    status_code: int
    reason_phrase: str
    headers: Headers
    body: bytes = b""

    # Backing storage for the deprecated ``exception`` property.
    _exception: Exception | None = None

    @property
    def exception(self) -> Exception | None:
        """
        Deprecated accessor for the exception stored on this response.

        Emits a :exc:`DeprecationWarning` attributed to the caller.

        """
        warnings.warn(
            "Response.exception is deprecated; "
            "use ClientProtocol.handshake_exc instead",
            DeprecationWarning,
            # Point the warning at the code reading the attribute rather than
            # at this module, so that default warning filters can show it.
            stacklevel=2,
        )
        return self._exception

    @classmethod
    def parse(
        cls,
        read_line: Callable[[int], Generator[None, None, bytes]],
        read_exact: Callable[[int], Generator[None, None, bytes]],
        read_to_eof: Callable[[int], Generator[None, None, bytes]],
        include_body: bool = True,
    ) -> Generator[None, None, Response]:
        """
        Parse a WebSocket handshake response.

        This is a generator-based coroutine.

        The reason phrase and headers are expected to contain only ASCII
        characters. Other characters are represented with surrogate escapes.

        Args:
            read_line: Generator-based coroutine that reads a LF-terminated
                line or raises an exception if there isn't enough data.
            read_exact: Generator-based coroutine that reads the requested
                bytes or raises an exception if there isn't enough data.
            read_to_eof: Generator-based coroutine that reads until the end
                of the stream.
            include_body: When false, the body isn't read from the stream and
                :attr:`body` is set to ``b""``.

        Raises:
            EOFError: If the connection is closed without a full HTTP response.
            SecurityError: If the response exceeds a security limit.
            LookupError: If the response isn't well formatted.
            ValueError: If the response isn't well formatted.
            NotImplementedError: If the body uses a transfer coding other
                than ``chunked``.

        """
        try:
            status_line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP status line") from exc

        # The status line is "HTTP/1.1 <code> <reason>"; the reason phrase may
        # itself contain spaces, hence the maxsplit of 2.
        try:
            protocol, raw_status_code, raw_reason = status_line.split(b" ", 2)
        except ValueError:  # not enough values to unpack (expected 3, got 1-2)
            raise ValueError(f"invalid HTTP status line: {d(status_line)}") from None
        if protocol != b"HTTP/1.1":
            raise ValueError(
                f"unsupported protocol; expected HTTP/1.1: {d(status_line)}"
            )
        try:
            status_code = int(raw_status_code)
        except ValueError:  # invalid literal for int() with base 10
            raise ValueError(
                f"invalid status code; expected integer; got {d(raw_status_code)}"
            ) from None
        if not 100 <= status_code < 600:
            raise ValueError(
                f"invalid status code; expected 100–599; got {d(raw_status_code)}"
            )
        if not _value_re.fullmatch(raw_reason):
            raise ValueError(f"invalid HTTP reason phrase: {d(raw_reason)}")
        reason = raw_reason.decode("ascii", "surrogateescape")

        headers = yield from parse_headers(read_line)

        if include_body:
            body = yield from read_body(
                status_code, headers, read_line, read_exact, read_to_eof
            )
        else:
            body = b""

        return cls(status_code, reason, headers, body)

    def serialize(self) -> bytes:
        """
        Serialize a WebSocket handshake response.

        Returns:
            The status line, then the serialized headers, then the body.

        """
        response = f"HTTP/1.1 {self.status_code} {self.reason_phrase}\r\n".encode()
        response += self.headers.serialize()
        response += self.body
        return response
|
|
|
|
|
def parse_line(
    read_line: Callable[[int], Generator[None, None, bytes]],
) -> Generator[None, None, bytes]:
    """
    Read one line and return it without its trailing CRLF.

    This is a generator-based coroutine.

    Args:
        read_line: Generator-based coroutine that reads a LF-terminated line
            or raises an exception if there isn't enough data. It is called
            with ``MAX_LINE_LENGTH`` as its only argument.

    Raises:
        EOFError: If the line that was read doesn't end with CRLF.
        SecurityError: If ``read_line`` raises :exc:`RuntimeError`, which is
            reported as the line being too long.

    """
    try:
        raw_line = yield from read_line(MAX_LINE_LENGTH)
    except RuntimeError:
        raise SecurityError("line too long")

    # Happy path first: drop the two-byte CRLF terminator.
    if raw_line.endswith(b"\r\n"):
        return raw_line[:-2]

    # Anything else is reported as a line without its terminator.
    raise EOFError("line without CRLF")
|
|
|
|
|
def parse_headers(
    read_line: Callable[[int], Generator[None, None, bytes]],
) -> Generator[None, None, Headers]:
    """
    Read header lines until the empty line that terminates them.

    This is a generator-based coroutine.

    Header names must be ASCII tokens. Non-ASCII bytes in header values are
    represented with surrogate escapes.

    Args:
        read_line: Generator-based coroutine that reads a LF-terminated line
            or raises an exception if there isn't enough data.

    Raises:
        EOFError: If the connection is closed without complete headers.
        SecurityError: If a line is too long or if no empty line is found
            within ``MAX_NUM_HEADERS + 1`` lines.
        ValueError: If a header line has no colon or if its name or value
            contains forbidden characters.

    """
    collected = Headers()

    # One extra iteration leaves room for the terminating empty line after
    # exactly MAX_NUM_HEADERS headers.
    for _ in range(MAX_NUM_HEADERS + 1):
        try:
            header_line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP headers") from exc

        # An empty line marks the end of the headers.
        if not header_line:
            return collected

        try:
            name_bytes, value_bytes = header_line.split(b":", 1)
        except ValueError:  # no colon in the line
            raise ValueError(f"invalid HTTP header line: {d(header_line)}") from None

        if _token_re.fullmatch(name_bytes) is None:
            raise ValueError(f"invalid HTTP header name: {d(name_bytes)}")

        # Optional whitespace around the value isn't part of the value.
        value_bytes = value_bytes.strip(b" \t")
        if _value_re.fullmatch(value_bytes) is None:
            raise ValueError(f"invalid HTTP header value: {d(value_bytes)}")

        collected[name_bytes.decode("ascii")] = value_bytes.decode(
            "ascii", "surrogateescape"
        )

    # The loop ran out of iterations without seeing the empty line.
    raise SecurityError("too many HTTP headers")
|
|
|
|
|
def read_body(
    status_code: int,
    headers: Headers,
    read_line: Callable[[int], Generator[None, None, bytes]],
    read_exact: Callable[[int], Generator[None, None, bytes]],
    read_to_eof: Callable[[int], Generator[None, None, bytes]],
) -> Generator[None, None, bytes]:
    """
    Read the body of an HTTP response.

    This is a generator-based coroutine.

    The way the body is delimited is chosen in this order:

    1. 1xx, 204, and 304 responses have no body;
    2. with a Transfer-Encoding header, the body is read as chunks;
    3. with a Content-Length header, exactly that many bytes are read;
    4. otherwise, the body extends until the end of the stream.

    Args:
        status_code: Status code of the response.
        headers: Headers of the response.
        read_line: Generator-based coroutine that reads a LF-terminated line
            or raises an exception if there isn't enough data.
        read_exact: Generator-based coroutine that reads the requested
            bytes or raises an exception if there isn't enough data.
        read_to_eof: Generator-based coroutine that reads until the end
            of the stream.

    Returns:
        The response body, possibly empty.

    Raises:
        NotImplementedError: If the transfer coding isn't ``chunked``.
        SecurityError: If the body exceeds ``MAX_BODY_SIZE`` or if a declared
            size is implausibly long.
        ValueError: If a chunk size or the content length isn't a valid
            non-negative integer or if a chunk isn't followed by CRLF.

    """
    if 100 <= status_code < 200 or status_code == 204 or status_code == 304:
        return b""

    elif (coding := headers.get("Transfer-Encoding")) is not None:
        if coding != "chunked":
            raise NotImplementedError(f"transfer coding {coding} isn't supported")

        body = b""
        while True:
            chunk_size_line = yield from parse_line(read_line)
            # Chunk extensions, which follow a semicolon, are ignored.
            raw_chunk_size = chunk_size_line.split(b";", 1)[0]

            # Reject absurdly long sizes before converting them to int.
            if len(raw_chunk_size) > 15:
                str_chunk_size = raw_chunk_size.decode(errors="backslashreplace")
                raise SecurityError(f"chunk too large: 0x{str_chunk_size} bytes")
            chunk_size = int(raw_chunk_size, 16)
            # int() accepts a leading minus sign; a negative size would slip
            # through the upper bound check below and reach read_exact().
            if chunk_size < 0:
                str_chunk_size = raw_chunk_size.decode(errors="backslashreplace")
                raise ValueError(f"invalid chunk size: {str_chunk_size}")
            if chunk_size == 0:
                break
            if len(body) + chunk_size > MAX_BODY_SIZE:
                raise SecurityError(
                    f"chunk too large: {chunk_size} bytes after {len(body)} bytes"
                )
            body += yield from read_exact(chunk_size)
            if (yield from read_exact(2)) != b"\r\n":
                raise ValueError("chunk without CRLF")

        # Read and discard the trailer section that follows the last chunk.
        yield from parse_headers(read_line)
        return body

    elif (raw_content_length := headers.get("Content-Length")) is not None:
        # Reject absurdly long values before converting them to int.
        if len(raw_content_length) > 18:
            raise SecurityError(f"body too large: {raw_content_length} bytes")
        content_length = int(raw_content_length)
        # int() accepts a leading minus sign; a negative length would slip
        # through the upper bound check below and reach read_exact().
        if content_length < 0:
            raise ValueError(f"invalid Content-Length: {raw_content_length}")
        if content_length > MAX_BODY_SIZE:
            raise SecurityError(f"body too large: {content_length} bytes")
        return (yield from read_exact(content_length))

    else:
        try:
            return (yield from read_to_eof(MAX_BODY_SIZE))
        except RuntimeError:
            raise SecurityError(f"body too large: over {MAX_BODY_SIZE} bytes")
|
|