|
""" |
|
Internal cookie handling helpers. |
|
|
|
This module contains internal utilities for cookie parsing and manipulation. |
|
These are not part of the public API and may change without notice. |
|
""" |
|
|
|
import re |
|
import sys |
|
from http.cookies import Morsel |
|
from typing import List, Optional, Sequence, Tuple, cast |
|
|
|
from .log import internal_logger |
|
|
|
# Names exported by ``from <this module> import *``: the two header parsers
# and the Morsel-preserving helper defined below. Everything else in this
# module is underscore-prefixed and private.
__all__ = (
    "parse_set_cookie_headers",
    "parse_cookie_header",
    "preserve_morsel_with_coded_value",
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Validation pattern for cookie names. Besides letters and digits it accepts
# punctuation such as ``{}``, ``[]`` and ``()``; whitespace, double quotes,
# commas, semicolons and backslashes are not in the class and are rejected.
_COOKIE_NAME_RE = re.compile(r"^[!#$%&\'()*+\-./0-9:<=>?@A-Z\[\]^_`a-z{|}~]+$")

# Attributes that act as flags: their presence alone means "true", whatever
# value (if any) accompanies them.
_COOKIE_BOOL_ATTRS = frozenset({"secure", "httponly", "partitioned"})

# Every lower-cased name that is treated as a cookie *attribute* rather than
# as a cookie name when parsing Set-Cookie headers.
_COOKIE_KNOWN_ATTRS = _COOKIE_BOOL_ATTRS | {
    "path",
    "domain",
    "max-age",
    "expires",
    "samesite",
    "version",
    "comment",
}
|
|
|
|
|
|
|
|
|
# Tokenizer for one ``key[=value]`` item of a cookie header, applied
# repeatedly with ``.match(header, pos)`` by the parsers in this module.
#
# Named groups:
#   key - the cookie or attribute name (always present, at least one char)
#   val - the raw value, still quoted if it was quoted; ``None`` when the
#         item has no ``=value`` part
#
# Per its inline notes, the pattern is deliberately more lenient than
# SimpleCookie's: it accepts ``[]`` in names, an unmatched opening quote,
# RFC 2822 numeric timezones in "expires" dates, and asctime()-style dates.
# Compiled with re.VERBOSE (whitespace and ``#`` comments inside the pattern
# are ignored) and re.ASCII (``\w``, ``\d`` and ``\s`` match ASCII only).
_COOKIE_PATTERN = re.compile(
    r"""
    \s* # Optional whitespace at start of cookie
    (?P<key> # Start of group 'key'
    # aiohttp has extended to include [] for compatibility with real-world cookies
    [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]+? # Any word of at least one letter
    ) # End of group 'key'
    ( # Optional group: there may not be a value.
    \s*=\s* # Equal Sign
    (?P<val> # Start of group 'val'
    "(?:[^\\"]|\\.)*" # Any double-quoted string (properly closed)
    | # or
    "[^";]* # Unmatched opening quote (differs from SimpleCookie - issue #7993)
    | # or
    # Special case for "expires" attr - RFC 822, RFC 850, RFC 1036, RFC 1123
    (\w{3,6}day|\w{3}),\s # Day of the week or abbreviated day (with comma)
    [\w\d\s-]{9,11}\s[\d:]{8}\s # Date and time in specific format
    (GMT|[+-]\d{4}) # Timezone: GMT or RFC 2822 offset like -0000, +0100
    # NOTE: RFC 2822 timezone support is an aiohttp extension
    # for issue #4493 - SimpleCookie does NOT support this
    | # or
    # ANSI C asctime() format: "Wed Jun 9 10:18:14 2021"
    # NOTE: This is an aiohttp extension for issue #4327 - SimpleCookie does NOT support this format
    \w{3}\s+\w{3}\s+[\s\d]\d\s+\d{2}:\d{2}:\d{2}\s+\d{4}
    | # or
    [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]* # Any word or empty string
    ) # End of group 'val'
    )? # End of optional value group
    \s* # Any number of spaces.
    (\s+|;|$) # Ending either at space, semicolon, or EOS.
    """,
    re.VERBOSE | re.ASCII,
)
|
|
|
|
|
def preserve_morsel_with_coded_value(cookie: Morsel[str]) -> Morsel[str]:
    """
    Preserve a Morsel's coded_value exactly as received from the server.

    This function ensures that cookie encoding is preserved exactly as sent by
    the server, which is critical for compatibility with old servers that have
    strict requirements about cookie formats.

    This addresses the issue described in https://github.com/aio-libs/aiohttp/pull/1453
    where Python's SimpleCookie would re-encode cookies, breaking authentication
    with certain servers.

    Only ``key``, ``value`` and ``coded_value`` are carried over; the returned
    Morsel is a new object whose attributes (path, domain, ...) are left at
    their defaults.

    Args:
        cookie: A Morsel object from SimpleCookie

    Returns:
        A new Morsel object with preserved coded_value

    """
    # Always start from a fresh Morsel. Looking the cookie's *name* up in the
    # source Morsel (a dict of attributes such as "path" or "domain") is wrong:
    # for a cookie that happens to be named like an attribute it returns that
    # attribute's string value, which has no __setstate__ and crashes below.
    mrsl_val: Morsel[str] = Morsel()
    # __setstate__ is used instead of the public set() API so the three fields
    # are stored verbatim, without validation or re-encoding of the value.
    mrsl_val.__setstate__(
        {"key": cookie.key, "value": cookie.value, "coded_value": cookie.coded_value}
    )
    return mrsl_val
|
|
|
|
|
# Bound ``sub`` method of the escape-sequence pattern: a backslash followed by
# either a three-digit octal code in the range 000-377 (group 1) or any other
# single character (group 2).
_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub
|
|
|
|
|
def _unquote_replace(m: re.Match[str]) -> str:
    """
    Return the replacement text for one backslash escape in a cookie value.

    Intended as the callback of a regex substitution whose match has two
    groups:

    - group 1: a three-digit octal code, turned into the character with
      that code point
    - group 2: any other escaped character, returned as-is (i.e. the
      backslash is dropped)
    """
    octal_digits, escaped_char = m.group(1, 2)
    if octal_digits:
        return chr(int(octal_digits, 8))
    return escaped_char
|
|
|
|
|
def _unquote(value: str) -> str:
    """
    Strip surrounding double quotes from a cookie value and decode escapes.

    Vendored from http.cookies._unquote to ensure compatibility.

    A value that is not wrapped in a pair of double quotes (including any
    value shorter than two characters) is returned unchanged. Otherwise the
    quotes are removed and backslash escapes are decoded, for example
    ``\\012`` becomes a newline and ``\\"`` becomes ``"``.

    Note: unlike the original implementation there is no check for None;
    callers are expected to pass a string.
    """
    is_quoted = len(value) >= 2 and value.startswith('"') and value.endswith('"')
    if not is_quoted:
        # No enclosing quotes means there is nothing to decode.
        return value

    inner = value[1:-1]
    return _unquote_sub(_unquote_replace, inner)
|
|
|
|
|
def parse_cookie_header(header: str) -> List[Tuple[str, Morsel[str]]]:
    """
    Parse a Cookie header according to RFC 6265 Section 5.4.

    Cookie headers contain only name-value pairs separated by semicolons.
    There are no attributes in Cookie headers - even names that match
    attribute names (like 'path' or 'secure') should be treated as cookies.

    Items are tokenized with the same regex as parse_set_cookie_headers so
    that quoted values containing semicolons are handled properly. When the
    regex cannot match at the current position (for example a value that
    contains a bare double quote, or a name with non-ASCII characters), only
    the text up to the next semicolon is affected: it is parsed as a plain
    ``name=value`` pair if it contains ``=``, otherwise it is skipped. Parsing
    then resumes after that semicolon, so one malformed item no longer causes
    every following cookie to be dropped.

    Items whose name fails validation are logged with a warning and skipped.

    Args:
        header: The Cookie header value to parse

    Returns:
        List of (name, Morsel) tuples for compatibility with SimpleCookie.update()
    """
    if not header:
        return []

    cookies: List[Tuple[str, Morsel[str]]] = []
    i = 0
    n = len(header)

    while i < n:
        match = _COOKIE_PATTERN.match(header, i)
        if match is not None:
            key = match.group("key")
            # A bare name without "=value" is kept as a cookie with an
            # empty value.
            value = match.group("val") or ""
            i = match.end(0)
        else:
            # Recovery path: isolate the malformed item instead of giving up
            # on the remainder of the header.
            next_semi = header.find(";", i)
            segment_end = n if next_semi == -1 else next_semi
            name, has_equals, raw_value = header[i:segment_end].partition("=")
            # Always advance past the segment (and its semicolon) so the loop
            # is guaranteed to make progress.
            i = segment_end + 1
            if not has_equals:
                # Nothing that looks like name=value (e.g. trailing
                # whitespace or an empty segment between semicolons).
                continue
            key = name.strip()
            value = raw_value.strip()

        # Validate the name; both paths share the same rule and message.
        if not key or not _COOKIE_NAME_RE.match(key):
            internal_logger.warning("Can not load cookie: Illegal cookie name %r", key)
            continue

        morsel: Morsel[str] = Morsel()
        # __setstate__ stores the fields verbatim, bypassing the validation
        # and re-encoding done by the public set() API. The raw text (quotes
        # included) is kept as coded_value; value holds the unquoted form.
        morsel.__setstate__(
            {"key": key, "value": _unquote(value), "coded_value": value}
        )

        cookies.append((key, morsel))

    return cookies
|
|
|
|
|
def _set_morsel_attr(morsel: Morsel[str], name: str, value: object) -> None:
    """Store attribute ``name`` on ``morsel``, including ``partitioned`` on old Pythons."""
    if name == "partitioned" and sys.version_info < (3, 14):
        # Morsel.__setitem__ rejects attribute names it does not know, and
        # "partitioned" is only known from Python 3.14 on; write it through
        # the underlying dict instead of raising CookieError.
        dict.__setitem__(morsel, name, value)
    else:
        morsel[name] = value


def parse_set_cookie_headers(headers: Sequence[str]) -> List[Tuple[str, Morsel[str]]]:
    """
    Parse cookie headers using a vendored version of SimpleCookie parsing.

    This implementation is based on SimpleCookie.__parse_string to ensure
    compatibility with how SimpleCookie parses cookies, including handling
    of malformed cookies with missing semicolons. It is intentionally more
    forgiving than the grammar in RFC 6265 because real-world headers often
    do not follow it.

    Each header is scanned item by item:

    - ``$``-prefixed items before the first cookie are ignored; after a
      cookie they set the matching attribute (without the ``$``) on it.
    - Known attribute names set attributes on the most recent cookie.
      Boolean attributes (secure, httponly, partitioned) are stored as True
      regardless of any value; other attribute values are unquoted.
    - Any other ``name=value`` item starts a new cookie. An illegal name is
      logged with a warning and its following attributes are discarded.

    Scanning of a header stops early when an attribute appears before any
    cookie, when a non-boolean attribute or a cookie name has no value, or
    when the text cannot be tokenized. Cookies collected so far are kept.

    NOTE: This implementation differs from SimpleCookie in handling unmatched quotes.
    SimpleCookie will stop parsing when it encounters a cookie value with an unmatched
    quote (e.g., 'cookie="value'), causing subsequent cookies to be silently dropped.
    This implementation handles unmatched quotes more gracefully to prevent cookie loss.
    See https://github.com/aio-libs/aiohttp/issues/7993

    Args:
        headers: The header values to parse; empty strings are skipped

    Returns:
        List of (name, Morsel) tuples in the order the cookies were found
    """
    parsed_cookies: List[Tuple[str, Morsel[str]]] = []

    for header in headers:
        if not header:
            continue

        i = 0
        n = len(header)
        # Morsel that subsequent attributes apply to; None right after an
        # illegal cookie name so that its attributes are dropped.
        current_morsel: Optional[Morsel[str]] = None
        # True once at least one valid cookie was found in this header.
        morsel_seen = False

        while i < n:
            match = _COOKIE_PATTERN.match(header, i)
            if match is None:
                # Remaining text is not tokenizable.
                break

            key, value = match.group("key", "val")
            i = match.end(0)
            lower_key = key.lower()

            if key[0] == "$":
                # Before the first cookie, "$" items (such as "$Version")
                # describe the cookie mechanism as a whole and are ignored.
                if morsel_seen and current_morsel is not None:
                    attr_lower_key = lower_key[1:]
                    if attr_lower_key in _COOKIE_KNOWN_ATTRS:
                        _set_morsel_attr(current_morsel, attr_lower_key, value or "")
                continue

            if lower_key in _COOKIE_KNOWN_ATTRS:
                if not morsel_seen:
                    # Invalid cookie string: attribute before any cookie.
                    break
                if lower_key in _COOKIE_BOOL_ATTRS:
                    # A flag attribute is True whatever value it carries.
                    if current_morsel is not None:
                        _set_morsel_attr(current_morsel, lower_key, True)
                elif value is None:
                    # Invalid cookie string: non-boolean attribute without value.
                    break
                elif current_morsel is not None:
                    current_morsel[lower_key] = _unquote(value)
                continue

            if value is None:
                # Invalid cookie string: a cookie name without a value.
                break

            # A name=value pair starting a new cookie. Names equal to an
            # attribute name were already handled above.
            if not _COOKIE_NAME_RE.match(key):
                internal_logger.warning(
                    "Can not load cookies: Illegal cookie name %r", key
                )
                current_morsel = None
                continue

            current_morsel = Morsel()
            # __setstate__ stores the fields verbatim, bypassing the
            # validation and re-encoding done by the public set() API. The
            # raw text (quotes included) is kept as coded_value.
            current_morsel.__setstate__(
                {"key": key, "value": _unquote(value), "coded_value": value}
            )
            parsed_cookies.append((key, current_morsel))
            morsel_seen = True

    return parsed_cookies
|
|