""" Internal cookie handling helpers. This module contains internal utilities for cookie parsing and manipulation. These are not part of the public API and may change without notice. """ import re import sys from http.cookies import Morsel from typing import List, Optional, Sequence, Tuple, cast from .log import internal_logger __all__ = ( "parse_set_cookie_headers", "parse_cookie_header", "preserve_morsel_with_coded_value", ) # Cookie parsing constants # Allow more characters in cookie names to handle real-world cookies # that don't strictly follow RFC standards (fixes #2683) # RFC 6265 defines cookie-name token as per RFC 2616 Section 2.2, # but many servers send cookies with characters like {} [] () etc. # This makes the cookie parser more tolerant of real-world cookies # while still providing some validation to catch obviously malformed names. _COOKIE_NAME_RE = re.compile(r"^[!#$%&\'()*+\-./0-9:<=>?@A-Z\[\]^_`a-z{|}~]+$") _COOKIE_KNOWN_ATTRS = frozenset( # AKA Morsel._reserved ( "path", "domain", "max-age", "expires", "secure", "httponly", "samesite", "partitioned", "version", "comment", ) ) _COOKIE_BOOL_ATTRS = frozenset( # AKA Morsel._flags ("secure", "httponly", "partitioned") ) # SimpleCookie's pattern for parsing cookies with relaxed validation # Based on http.cookies pattern but extended to allow more characters in cookie names # to handle real-world cookies (fixes #2683) _COOKIE_PATTERN = re.compile( r""" \s* # Optional whitespace at start of cookie (?P # Start of group 'key' # aiohttp has extended to include [] for compatibility with real-world cookies [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]+? # Any word of at least one letter ) # End of group 'key' ( # Optional group: there may not be a value. \s*=\s* # Equal Sign (?P # Start of group 'val' "(?:[^\\"]|\\.)*" # Any double-quoted string (properly closed) | # or "[^";]* # Unmatched opening quote (differs from SimpleCookie - issue #7993) | # or # Special case for "expires" attr - RFC 822, RFC 850, RFC 1036, RFC 1123 (\w{3,6}day|\w{3}),\s # Day of the week or abbreviated day (with comma) [\w\d\s-]{9,11}\s[\d:]{8}\s # Date and time in specific format (GMT|[+-]\d{4}) # Timezone: GMT or RFC 2822 offset like -0000, +0100 # NOTE: RFC 2822 timezone support is an aiohttp extension # for issue #4493 - SimpleCookie does NOT support this | # or # ANSI C asctime() format: "Wed Jun 9 10:18:14 2021" # NOTE: This is an aiohttp extension for issue #4327 - SimpleCookie does NOT support this format \w{3}\s+\w{3}\s+[\s\d]\d\s+\d{2}:\d{2}:\d{2}\s+\d{4} | # or [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]* # Any word or empty string ) # End of group 'val' )? # End of optional value group \s* # Any number of spaces. (\s+|;|$) # Ending either at space, semicolon, or EOS. """, re.VERBOSE | re.ASCII, ) def preserve_morsel_with_coded_value(cookie: Morsel[str]) -> Morsel[str]: """ Preserve a Morsel's coded_value exactly as received from the server. This function ensures that cookie encoding is preserved exactly as sent by the server, which is critical for compatibility with old servers that have strict requirements about cookie formats. This addresses the issue described in https://github.com/aio-libs/aiohttp/pull/1453 where Python's SimpleCookie would re-encode cookies, breaking authentication with certain servers. Args: cookie: A Morsel object from SimpleCookie Returns: A Morsel object with preserved coded_value """ mrsl_val = cast("Morsel[str]", cookie.get(cookie.key, Morsel())) # We use __setstate__ instead of the public set() API because it allows us to # bypass validation and set already validated state. This is more stable than # setting protected attributes directly and unlikely to change since it would # break pickling. mrsl_val.__setstate__( # type: ignore[attr-defined] {"key": cookie.key, "value": cookie.value, "coded_value": cookie.coded_value} ) return mrsl_val _unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub def _unquote_replace(m: re.Match[str]) -> str: """ Replace function for _unquote_sub regex substitution. Handles escaped characters in cookie values: - Octal sequences are converted to their character representation - Other escaped characters are unescaped by removing the backslash """ if m[1]: return chr(int(m[1], 8)) return m[2] def _unquote(value: str) -> str: """ Unquote a cookie value. Vendored from http.cookies._unquote to ensure compatibility. Note: The original implementation checked for None, but we've removed that check since all callers already ensure the value is not None. """ # If there aren't any doublequotes, # then there can't be any special characters. See RFC 2109. if len(value) < 2: return value if value[0] != '"' or value[-1] != '"': return value # We have to assume that we must decode this string. # Down to work. # Remove the "s value = value[1:-1] # Check for special sequences. Examples: # \012 --> \n # \" --> " # return _unquote_sub(_unquote_replace, value) def parse_cookie_header(header: str) -> List[Tuple[str, Morsel[str]]]: """ Parse a Cookie header according to RFC 6265 Section 5.4. Cookie headers contain only name-value pairs separated by semicolons. There are no attributes in Cookie headers - even names that match attribute names (like 'path' or 'secure') should be treated as cookies. This parser uses the same regex-based approach as parse_set_cookie_headers to properly handle quoted values that may contain semicolons. Args: header: The Cookie header value to parse Returns: List of (name, Morsel) tuples for compatibility with SimpleCookie.update() """ if not header: return [] cookies: List[Tuple[str, Morsel[str]]] = [] i = 0 n = len(header) while i < n: # Use the same pattern as parse_set_cookie_headers to find cookies match = _COOKIE_PATTERN.match(header, i) if not match: break key = match.group("key") value = match.group("val") or "" i = match.end(0) # Validate the name if not key or not _COOKIE_NAME_RE.match(key): internal_logger.warning("Can not load cookie: Illegal cookie name %r", key) continue # Create new morsel morsel: Morsel[str] = Morsel() # Preserve the original value as coded_value (with quotes if present) # We use __setstate__ instead of the public set() API because it allows us to # bypass validation and set already validated state. This is more stable than # setting protected attributes directly and unlikely to change since it would # break pickling. morsel.__setstate__( # type: ignore[attr-defined] {"key": key, "value": _unquote(value), "coded_value": value} ) cookies.append((key, morsel)) return cookies def parse_set_cookie_headers(headers: Sequence[str]) -> List[Tuple[str, Morsel[str]]]: """ Parse cookie headers using a vendored version of SimpleCookie parsing. This implementation is based on SimpleCookie.__parse_string to ensure compatibility with how SimpleCookie parses cookies, including handling of malformed cookies with missing semicolons. This function is used for both Cookie and Set-Cookie headers in order to be forgiving. Ideally we would have followed RFC 6265 Section 5.2 (for Cookie headers) and RFC 6265 Section 4.2.1 (for Set-Cookie headers), but the real world data makes it impossible since we need to be a bit more forgiving. NOTE: This implementation differs from SimpleCookie in handling unmatched quotes. SimpleCookie will stop parsing when it encounters a cookie value with an unmatched quote (e.g., 'cookie="value'), causing subsequent cookies to be silently dropped. This implementation handles unmatched quotes more gracefully to prevent cookie loss. See https://github.com/aio-libs/aiohttp/issues/7993 """ parsed_cookies: List[Tuple[str, Morsel[str]]] = [] for header in headers: if not header: continue # Parse cookie string using SimpleCookie's algorithm i = 0 n = len(header) current_morsel: Optional[Morsel[str]] = None morsel_seen = False while 0 <= i < n: # Start looking for a cookie match = _COOKIE_PATTERN.match(header, i) if not match: # No more cookies break key, value = match.group("key"), match.group("val") i = match.end(0) lower_key = key.lower() if key[0] == "$": if not morsel_seen: # We ignore attributes which pertain to the cookie # mechanism as a whole, such as "$Version". continue # Process as attribute if current_morsel is not None: attr_lower_key = lower_key[1:] if attr_lower_key in _COOKIE_KNOWN_ATTRS: current_morsel[attr_lower_key] = value or "" elif lower_key in _COOKIE_KNOWN_ATTRS: if not morsel_seen: # Invalid cookie string - attribute before cookie break if lower_key in _COOKIE_BOOL_ATTRS: # Boolean attribute with any value should be True if current_morsel is not None: if lower_key == "partitioned" and sys.version_info < (3, 14): dict.__setitem__(current_morsel, lower_key, True) else: current_morsel[lower_key] = True elif value is None: # Invalid cookie string - non-boolean attribute without value break elif current_morsel is not None: # Regular attribute with value current_morsel[lower_key] = _unquote(value) elif value is not None: # This is a cookie name=value pair # Validate the name if key in _COOKIE_KNOWN_ATTRS or not _COOKIE_NAME_RE.match(key): internal_logger.warning( "Can not load cookies: Illegal cookie name %r", key ) current_morsel = None else: # Create new morsel current_morsel = Morsel() # Preserve the original value as coded_value (with quotes if present) # We use __setstate__ instead of the public set() API because it allows us to # bypass validation and set already validated state. This is more stable than # setting protected attributes directly and unlikely to change since it would # break pickling. current_morsel.__setstate__( # type: ignore[attr-defined] {"key": key, "value": _unquote(value), "coded_value": value} ) parsed_cookies.append((key, current_morsel)) morsel_seen = True else: # Invalid cookie string - no value for non-attribute break return parsed_cookies