File size: 12,418 Bytes
9c6594c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 |
"""
Internal cookie handling helpers.
This module contains internal utilities for cookie parsing and manipulation.
These are not part of the public API and may change without notice.
"""
import re
import sys
from http.cookies import Morsel
from typing import List, Optional, Sequence, Tuple, cast
from .log import internal_logger
__all__ = (
"parse_set_cookie_headers",
"parse_cookie_header",
"preserve_morsel_with_coded_value",
)
# Cookie parsing constants
# Allow more characters in cookie names to handle real-world cookies
# that don't strictly follow RFC standards (fixes #2683)
# RFC 6265 defines cookie-name token as per RFC 2616 Section 2.2,
# but many servers send cookies with characters like {} [] () etc.
# This makes the cookie parser more tolerant of real-world cookies
# while still providing some validation to catch obviously malformed names.
_COOKIE_NAME_RE = re.compile(r"^[!#$%&\'()*+\-./0-9:<=>?@A-Z\[\]^_`a-z{|}~]+$")
_COOKIE_KNOWN_ATTRS = frozenset( # AKA Morsel._reserved
(
"path",
"domain",
"max-age",
"expires",
"secure",
"httponly",
"samesite",
"partitioned",
"version",
"comment",
)
)
_COOKIE_BOOL_ATTRS = frozenset( # AKA Morsel._flags
("secure", "httponly", "partitioned")
)
# SimpleCookie's pattern for parsing cookies with relaxed validation
# Based on http.cookies pattern but extended to allow more characters in cookie names
# to handle real-world cookies (fixes #2683)
_COOKIE_PATTERN = re.compile(
r"""
\s* # Optional whitespace at start of cookie
(?P<key> # Start of group 'key'
# aiohttp has extended to include [] for compatibility with real-world cookies
[\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]+? # Any word of at least one letter
) # End of group 'key'
( # Optional group: there may not be a value.
\s*=\s* # Equal Sign
(?P<val> # Start of group 'val'
"(?:[^\\"]|\\.)*" # Any double-quoted string (properly closed)
| # or
"[^";]* # Unmatched opening quote (differs from SimpleCookie - issue #7993)
| # or
# Special case for "expires" attr - RFC 822, RFC 850, RFC 1036, RFC 1123
(\w{3,6}day|\w{3}),\s # Day of the week or abbreviated day (with comma)
[\w\d\s-]{9,11}\s[\d:]{8}\s # Date and time in specific format
(GMT|[+-]\d{4}) # Timezone: GMT or RFC 2822 offset like -0000, +0100
# NOTE: RFC 2822 timezone support is an aiohttp extension
# for issue #4493 - SimpleCookie does NOT support this
| # or
# ANSI C asctime() format: "Wed Jun 9 10:18:14 2021"
# NOTE: This is an aiohttp extension for issue #4327 - SimpleCookie does NOT support this format
\w{3}\s+\w{3}\s+[\s\d]\d\s+\d{2}:\d{2}:\d{2}\s+\d{4}
| # or
[\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]* # Any word or empty string
) # End of group 'val'
)? # End of optional value group
\s* # Any number of spaces.
(\s+|;|$) # Ending either at space, semicolon, or EOS.
""",
re.VERBOSE | re.ASCII,
)
def preserve_morsel_with_coded_value(cookie: Morsel[str]) -> Morsel[str]:
"""
Preserve a Morsel's coded_value exactly as received from the server.
This function ensures that cookie encoding is preserved exactly as sent by
the server, which is critical for compatibility with old servers that have
strict requirements about cookie formats.
This addresses the issue described in https://github.com/aio-libs/aiohttp/pull/1453
where Python's SimpleCookie would re-encode cookies, breaking authentication
with certain servers.
Args:
cookie: A Morsel object from SimpleCookie
Returns:
A Morsel object with preserved coded_value
"""
mrsl_val = cast("Morsel[str]", cookie.get(cookie.key, Morsel()))
# We use __setstate__ instead of the public set() API because it allows us to
# bypass validation and set already validated state. This is more stable than
# setting protected attributes directly and unlikely to change since it would
# break pickling.
mrsl_val.__setstate__( # type: ignore[attr-defined]
{"key": cookie.key, "value": cookie.value, "coded_value": cookie.coded_value}
)
return mrsl_val
_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub
def _unquote_replace(m: re.Match[str]) -> str:
"""
Replace function for _unquote_sub regex substitution.
Handles escaped characters in cookie values:
- Octal sequences are converted to their character representation
- Other escaped characters are unescaped by removing the backslash
"""
if m[1]:
return chr(int(m[1], 8))
return m[2]
def _unquote(value: str) -> str:
"""
Unquote a cookie value.
Vendored from http.cookies._unquote to ensure compatibility.
Note: The original implementation checked for None, but we've removed
that check since all callers already ensure the value is not None.
"""
# If there aren't any doublequotes,
# then there can't be any special characters. See RFC 2109.
if len(value) < 2:
return value
if value[0] != '"' or value[-1] != '"':
return value
# We have to assume that we must decode this string.
# Down to work.
# Remove the "s
value = value[1:-1]
# Check for special sequences. Examples:
# \012 --> \n
# \" --> "
#
return _unquote_sub(_unquote_replace, value)
def parse_cookie_header(header: str) -> List[Tuple[str, Morsel[str]]]:
"""
Parse a Cookie header according to RFC 6265 Section 5.4.
Cookie headers contain only name-value pairs separated by semicolons.
There are no attributes in Cookie headers - even names that match
attribute names (like 'path' or 'secure') should be treated as cookies.
This parser uses the same regex-based approach as parse_set_cookie_headers
to properly handle quoted values that may contain semicolons.
Args:
header: The Cookie header value to parse
Returns:
List of (name, Morsel) tuples for compatibility with SimpleCookie.update()
"""
if not header:
return []
cookies: List[Tuple[str, Morsel[str]]] = []
i = 0
n = len(header)
while i < n:
# Use the same pattern as parse_set_cookie_headers to find cookies
match = _COOKIE_PATTERN.match(header, i)
if not match:
break
key = match.group("key")
value = match.group("val") or ""
i = match.end(0)
# Validate the name
if not key or not _COOKIE_NAME_RE.match(key):
internal_logger.warning("Can not load cookie: Illegal cookie name %r", key)
continue
# Create new morsel
morsel: Morsel[str] = Morsel()
# Preserve the original value as coded_value (with quotes if present)
# We use __setstate__ instead of the public set() API because it allows us to
# bypass validation and set already validated state. This is more stable than
# setting protected attributes directly and unlikely to change since it would
# break pickling.
morsel.__setstate__( # type: ignore[attr-defined]
{"key": key, "value": _unquote(value), "coded_value": value}
)
cookies.append((key, morsel))
return cookies
def parse_set_cookie_headers(headers: Sequence[str]) -> List[Tuple[str, Morsel[str]]]:
"""
Parse cookie headers using a vendored version of SimpleCookie parsing.
This implementation is based on SimpleCookie.__parse_string to ensure
compatibility with how SimpleCookie parses cookies, including handling
of malformed cookies with missing semicolons.
This function is used for both Cookie and Set-Cookie headers in order to be
forgiving. Ideally we would have followed RFC 6265 Section 5.2 (for Cookie
headers) and RFC 6265 Section 4.2.1 (for Set-Cookie headers), but the
real world data makes it impossible since we need to be a bit more forgiving.
NOTE: This implementation differs from SimpleCookie in handling unmatched quotes.
SimpleCookie will stop parsing when it encounters a cookie value with an unmatched
quote (e.g., 'cookie="value'), causing subsequent cookies to be silently dropped.
This implementation handles unmatched quotes more gracefully to prevent cookie loss.
See https://github.com/aio-libs/aiohttp/issues/7993
"""
parsed_cookies: List[Tuple[str, Morsel[str]]] = []
for header in headers:
if not header:
continue
# Parse cookie string using SimpleCookie's algorithm
i = 0
n = len(header)
current_morsel: Optional[Morsel[str]] = None
morsel_seen = False
while 0 <= i < n:
# Start looking for a cookie
match = _COOKIE_PATTERN.match(header, i)
if not match:
# No more cookies
break
key, value = match.group("key"), match.group("val")
i = match.end(0)
lower_key = key.lower()
if key[0] == "$":
if not morsel_seen:
# We ignore attributes which pertain to the cookie
# mechanism as a whole, such as "$Version".
continue
# Process as attribute
if current_morsel is not None:
attr_lower_key = lower_key[1:]
if attr_lower_key in _COOKIE_KNOWN_ATTRS:
current_morsel[attr_lower_key] = value or ""
elif lower_key in _COOKIE_KNOWN_ATTRS:
if not morsel_seen:
# Invalid cookie string - attribute before cookie
break
if lower_key in _COOKIE_BOOL_ATTRS:
# Boolean attribute with any value should be True
if current_morsel is not None:
if lower_key == "partitioned" and sys.version_info < (3, 14):
dict.__setitem__(current_morsel, lower_key, True)
else:
current_morsel[lower_key] = True
elif value is None:
# Invalid cookie string - non-boolean attribute without value
break
elif current_morsel is not None:
# Regular attribute with value
current_morsel[lower_key] = _unquote(value)
elif value is not None:
# This is a cookie name=value pair
# Validate the name
if key in _COOKIE_KNOWN_ATTRS or not _COOKIE_NAME_RE.match(key):
internal_logger.warning(
"Can not load cookies: Illegal cookie name %r", key
)
current_morsel = None
else:
# Create new morsel
current_morsel = Morsel()
# Preserve the original value as coded_value (with quotes if present)
# We use __setstate__ instead of the public set() API because it allows us to
# bypass validation and set already validated state. This is more stable than
# setting protected attributes directly and unlikely to change since it would
# break pickling.
current_morsel.__setstate__( # type: ignore[attr-defined]
{"key": key, "value": _unquote(value), "coded_value": value}
)
parsed_cookies.append((key, current_morsel))
morsel_seen = True
else:
# Invalid cookie string - no value for non-attribute
break
return parsed_cookies
|