https://github.com/python/cpython/commit/7ebbd271444d89218870169624921b795a717470 commit: 7ebbd271444d89218870169624921b795a717470 branch: main author: Serhiy Storchaka <storch...@gmail.com> committer: serhiy-storchaka <storch...@gmail.com> date: 2025-04-09T11:08:04+03:00 summary:
gh-130631: Make join_header_words() more similar to the original Perl version (GH-130632) * Always quote strings with non-ASCII characters. * Allow some non-separator and non-control characters (like "." or "-") to be unquoted. * Always quote strings that end with "\n". * Use the fullmatch() method for clarity and optimization. files: A Misc/NEWS.d/next/Library/2025-02-27-14-25-01.gh-issue-130631.dmZcZM.rst M Lib/http/cookiejar.py M Lib/test/test_http_cookiejar.py diff --git a/Lib/http/cookiejar.py b/Lib/http/cookiejar.py index fb0fd2e97999af..68cf16c93cc1c8 100644 --- a/Lib/http/cookiejar.py +++ b/Lib/http/cookiejar.py @@ -430,6 +430,7 @@ def split_header_words(header_values): if pairs: result.append(pairs) return result +HEADER_JOIN_TOKEN_RE = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+") HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])") def join_header_words(lists): """Do the inverse (almost) of the conversion done by split_header_words. @@ -437,10 +438,10 @@ def join_header_words(lists): Takes a list of lists of (key, value) pairs and produces a single header value. Attribute values are quoted if needed. 
- >>> join_header_words([[("text/plain", None), ("charset", "iso-8859-1")]]) - 'text/plain; charset="iso-8859-1"' - >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859-1")]]) - 'text/plain, charset="iso-8859-1"' + >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]]) + 'text/plain; charset="iso-8859/1"' + >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]]) + 'text/plain, charset="iso-8859/1"' """ headers = [] @@ -448,7 +449,7 @@ def join_header_words(lists): attr = [] for k, v in pairs: if v is not None: - if not re.search(r"^\w+$", v): + if not HEADER_JOIN_TOKEN_RE.fullmatch(v): v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v) # escape " and \ v = '"%s"' % v k = "%s=%s" % (k, v) diff --git a/Lib/test/test_http_cookiejar.py b/Lib/test/test_http_cookiejar.py index 25a671809d4499..cf02c5b43a2e43 100644 --- a/Lib/test/test_http_cookiejar.py +++ b/Lib/test/test_http_cookiejar.py @@ -285,11 +285,21 @@ def test_roundtrip(self): ("foo=bar;bar=baz", "foo=bar; bar=baz"), ('foo bar baz', "foo; bar; baz"), (r'foo="\"" bar="\\"', r'foo="\""; bar="\\"'), + ("föo=bär", 'föo="bär"'), ('foo,,,bar', 'foo, bar'), ('foo=bar,bar=baz', 'foo=bar, bar=baz'), + ("foo=\n", 'foo=""'), + ('foo="\n"', 'foo="\n"'), + ('foo=bar\n', 'foo=bar'), + ('foo="bar\n"', 'foo="bar\n"'), + ('foo=bar\nbaz', 'foo=bar; baz'), + ('foo="bar\nbaz"', 'foo="bar\nbaz"'), ('text/html; charset=iso-8859-1', - 'text/html; charset="iso-8859-1"'), + 'text/html; charset=iso-8859-1'), + + ('text/html; charset="iso-8859/1"', + 'text/html; charset="iso-8859/1"'), ('foo="bar"; port="80,81"; discard, bar=baz', 'foo=bar; port="80,81"; discard, bar=baz'), @@ -297,8 +307,8 @@ def test_roundtrip(self): (r'Basic realm="\"foo\\\\bar\""', r'Basic; realm="\"foo\\\\bar\""'), - ('n; foo="foo;_", bar=foo!_', - 'n; foo="foo;_", bar="foo!_"'), + ('n; foo="foo;_", bar="foo,_"', + 'n; foo="foo;_", bar="foo,_"'), ] for arg, expect in tests: @@ -553,7 +563,7 @@ def 
test_missing_value(self): self.assertIsNone(cookie.value) self.assertEqual(cookie.name, '"spam"') self.assertEqual(lwp_cookie_str(cookie), ( - r'"spam"; path="/foo/"; domain="www.acme.com"; ' + r'"spam"; path="/foo/"; domain=www.acme.com; ' 'path_spec; discard; version=0')) old_str = repr(c) c.save(ignore_expires=True, ignore_discard=True) diff --git a/Misc/NEWS.d/next/Library/2025-02-27-14-25-01.gh-issue-130631.dmZcZM.rst b/Misc/NEWS.d/next/Library/2025-02-27-14-25-01.gh-issue-130631.dmZcZM.rst new file mode 100644 index 00000000000000..c9dc9ba87878c8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-02-27-14-25-01.gh-issue-130631.dmZcZM.rst @@ -0,0 +1,3 @@ +:func:`!http.cookiejar.join_header_words` is now more similar to the original +Perl version. It now quotes the same set of characters and always quotes +values that end with ``"\n"``. _______________________________________________ Python-checkins mailing list -- python-checkins@python.org To unsubscribe send an email to python-checkins-le...@python.org https://mail.python.org/mailman3/lists/python-checkins.python.org/ Member address: arch...@mail-archive.com