[Python-checkins] [3.12] gh-85110: Preserve relative path in URL without netloc in urllib.parse.urlunsplit() (GH-123179) (#123188)

ambv Fri, 06 Sep 2024 06:48:46 -0700

https://github.com/python/cpython/commit/0edfc668e73d313396ec7e2a3228227b1317bfa4
commit: 0edfc668e73d313396ec7e2a3228227b1317bfa4
branch: 3.12
author: Miss Islington (bot) <[email protected]>
committer: ambv <[email protected]>
date: 2024-09-06T15:48:23+02:00
summary:


[3.12] gh-85110: Preserve relative path in URL without netloc in 
urllib.parse.urlunsplit() (GH-123179) (#123188)

(cherry picked from commit 90c892efeaae28bd849a01b42842f19dcd67b9f4)

Co-authored-by: Serhiy Storchaka <[email protected]>

files:
A Misc/NEWS.d/next/Library/2024-08-20-18-02-27.gh-issue-85110.8_iDQy.rst
M Lib/test/test_urlparse.py
M Lib/urllib/parse.py

diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 4faad733245df9..818e7e93dbbe11 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -207,6 +207,9 @@ def test_roundtrips(self):
             ('scheme://///path/to/file',
              ('scheme', '', '///path/to/file', '', '', ''),
              ('scheme', '', '///path/to/file', '', '')),
+            ('file:tmp/junk.txt',
+             ('file', '', 'tmp/junk.txt', '', '', ''),
+             ('file', '', 'tmp/junk.txt', '', '')),
             ('file:///tmp/junk.txt',
              ('file', '', '/tmp/junk.txt', '', '', ''),
              ('file', '', '/tmp/junk.txt', '', '')),
@@ -216,6 +219,18 @@ def test_roundtrips(self):
             ('file://///tmp/junk.txt',
              ('file', '', '///tmp/junk.txt', '', '', ''),
              ('file', '', '///tmp/junk.txt', '', '')),
+            ('http:tmp/junk.txt',
+             ('http', '', 'tmp/junk.txt', '', '', ''),
+             ('http', '', 'tmp/junk.txt', '', '')),
+            ('http://example.com/tmp/junk.txt',
+             ('http', 'example.com', '/tmp/junk.txt', '', '', ''),
+             ('http', 'example.com', '/tmp/junk.txt', '', '')),
+            ('http:///example.com/tmp/junk.txt',
+             ('http', '', '/example.com/tmp/junk.txt', '', '', ''),
+             ('http', '', '/example.com/tmp/junk.txt', '', '')),
+            ('http:////example.com/tmp/junk.txt',
+             ('http', '', '//example.com/tmp/junk.txt', '', '', ''),
+             ('http', '', '//example.com/tmp/junk.txt', '', '')),
             ('imap://mail.python.org/mbox1',
              ('imap', 'mail.python.org', '/mbox1', '', '', ''),
              ('imap', 'mail.python.org', '/mbox1', '', '')),
@@ -260,7 +275,8 @@ def _encode(t):
              ('', '', 'schème:path/to/file', '', '')),
             ]
         for url, parsed, split in str_cases + bytes_cases:
-            self.checkRoundtrips(url, parsed, split)
+            with self.subTest(url):
+                self.checkRoundtrips(url, parsed, split)
 
     def test_roundtrips_normalization(self):
         str_cases = [
@@ -292,7 +308,8 @@ def _encode(t):
                     tuple(x.encode('ascii') for x in t[3]))
         bytes_cases = [_encode(x) for x in str_cases]
         for url, url2, parsed, split in str_cases + bytes_cases:
-            self.checkRoundtrips(url, parsed, split, url2)
+            with self.subTest(url):
+                self.checkRoundtrips(url, parsed, split, url2)
 
     def test_http_roundtrips(self):
         # urllib.parse.urlsplit treats 'http:' as an optimized special case,
@@ -333,11 +350,17 @@ def _encode(t):
                     self.checkRoundtrips(url, parsed, split)
 
     def checkJoin(self, base, relurl, expected):
-        str_components = (base, relurl, expected)
-        self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
-        bytes_components = baseb, relurlb, expectedb = [
-                            x.encode('ascii') for x in str_components]
-        self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
+        with self.subTest(base=base, relurl=relurl):
+            self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
+            baseb = base.encode('ascii')
+            relurlb = relurl.encode('ascii')
+            expectedb = expected.encode('ascii')
+            self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
+
+            relurl = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurl))
+            self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
+            relurlb = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurlb))
+            self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
 
     def test_unparse_parse(self):
         str_cases = ['Python', 
'./Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 3932bb99c7e7d1..24815952037fef 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -525,9 +525,13 @@ def urlunsplit(components):
     empty query; the RFC states that these are equivalent)."""
     scheme, netloc, url, query, fragment, _coerce_result = (
                                           _coerce_args(*components))
-    if netloc or (scheme and scheme in uses_netloc) or url[:2] == '//':
+    if netloc:
         if url and url[:1] != '/': url = '/' + url
-        url = '//' + (netloc or '') + url
+        url = '//' + netloc + url
+    elif url[:2] == '//':
+        url = '//' + url
+    elif scheme and scheme in uses_netloc and (not url or url[:1] == '/'):
+        url = '//' + url
     if scheme:
         url = scheme + ':' + url
     if query:
diff --git 
a/Misc/NEWS.d/next/Library/2024-08-20-18-02-27.gh-issue-85110.8_iDQy.rst 
b/Misc/NEWS.d/next/Library/2024-08-20-18-02-27.gh-issue-85110.8_iDQy.rst
new file mode 100644
index 00000000000000..f22fac16b79c0b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-08-20-18-02-27.gh-issue-85110.8_iDQy.rst
@@ -0,0 +1,2 @@
+Preserve relative path in URL without netloc in
+:func:`urllib.parse.urlunsplit` and :func:`urllib.parse.urlunparse`.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

[Python-checkins] [3.12] gh-85110: Preserve relative path in URL without netloc in urllib.parse.urlunsplit() (GH-123179) (#123188)

Reply via email to