https://github.com/python/cpython/commit/d783d7b51d31db568de6b3438f4e805acff663da
commit: d783d7b51d31db568de6b3438f4e805acff663da
branch: main
author: Barney Gale <barney.g...@gmail.com>
committer: barneygale <barney.g...@gmail.com>
date: 2025-03-18T23:37:12Z
summary:

GH-126367: `url2pathname()`: handle NTFS alternate data streams (#131428)

Adjust `url2pathname()` to decode embedded colon characters in Windows URIs, rather than bailing out with an `OSError`.

files:
A Misc/NEWS.d/next/Library/2025-03-18-19-52-49.gh-issue-126367.PRxnuu.rst
M Doc/library/urllib.request.rst
M Lib/nturl2path.py
M Lib/test/test_urllib.py

diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst
index 969e7daea7105b..14785d21e74a11 100644
--- a/Doc/library/urllib.request.rst
+++ b/Doc/library/urllib.request.rst
@@ -182,7 +182,9 @@ The :mod:`urllib.request` module defines the following functions:
       'C:\\Program Files'
 
    .. versionchanged:: 3.14
-      Windows drive letters are no longer converted to uppercase.
+      Windows drive letters are no longer converted to uppercase, and ``:``
+      characters not following a drive letter no longer cause an
+      :exc:`OSError` exception to be raised on Windows.
 
 
 .. function:: getproxies()
diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py
index 7e13ae3128333d..7b5b82068e989f 100644
--- a/Lib/nturl2path.py
+++ b/Lib/nturl2path.py
@@ -14,7 +14,7 @@ def url2pathname(url):
     #   ///C:/foo/bar/spam.foo
     # become
     #   C:\foo\bar\spam.foo
-    import string, urllib.parse
+    import urllib.parse
     if url[:3] == '///':
         # URL has an empty authority section, so the path begins on the third
         # character.
@@ -25,19 +25,14 @@ def url2pathname(url):
     if url[:3] == '///':
         # Skip past extra slash before UNC drive in URL path.
         url = url[1:]
-    # Windows itself uses ":" even in URLs.
-    url = url.replace(':', '|')
-    if not '|' in url:
-        # No drive specifier, just convert slashes
-        # make sure not to convert quoted slashes :-)
-        return urllib.parse.unquote(url.replace('/', '\\'))
-    comp = url.split('|')
-    if len(comp) != 2 or comp[0][-1] not in string.ascii_letters:
-        error = 'Bad URL: ' + url
-        raise OSError(error)
-    drive = comp[0][-1]
-    tail = urllib.parse.unquote(comp[1].replace('/', '\\'))
-    return drive + ':' + tail
+    else:
+        if url[:1] == '/' and url[2:3] in (':', '|'):
+            # Skip past extra slash before DOS drive in URL path.
+            url = url[1:]
+        if url[1:2] == '|':
+            # Older URLs use a pipe after a drive letter
+            url = url[:1] + ':' + url[2:]
+    return urllib.parse.unquote(url.replace('/', '\\'))
 
 def pathname2url(p):
     """OS-specific conversion from a file system path to a relative URL
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 4842428d6fd103..ed23215c4d0ab7 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -1484,6 +1484,7 @@ def test_pathname2url_nonascii(self):
                          'test specific to Windows pathnames.')
     def test_url2pathname_win(self):
         fn = urllib.request.url2pathname
+        self.assertEqual(fn('/'), '\\')
         self.assertEqual(fn('/C:/'), 'C:\\')
         self.assertEqual(fn("///C|"), 'C:')
         self.assertEqual(fn("///C:"), 'C:')
@@ -1502,8 +1503,10 @@ def test_url2pathname_win(self):
         self.assertEqual(fn('/C|/path/to/file'), 'C:\\path\\to\\file')
         self.assertEqual(fn('///C|/path/to/file'), 'C:\\path\\to\\file')
         self.assertEqual(fn("///C|/foo/bar/spam.foo"), 'C:\\foo\\bar\\spam.foo')
-        # Non-ASCII drive letter
-        self.assertRaises(IOError, fn, "///\u00e8|/")
+        # Colons in URI
+        self.assertEqual(fn('///\u00e8|/'), '\u00e8:\\')
+        self.assertEqual(fn('//host/share/spam.txt:eggs'), '\\\\host\\share\\spam.txt:eggs')
+        self.assertEqual(fn('///c:/spam.txt:eggs'), 'c:\\spam.txt:eggs')
         # UNC paths
         self.assertEqual(fn('//server/path/to/file'), '\\\\server\\path\\to\\file')
         self.assertEqual(fn('////server/path/to/file'), '\\\\server\\path\\to\\file')
diff --git a/Misc/NEWS.d/next/Library/2025-03-18-19-52-49.gh-issue-126367.PRxnuu.rst b/Misc/NEWS.d/next/Library/2025-03-18-19-52-49.gh-issue-126367.PRxnuu.rst
new file mode 100644
index 00000000000000..cebfefbda486f7
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-03-18-19-52-49.gh-issue-126367.PRxnuu.rst
@@ -0,0 +1,3 @@
+Fix issue where :func:`urllib.request.url2pathname` raised :exc:`OSError`
+when given a Windows URI containing a colon character not following a drive
+letter, such as before an NTFS alternate data stream.

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com