https://github.com/python/cpython/commit/d783d7b51d31db568de6b3438f4e805acff663da
commit: d783d7b51d31db568de6b3438f4e805acff663da
branch: main
author: Barney Gale <barney.g...@gmail.com>
committer: barneygale <barney.g...@gmail.com>
date: 2025-03-18T23:37:12Z
summary:

GH-126367: `url2pathname()`: handle NTFS alternate data streams (#131428)

Adjust `url2pathname()` to decode embedded colon characters in Windows
URIs, rather than bailing out with an `OSError`.

files:
A Misc/NEWS.d/next/Library/2025-03-18-19-52-49.gh-issue-126367.PRxnuu.rst
M Doc/library/urllib.request.rst
M Lib/nturl2path.py
M Lib/test/test_urllib.py

diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst
index 969e7daea7105b..14785d21e74a11 100644
--- a/Doc/library/urllib.request.rst
+++ b/Doc/library/urllib.request.rst
@@ -182,7 +182,9 @@ The :mod:`urllib.request` module defines the following functions:
       'C:\\Program Files'
 
    .. versionchanged:: 3.14
-      Windows drive letters are no longer converted to uppercase.
+      Windows drive letters are no longer converted to uppercase, and ``:``
+      characters not following a drive letter no longer cause an
+      :exc:`OSError` exception to be raised on Windows.
 
 
 .. function:: getproxies()
diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py
index 7e13ae3128333d..7b5b82068e989f 100644
--- a/Lib/nturl2path.py
+++ b/Lib/nturl2path.py
@@ -14,7 +14,7 @@ def url2pathname(url):
     #   ///C:/foo/bar/spam.foo
     # become
     #   C:\foo\bar\spam.foo
-    import string, urllib.parse
+    import urllib.parse
     if url[:3] == '///':
         # URL has an empty authority section, so the path begins on the third
         # character.
@@ -25,19 +25,14 @@ def url2pathname(url):
     if url[:3] == '///':
         # Skip past extra slash before UNC drive in URL path.
         url = url[1:]
-    # Windows itself uses ":" even in URLs.
-    url = url.replace(':', '|')
-    if not '|' in url:
-        # No drive specifier, just convert slashes
-        # make sure not to convert quoted slashes :-)
-        return urllib.parse.unquote(url.replace('/', '\\'))
-    comp = url.split('|')
-    if len(comp) != 2 or comp[0][-1] not in string.ascii_letters:
-        error = 'Bad URL: ' + url
-        raise OSError(error)
-    drive = comp[0][-1]
-    tail = urllib.parse.unquote(comp[1].replace('/', '\\'))
-    return drive + ':' + tail
+    else:
+        if url[:1] == '/' and url[2:3] in (':', '|'):
+            # Skip past extra slash before DOS drive in URL path.
+            url = url[1:]
+        if url[1:2] == '|':
+            # Older URLs use a pipe after a drive letter
+            url = url[:1] + ':' + url[2:]
+    return urllib.parse.unquote(url.replace('/', '\\'))
 
 def pathname2url(p):
     """OS-specific conversion from a file system path to a relative URL
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 4842428d6fd103..ed23215c4d0ab7 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -1484,6 +1484,7 @@ def test_pathname2url_nonascii(self):
                          'test specific to Windows pathnames.')
     def test_url2pathname_win(self):
         fn = urllib.request.url2pathname
+        self.assertEqual(fn('/'), '\\')
         self.assertEqual(fn('/C:/'), 'C:\\')
         self.assertEqual(fn("///C|"), 'C:')
         self.assertEqual(fn("///C:"), 'C:')
@@ -1502,8 +1503,10 @@ def test_url2pathname_win(self):
         self.assertEqual(fn('/C|/path/to/file'), 'C:\\path\\to\\file')
         self.assertEqual(fn('///C|/path/to/file'), 'C:\\path\\to\\file')
         self.assertEqual(fn("///C|/foo/bar/spam.foo"), 'C:\\foo\\bar\\spam.foo')
-        # Non-ASCII drive letter
-        self.assertRaises(IOError, fn, "///\u00e8|/")
+        # Colons in URI
+        self.assertEqual(fn('///\u00e8|/'), '\u00e8:\\')
+        self.assertEqual(fn('//host/share/spam.txt:eggs'), '\\\\host\\share\\spam.txt:eggs')
+        self.assertEqual(fn('///c:/spam.txt:eggs'), 'c:\\spam.txt:eggs')
         # UNC paths
         self.assertEqual(fn('//server/path/to/file'), '\\\\server\\path\\to\\file')
         self.assertEqual(fn('////server/path/to/file'), '\\\\server\\path\\to\\file')
diff --git a/Misc/NEWS.d/next/Library/2025-03-18-19-52-49.gh-issue-126367.PRxnuu.rst b/Misc/NEWS.d/next/Library/2025-03-18-19-52-49.gh-issue-126367.PRxnuu.rst
new file mode 100644
index 00000000000000..cebfefbda486f7
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-03-18-19-52-49.gh-issue-126367.PRxnuu.rst
@@ -0,0 +1,3 @@
+Fix issue where :func:`urllib.request.url2pathname` raised :exc:`OSError`
+when given a Windows URI containing a colon character not following a drive
+letter, such as before an NTFS alternate data stream.

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to