https://github.com/python/cpython/commit/fd133d4f21cd7f5cbf6bcf332290ce52e5501167
commit: fd133d4f21cd7f5cbf6bcf332290ce52e5501167
branch: main
author: Barney Gale <[email protected]>
committer: barneygale <[email protected]>
date: 2024-11-22T00:29:05Z
summary:

GH-126601: `pathname2url()`: handle NTFS alternate data streams (#126760)

Adjust `pathname2url()` to encode embedded colon characters in Windows
paths, rather than bailing out with an `OSError`.

Co-authored-by: Steve Dower <[email protected]>

files:
A Misc/NEWS.d/next/Library/2024-11-12-20-05-09.gh-issue-126601.Nj7bA9.rst
M Doc/library/urllib.request.rst
M Lib/nturl2path.py
M Lib/test/test_urllib.py

diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst
index cdd58b84a995b7..e0831bf7e65ad2 100644
--- a/Doc/library/urllib.request.rst
+++ b/Doc/library/urllib.request.rst
@@ -152,6 +152,11 @@ The :mod:`urllib.request` module defines the following functions:
    the path component of a URL.  This does not produce a complete URL.  The return
    value will already be quoted using the :func:`~urllib.parse.quote` function.
 
+   .. versionchanged:: 3.14
+      On Windows, ``:`` characters not following a drive letter are quoted. In
+      previous versions, :exc:`OSError` was raised if a colon character was
+      found in any position other than the second character.
+
 
 .. function:: url2pathname(path)
 
diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py
index 255eb2f547c2ce..ed7880fd1a775f 100644
--- a/Lib/nturl2path.py
+++ b/Lib/nturl2path.py
@@ -40,6 +40,7 @@ def pathname2url(p):
     #   C:\foo\bar\spam.foo
     # becomes
     #   ///C:/foo/bar/spam.foo
+    import ntpath
     import urllib.parse
     # First, clean up some special forms. We are going to sacrifice
     # the additional information anyway
@@ -48,16 +49,13 @@ def pathname2url(p):
         p = p[4:]
         if p[:4].upper() == 'UNC/':
             p = '//' + p[4:]
-        elif p[1:2] != ':':
-            raise OSError('Bad path: ' + p)
-    if not ':' in p:
-        # No DOS drive specified, just quote the pathname
-        return urllib.parse.quote(p)
-    comp = p.split(':', maxsplit=2)
-    if len(comp) != 2 or len(comp[0]) > 1:
-        error = 'Bad path: ' + p
-        raise OSError(error)
+    drive, tail = ntpath.splitdrive(p)
+    if drive[1:] == ':':
+        # DOS drive specified. Add three slashes to the start, producing
+        # an authority section with a zero-length authority, and a path
+        # section starting with a single slash.
+        drive = f'///{drive.upper()}'
 
-    drive = urllib.parse.quote(comp[0].upper())
-    tail = urllib.parse.quote(comp[1])
-    return '///' + drive + ':' + tail
+    drive = urllib.parse.quote(drive, safe='/:')
+    tail = urllib.parse.quote(tail)
+    return drive + tail
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index c66b1c49c316e6..3e5dc256d317a7 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -1429,8 +1429,9 @@ def test_pathname2url_win(self):
         self.assertEqual(fn('C:\\a\\b%#c'), '///C:/a/b%25%23c')
         self.assertEqual(fn('C:\\a\\b\xe9'), '///C:/a/b%C3%A9')
         self.assertEqual(fn('C:\\foo\\bar\\spam.foo'), "///C:/foo/bar/spam.foo")
-        # Long drive letter
-        self.assertRaises(IOError, fn, "XX:\\")
+        # NTFS alternate data streams
+        self.assertEqual(fn('C:\\foo:bar'), '///C:/foo%3Abar')
+        self.assertEqual(fn('foo:bar'), 'foo%3Abar')
         # No drive letter
         self.assertEqual(fn("\\folder\\test\\"), '/folder/test/')
         self.assertEqual(fn("\\\\folder\\test\\"), '//folder/test/')
diff --git a/Misc/NEWS.d/next/Library/2024-11-12-20-05-09.gh-issue-126601.Nj7bA9.rst b/Misc/NEWS.d/next/Library/2024-11-12-20-05-09.gh-issue-126601.Nj7bA9.rst
new file mode 100644
index 00000000000000..11e2b7350a0e48
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-11-12-20-05-09.gh-issue-126601.Nj7bA9.rst
@@ -0,0 +1,3 @@
+Fix issue where :func:`urllib.request.pathname2url` raised :exc:`OSError`
+when given a Windows path containing a colon character not following a
+drive letter, such as before an NTFS alternate data stream.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to