https://github.com/python/cpython/commit/ccad61e35d240972d14f993507566706fbf419f1
commit: ccad61e35d240972d14f993507566706fbf419f1
branch: main
author: Barney Gale <barney.g...@gmail.com>
committer: barneygale <barney.g...@gmail.com>
date: 2025-04-14T01:49:02+01:00
summary:

GH-125866: Support complete "file:" URLs in urllib (#132378)

Add optional *add_scheme* argument to `urllib.request.pathname2url()`; when
set to true, a complete URL is returned. Likewise add optional
*require_scheme* argument to `url2pathname()`; when set to true, a complete
URL is accepted.

Co-authored-by: Bénédikt Tran <10796600+picn...@users.noreply.github.com>

files:
A Misc/NEWS.d/next/Library/2025-04-10-21-43-04.gh-issue-125866.EZ9X8D.rst
M Doc/library/urllib.request.rst
M Doc/whatsnew/3.14.rst
M Lib/pathlib/__init__.py
M Lib/test/test_pathlib/test_pathlib.py
M Lib/test/test_urllib.py
M Lib/test/test_urllib2.py
M Lib/test/test_urllib2net.py
M Lib/urllib/request.py

diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst
index edfc249eb43c78..a5f1b9b292a85a 100644
--- a/Doc/library/urllib.request.rst
+++ b/Doc/library/urllib.request.rst
@@ -146,16 +146,19 @@ The :mod:`urllib.request` module defines the following functions:
    attribute to modify its position in the handlers list.
 
 
-.. function:: pathname2url(path)
+.. function:: pathname2url(path, *, add_scheme=False)
 
    Convert the given local path to a ``file:`` URL. This function uses
-   :func:`~urllib.parse.quote` function to encode the path. For historical
-   reasons, the return value omits the ``file:`` scheme prefix. This example
-   shows the function being used on Windows::
+   :func:`~urllib.parse.quote` function to encode the path.
+
+   If *add_scheme* is false (the default), the return value omits the
+   ``file:`` scheme prefix. Set *add_scheme* to true to return a complete URL.
+
+   This example shows the function being used on Windows::
 
       >>> from urllib.request import pathname2url
       >>> path = 'C:\\Program Files'
-      >>> 'file:' + pathname2url(path)
+      >>> pathname2url(path, add_scheme=True)
       'file:///C:/Program%20Files'
 
    .. versionchanged:: 3.14
@@ -168,17 +171,25 @@ The :mod:`urllib.request` module defines the following functions:
       sections. For example, the path ``/etc/hosts`` is converted to
       the URL ``///etc/hosts``.
 
+   .. versionchanged:: next
+      The *add_scheme* argument was added.
+
 
-.. function:: url2pathname(url)
+.. function:: url2pathname(url, *, require_scheme=False)
 
    Convert the given ``file:`` URL to a local path. This function uses
-   :func:`~urllib.parse.unquote` to decode the URL. For historical reasons,
-   the given value *must* omit the ``file:`` scheme prefix. This example shows
-   the function being used on Windows::
+   :func:`~urllib.parse.unquote` to decode the URL.
+
+   If *require_scheme* is false (the default), the given value should omit a
+   ``file:`` scheme prefix. If *require_scheme* is set to true, the given
+   value should include the prefix; a :exc:`~urllib.error.URLError` is raised
+   if it doesn't.
+
+   This example shows the function being used on Windows::
 
       >>> from urllib.request import url2pathname
       >>> url = 'file:///C:/Program%20Files'
-      >>> url2pathname(url.removeprefix('file:'))
+      >>> url2pathname(url, require_scheme=True)
       'C:\\Program Files'
 
    .. versionchanged:: 3.14
@@ -193,6 +204,9 @@ The :mod:`urllib.request` module defines the following functions:
       returned (as before), and on other platforms a
       :exc:`~urllib.error.URLError` is raised.
 
+   .. versionchanged:: next
+      The *require_scheme* argument was added.
+
 
 .. function:: getproxies()
 
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index 421d12660b7956..0e30500fc9b997 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -1218,16 +1218,20 @@ urllib
   supporting SHA-256 digest authentication as specified in :rfc:`7616`.
   (Contributed by Calvin Bui in :gh:`128193`.)
 
-* Improve standards compliance when parsing and emitting ``file:`` URLs.
+* Improve ergonomics and standards compliance when parsing and emitting
+  ``file:`` URLs.
 
   In :func:`urllib.request.url2pathname`:
 
+  - Accept a complete URL when the new *require_scheme* argument is set to
+    true.
   - Discard URL authorities that resolve to a local IP address.
   - Raise :exc:`~urllib.error.URLError` if a URL authority doesn't resolve
-    to ``localhost``, except on Windows where we return a UNC path.
+    to a local IP address, except on Windows where we return a UNC path.
 
   In :func:`urllib.request.pathname2url`:
 
+  - Return a complete URL when the new *add_scheme* argument is set to true.
   - Include an empty URL authority when a path begins with a slash. For
     example, the path ``/etc/hosts`` is converted to the URL ``///etc/hosts``.
 
diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py
index 43a5440e0132ff..12cf9f579cb32d 100644
--- a/Lib/pathlib/__init__.py
+++ b/Lib/pathlib/__init__.py
@@ -1271,17 +1271,15 @@ def as_uri(self):
         if not self.is_absolute():
             raise ValueError("relative paths can't be expressed as file URIs")
         from urllib.request import pathname2url
-        return f'file:{pathname2url(str(self))}'
+        return pathname2url(str(self), add_scheme=True)
 
     @classmethod
     def from_uri(cls, uri):
         """Return a new path from the given 'file' URI."""
-        if not uri.startswith('file:'):
-            raise ValueError(f"URI does not start with 'file:': {uri!r}")
         from urllib.error import URLError
         from urllib.request import url2pathname
         try:
-            path = cls(url2pathname(uri.removeprefix('file:')))
+            path = cls(url2pathname(uri, require_scheme=True))
         except URLError as exc:
             raise ValueError(exc.reason) from None
         if not path.is_absolute():
diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py
index 21bc2f9e68b811..41a79d0dceb0eb 100644
--- a/Lib/test/test_pathlib/test_pathlib.py
+++ b/Lib/test/test_pathlib/test_pathlib.py
@@ -3302,8 +3302,8 @@ def test_from_uri_posix(self):
     @needs_posix
     def test_from_uri_pathname2url_posix(self):
         P = self.cls
-        self.assertEqual(P.from_uri('file:' + pathname2url('/foo/bar')), P('/foo/bar'))
-        self.assertEqual(P.from_uri('file:' + pathname2url('//foo/bar')), P('//foo/bar'))
+        self.assertEqual(P.from_uri(pathname2url('/foo/bar', add_scheme=True)), P('/foo/bar'))
+        self.assertEqual(P.from_uri(pathname2url('//foo/bar', add_scheme=True)), P('//foo/bar'))
 
     @needs_windows
     def test_absolute_windows(self):
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index ecf429e17811a4..abfbed8840ca03 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -476,7 +476,7 @@ def test_missing_localfile(self):
 
     def test_file_notexists(self):
         fd, tmp_file = tempfile.mkstemp()
-        tmp_file_canon_url = 'file:' + urllib.request.pathname2url(tmp_file)
+        tmp_file_canon_url = urllib.request.pathname2url(tmp_file, add_scheme=True)
         parsed = urllib.parse.urlsplit(tmp_file_canon_url)
         tmp_fileurl = parsed._replace(netloc='localhost').geturl()
         try:
@@ -620,7 +620,7 @@ def tearDown(self):
 
     def constructLocalFileUrl(self, filePath):
         filePath = os.path.abspath(filePath)
-        return "file:" + urllib.request.pathname2url(filePath)
+        return urllib.request.pathname2url(filePath, add_scheme=True)
 
     def createNewTempFile(self, data=b""):
         """Creates a new temporary file containing the specified data,
@@ -1436,6 +1436,21 @@ def test_pathname2url(self):
         self.assertEqual(fn(f'{sep}a{sep}b.c'), '///a/b.c')
         self.assertEqual(fn(f'{sep}a{sep}b%#c'), '///a/b%25%23c')
 
+    def test_pathname2url_add_scheme(self):
+        sep = os.path.sep
+        subtests = [
+            ('', 'file:'),
+            (sep, 'file:///'),
+            ('a', 'file:a'),
+            (f'a{sep}b.c', 'file:a/b.c'),
+            (f'{sep}a{sep}b.c', 'file:///a/b.c'),
+            (f'{sep}a{sep}b%#c', 'file:///a/b%25%23c'),
+        ]
+        for path, expected_url in subtests:
+            with self.subTest(path=path):
+                self.assertEqual(
+                    urllib.request.pathname2url(path, add_scheme=True), expected_url)
+
     @unittest.skipUnless(sys.platform == 'win32',
                          'test specific to Windows pathnames.')
     def test_pathname2url_win(self):
@@ -1503,6 +1518,49 @@ def test_url2pathname(self):
         self.assertEqual(fn('//localhost/foo/bar'), f'{sep}foo{sep}bar')
         self.assertEqual(fn('///foo/bar'), f'{sep}foo{sep}bar')
         self.assertEqual(fn('////foo/bar'), f'{sep}{sep}foo{sep}bar')
+        self.assertEqual(fn('data:blah'), 'data:blah')
+        self.assertEqual(fn('data://blah'), f'data:{sep}{sep}blah')
+
+    def test_url2pathname_require_scheme(self):
+        sep = os.path.sep
+        subtests = [
+            ('file:', ''),
+            ('FILE:', ''),
+            ('FiLe:', ''),
+            ('file:/', f'{sep}'),
+            ('file:///', f'{sep}'),
+            ('file:////', f'{sep}{sep}'),
+            ('file:foo', 'foo'),
+            ('file:foo/bar', f'foo{sep}bar'),
+            ('file:/foo/bar', f'{sep}foo{sep}bar'),
+            ('file://localhost/foo/bar', f'{sep}foo{sep}bar'),
+            ('file:///foo/bar', f'{sep}foo{sep}bar'),
+            ('file:////foo/bar', f'{sep}{sep}foo{sep}bar'),
+            ('file:data:blah', 'data:blah'),
+            ('file:data://blah', f'data:{sep}{sep}blah'),
+        ]
+        for url, expected_path in subtests:
+            with self.subTest(url=url):
+                self.assertEqual(
+                    urllib.request.url2pathname(url, require_scheme=True),
+                    expected_path)
+
+        error_subtests = [
+            '',
+            ':',
+            'foo',
+            'http:foo',
+            'localfile:foo',
+            'data:foo',
+            'data:file:foo',
+            'data:file://foo',
+        ]
+        for url in error_subtests:
+            with self.subTest(url=url):
+                self.assertRaises(
+                    urllib.error.URLError,
+                    urllib.request.url2pathname,
+                    url, require_scheme=True)
 
     @unittest.skipUnless(sys.platform == 'win32',
                          'test specific to Windows pathnames.')
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
index 088ee4c4f90803..f44d324b3ab763 100644
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -809,7 +809,7 @@ def test_file(self):
 
         TESTFN = os_helper.TESTFN
         towrite = b"hello, world\n"
-        canonurl = 'file:' + urllib.request.pathname2url(os.path.abspath(TESTFN))
+        canonurl = urllib.request.pathname2url(os.path.abspath(TESTFN), add_scheme=True)
         parsed = urlsplit(canonurl)
         if parsed.netloc:
             raise unittest.SkipTest("non-local working directory")
diff --git a/Lib/test/test_urllib2net.py b/Lib/test/test_urllib2net.py
index b84290a7368c29..e6a18476908495 100644
--- a/Lib/test/test_urllib2net.py
+++ b/Lib/test/test_urllib2net.py
@@ -150,7 +150,7 @@ def test_file(self):
             f.write('hi there\n')
             f.close()
             urls = [
-                'file:' + urllib.request.pathname2url(os.path.abspath(TESTFN)),
+                urllib.request.pathname2url(os.path.abspath(TESTFN), add_scheme=True),
                 ('file:///nonsensename/etc/passwd', None,
                  urllib.error.URLError),
                 ]
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 84c075ec8b359f..2c9c7b6ca5394d 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -1466,17 +1466,16 @@ def get_names(self):
     def open_local_file(self, req):
         import email.utils
         import mimetypes
-        filename = _splittype(req.full_url)[1]
-        localfile = url2pathname(filename)
+        localfile = url2pathname(req.full_url, require_scheme=True)
         try:
             stats = os.stat(localfile)
             size = stats.st_size
             modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
-            mtype = mimetypes.guess_type(filename)[0]
+            mtype = mimetypes.guess_file_type(localfile)[0]
             headers = email.message_from_string(
                 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                 (mtype or 'text/plain', size, modified))
-            origurl = f'file:{pathname2url(localfile)}'
+            origurl = pathname2url(localfile, add_scheme=True)
             return addinfourl(open(localfile, 'rb'), headers, origurl)
         except OSError as exp:
             raise URLError(exp, exp.filename)
@@ -1635,9 +1634,16 @@ def data_open(self, req):
 
 # Code move from the old urllib module
 
-def url2pathname(url):
-    """OS-specific conversion from a relative URL of the 'file' scheme
-    to a file system path; not recommended for general use."""
+def url2pathname(url, *, require_scheme=False):
+    """Convert the given file URL to a local file system path.
+
+    The 'file:' scheme prefix must be omitted unless *require_scheme*
+    is set to true.
+    """
+    if require_scheme:
+        scheme, url = _splittype(url)
+        if scheme != 'file':
+            raise URLError("URL is missing a 'file:' scheme")
     authority, url = _splithost(url)
     if os.name == 'nt':
         if not _is_local_authority(authority):
@@ -1661,13 +1667,17 @@ def url2pathname(url):
     return unquote(url, encoding=encoding, errors=errors)
 
 
-def pathname2url(pathname):
-    """OS-specific conversion from a file system path to a relative URL
-    of the 'file' scheme; not recommended for general use."""
+def pathname2url(pathname, *, add_scheme=False):
+    """Convert the given local file system path to a file URL.
+
+    The 'file:' scheme prefix is omitted unless *add_scheme*
+    is set to true.
+    """
     if os.name == 'nt':
         pathname = pathname.replace('\\', '/')
     encoding = sys.getfilesystemencoding()
     errors = sys.getfilesystemencodeerrors()
+    scheme = 'file:' if add_scheme else ''
     drive, root, tail = os.path.splitroot(pathname)
     if drive:
         # First, clean up some special forms. We are going to sacrifice the
@@ -1689,7 +1699,7 @@ def pathname2url(pathname):
         # avoids interpreting the path as a URL authority.
         root = '//' + root
     tail = quote(tail, encoding=encoding, errors=errors)
-    return drive + root + tail
+    return scheme + drive + root + tail
 
 
 # Utility functions
diff --git a/Misc/NEWS.d/next/Library/2025-04-10-21-43-04.gh-issue-125866.EZ9X8D.rst b/Misc/NEWS.d/next/Library/2025-04-10-21-43-04.gh-issue-125866.EZ9X8D.rst
new file mode 100644
index 00000000000000..0d60a16a17753a
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-04-10-21-43-04.gh-issue-125866.EZ9X8D.rst
@@ -0,0 +1,4 @@
+Add optional *add_scheme* argument to :func:`urllib.request.pathname2url`; when
+set to true, a complete URL is returned. Likewise add optional *require_scheme*
+argument to :func:`~urllib.request.url2pathname`; when set to true, a complete
+URL is accepted.

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to