https://github.com/python/cpython/commit/746a0c5bc8979d6265e9669987eff52e7631222a
commit: 746a0c5bc8979d6265e9669987eff52e7631222a
branch: 3.13
author: Miss Islington (bot) <[email protected]>
committer: barneygale <[email protected]>
date: 2024-11-20T20:10:29Z
summary:

[3.13] GH-85168: Use filesystem encoding when converting to/from `file` URIs 
(GH-126852) (#127039)

GH-85168: Use filesystem encoding when converting to/from `file` URIs 
(GH-126852)

Adjust `urllib.request.url2pathname()` and `pathname2url()` to use the
filesystem encoding when quoting and unquoting file URIs, rather than
forcing use of UTF-8.

No changes are needed in the `nturl2path` module because Windows always
uses UTF-8, per PEP 529.
(cherry picked from commit c9b399fbdb01584dcfff0d7f6ad484644ff269c3)

Co-authored-by: Barney Gale <[email protected]>

files:
A Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst
M Lib/test/test_urllib.py
M Lib/test/test_urllib2.py
M Lib/urllib/request.py

diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 2c53ce3f99e675..ab18e80663e3bc 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -709,10 +709,6 @@ def tearDown(self):
 
     def constructLocalFileUrl(self, filePath):
         filePath = os.path.abspath(filePath)
-        try:
-            filePath.encode("utf-8")
-        except UnicodeEncodeError:
-            raise unittest.SkipTest("filePath is not encodable to utf8")
         return "file://%s" % urllib.request.pathname2url(filePath)
 
     def createNewTempFile(self, data=b""):
@@ -1562,6 +1558,13 @@ def test_pathname2url_posix(self):
         self.assertEqual(fn('/a/b.c'), '/a/b.c')
         self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c')
 
+    @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
+    def test_pathname2url_nonascii(self):
+        encoding = sys.getfilesystemencoding()
+        errors = sys.getfilesystemencodeerrors()
+        url = urllib.parse.quote(os_helper.FS_NONASCII, encoding=encoding, 
errors=errors)
+        self.assertEqual(urllib.request.pathname2url(os_helper.FS_NONASCII), 
url)
+
     @unittest.skipUnless(sys.platform == 'win32',
                          'test specific to Windows pathnames.')
     def test_url2pathname_win(self):
@@ -1612,6 +1615,15 @@ def test_url2pathname_posix(self):
         self.assertEqual(fn('////foo/bar'), '//foo/bar')
         self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar')
 
+    @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
+    def test_url2pathname_nonascii(self):
+        encoding = sys.getfilesystemencoding()
+        errors = sys.getfilesystemencodeerrors()
+        url = os_helper.FS_NONASCII
+        self.assertEqual(urllib.request.url2pathname(url), 
os_helper.FS_NONASCII)
+        url = urllib.parse.quote(url, encoding=encoding, errors=errors)
+        self.assertEqual(urllib.request.url2pathname(url), 
os_helper.FS_NONASCII)
+
 class Utility_Tests(unittest.TestCase):
     """Testcase to test the various utility functions in the urllib."""
 
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
index eed0599642edfb..068dd859f27220 100644
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -717,10 +717,6 @@ def test_processors(self):
 
 
 def sanepathname2url(path):
-    try:
-        path.encode("utf-8")
-    except UnicodeEncodeError:
-        raise unittest.SkipTest("path is not encodable to utf8")
     urlpath = urllib.request.pathname2url(path)
     if os.name == "nt" and urlpath.startswith("///"):
         urlpath = urlpath[2:]
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 9eb819ca53229f..f0321814c69509 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -1660,12 +1660,16 @@ def url2pathname(pathname):
             # URL has an empty authority section, so the path begins on the
             # third character.
             pathname = pathname[2:]
-        return unquote(pathname)
+        encoding = sys.getfilesystemencoding()
+        errors = sys.getfilesystemencodeerrors()
+        return unquote(pathname, encoding=encoding, errors=errors)
 
     def pathname2url(pathname):
         """OS-specific conversion from a file system path to a relative URL
         of the 'file' scheme; not recommended for general use."""
-        return quote(pathname)
+        encoding = sys.getfilesystemencoding()
+        errors = sys.getfilesystemencodeerrors()
+        return quote(pathname, encoding=encoding, errors=errors)
 
 
 ftpcache = {}
diff --git 
a/Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst 
b/Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst
new file mode 100644
index 00000000000000..abceda8f6fd707
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst
@@ -0,0 +1,4 @@
+Fix issue where :func:`urllib.request.url2pathname` and
+:func:`~urllib.request.pathname2url` always used UTF-8 when quoting and
+unquoting file URIs. They now use the :term:`filesystem encoding and error
+handler`.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to