https://github.com/python/cpython/commit/ebf564a1d3e2e81b9846535114e481d6096443d2
commit: ebf564a1d3e2e81b9846535114e481d6096443d2
branch: main
author: Barney Gale <[email protected]>
committer: barneygale <[email protected]>
date: 2024-11-22T03:17:06Z
summary:
GH-126766: `url2pathname()`: handle 'localhost' authority (#127129)
Discard any 'localhost' authority from the beginning of a `file:` URI. As a
result, file URIs like `//localhost/etc/hosts` are correctly decoded as
`/etc/hosts`.
files:
A Misc/NEWS.d/next/Library/2024-11-22-02-31-55.gh-issue-126766.jfkhBH.rst
M Lib/nturl2path.py
M Lib/test/test_urllib.py
M Lib/urllib/request.py
diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py
index ed7880fd1a775f..3308ee7c1c784e 100644
--- a/Lib/nturl2path.py
+++ b/Lib/nturl2path.py
@@ -15,14 +15,17 @@ def url2pathname(url):
# become
# C:\foo\bar\spam.foo
import string, urllib.parse
+ if url[:3] == '///':
+ # URL has an empty authority section, so the path begins on the third
+ # character.
+ url = url[2:]
+ elif url[:12] == '//localhost/':
+ # Skip past 'localhost' authority.
+ url = url[11:]
# Windows itself uses ":" even in URLs.
url = url.replace(':', '|')
if not '|' in url:
# No drive specifier, just convert slashes
- if url[:3] == '///':
- # URL has an empty authority section, so the path begins on the
- # third character.
- url = url[2:]
# make sure not to convert quoted slashes :-)
return urllib.parse.unquote(url.replace('/', '\\'))
comp = url.split('|')
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 3e5dc256d317a7..e1c1d3170d9807 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -1496,6 +1496,8 @@ def test_url2pathname_win(self):
# Localhost paths
self.assertEqual(fn('//localhost/C:/path/to/file'),
'C:\\path\\to\\file')
self.assertEqual(fn('//localhost/C|/path/to/file'),
'C:\\path\\to\\file')
+ self.assertEqual(fn('//localhost/path/to/file'), '\\path\\to\\file')
+ self.assertEqual(fn('//localhost//server/path/to/file'),
'\\\\server\\path\\to\\file')
# Percent-encoded forward slashes are preserved for backwards compatibility
self.assertEqual(fn('C:/foo%2fbar'), 'C:\\foo/bar')
self.assertEqual(fn('//server/share/foo%2fbar'),
'\\\\server\\share\\foo/bar')
@@ -1514,7 +1516,7 @@ def test_url2pathname_posix(self):
self.assertEqual(fn('//foo/bar'), '//foo/bar')
self.assertEqual(fn('///foo/bar'), '/foo/bar')
self.assertEqual(fn('////foo/bar'), '//foo/bar')
- self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar')
+ self.assertEqual(fn('//localhost/foo/bar'), '/foo/bar')
@unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
def test_url2pathname_nonascii(self):
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index bcfdcc51fac369..80be65c613e971 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -1657,6 +1657,9 @@ def url2pathname(pathname):
# URL has an empty authority section, so the path begins on the
# third character.
pathname = pathname[2:]
+ elif pathname[:12] == '//localhost/':
+ # Skip past 'localhost' authority.
+ pathname = pathname[11:]
encoding = sys.getfilesystemencoding()
errors = sys.getfilesystemencodeerrors()
return unquote(pathname, encoding=encoding, errors=errors)
diff --git a/Misc/NEWS.d/next/Library/2024-11-22-02-31-55.gh-issue-126766.jfkhBH.rst b/Misc/NEWS.d/next/Library/2024-11-22-02-31-55.gh-issue-126766.jfkhBH.rst
new file mode 100644
index 00000000000000..998c99bf4358d5
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-11-22-02-31-55.gh-issue-126766.jfkhBH.rst
@@ -0,0 +1,2 @@
+Fix issue where :func:`urllib.request.url2pathname` failed to discard any
+'localhost' authority present in the URL.
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]