https://github.com/python/cpython/commit/8abfaba5a67a99c446f0c13253ee0ce97bf6fa5c commit: 8abfaba5a67a99c446f0c13253ee0ce97bf6fa5c branch: main author: Barney Gale <barney.g...@gmail.com> committer: barneygale <barney.g...@gmail.com> date: 2025-03-19T19:33:01Z summary:
GH-125866: Deprecate `nturl2path` module (#131432) Deprecate the `nturl2path` module. Its functionality is merged into `urllib.request`. Add `tests.test_nturl2path` to exercise `nturl2path`, as it's no longer covered by `test_urllib`. files: A Lib/test/test_nturl2path.py A Misc/NEWS.d/next/Library/2025-03-19-00-09-15.gh-issue-125866.sIIJ5N.rst M Doc/whatsnew/3.14.rst M Lib/nturl2path.py M Lib/test/test_urllib2.py M Lib/urllib/request.py diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 303d7922d1708d..04b6eb1099b4fd 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1148,6 +1148,11 @@ Deprecated or *sequence* as keyword arguments is now deprecated. (Contributed by Kirill Podoprigora in :gh:`121676`.) +* :mod:`!nturl2path`: This module is now deprecated. Call + :func:`urllib.request.url2pathname` and :func:`~urllib.request.pathname2url` + instead. + (Contributed by Barney Gale in :gh:`125866`.) + * :mod:`os`: :term:`Soft deprecate <soft deprecated>` :func:`os.popen` and :func:`os.spawn* <os.spawnl>` functions. They should no longer be used to diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py index 7b5b82068e989f..57c7858dff0b81 100644 --- a/Lib/nturl2path.py +++ b/Lib/nturl2path.py @@ -3,7 +3,15 @@ This module only exists to provide OS-specific code for urllib.requests, thus do not use directly. """ -# Testing is done through test_urllib. +# Testing is done through test_nturl2path. + +import warnings + + +warnings._deprecated( + __name__, + message=f"{warnings._DEPRECATED_MSG}; use 'urllib.request' instead", + remove=(3, 19)) def url2pathname(url): """OS-specific conversion from a relative URL of the 'file' scheme diff --git a/Lib/test/test_nturl2path.py b/Lib/test/test_nturl2path.py new file mode 100644 index 00000000000000..a6a3422a0f75b2 --- /dev/null +++ b/Lib/test/test_nturl2path.py @@ -0,0 +1,107 @@ +import unittest + +from test.support import warnings_helper + + +nturl2path = warnings_helper.import_deprecated("nturl2path") + + +class NTURL2PathTest(unittest.TestCase): + """Test pathname2url() and url2pathname()""" + + def test_basic(self): + # Make sure simple tests pass + expected_path = r"parts\of\a\path" + expected_url = "parts/of/a/path" + result = nturl2path.pathname2url(expected_path) + self.assertEqual(expected_url, result, + "pathname2url() failed; %s != %s" % + (result, expected_url)) + result = nturl2path.url2pathname(expected_url) + self.assertEqual(expected_path, result, + "url2pathame() failed; %s != %s" % + (result, expected_path)) + + def test_pathname2url(self): + # Test special prefixes are correctly handled in pathname2url() + fn = nturl2path.pathname2url + self.assertEqual(fn('\\\\?\\C:\\dir'), '///C:/dir') + self.assertEqual(fn('\\\\?\\unc\\server\\share\\dir'), '//server/share/dir') + self.assertEqual(fn("C:"), '///C:') + self.assertEqual(fn("C:\\"), '///C:/') + self.assertEqual(fn('c:\\a\\b.c'), '///c:/a/b.c') + self.assertEqual(fn('C:\\a\\b.c'), '///C:/a/b.c') + self.assertEqual(fn('C:\\a\\b.c\\'), '///C:/a/b.c/') + self.assertEqual(fn('C:\\a\\\\b.c'), '///C:/a//b.c') + self.assertEqual(fn('C:\\a\\b%#c'), '///C:/a/b%25%23c') + self.assertEqual(fn('C:\\a\\b\xe9'), '///C:/a/b%C3%A9') + self.assertEqual(fn('C:\\foo\\bar\\spam.foo'), "///C:/foo/bar/spam.foo") + # NTFS alternate data streams + self.assertEqual(fn('C:\\foo:bar'), '///C:/foo%3Abar') + self.assertEqual(fn('foo:bar'), 'foo%3Abar') + # No drive letter + self.assertEqual(fn("\\folder\\test\\"), '///folder/test/') + self.assertEqual(fn("\\\\folder\\test\\"), '//folder/test/') + self.assertEqual(fn("\\\\\\folder\\test\\"), '///folder/test/') + self.assertEqual(fn('\\\\some\\share\\'), '//some/share/') + self.assertEqual(fn('\\\\some\\share\\a\\b.c'), '//some/share/a/b.c') + self.assertEqual(fn('\\\\some\\share\\a\\b%#c\xe9'), '//some/share/a/b%25%23c%C3%A9') + # Alternate path separator + self.assertEqual(fn('C:/a/b.c'), '///C:/a/b.c') + self.assertEqual(fn('//some/share/a/b.c'), '//some/share/a/b.c') + self.assertEqual(fn('//?/C:/dir'), '///C:/dir') + self.assertEqual(fn('//?/unc/server/share/dir'), '//server/share/dir') + # Round-tripping + urls = ['///C:', + '///folder/test/', + '///C:/foo/bar/spam.foo'] + for url in urls: + self.assertEqual(fn(nturl2path.url2pathname(url)), url) + + def test_url2pathname(self): + fn = nturl2path.url2pathname + self.assertEqual(fn('/'), '\\') + self.assertEqual(fn('/C:/'), 'C:\\') + self.assertEqual(fn("///C|"), 'C:') + self.assertEqual(fn("///C:"), 'C:') + self.assertEqual(fn('///C:/'), 'C:\\') + self.assertEqual(fn('/C|//'), 'C:\\\\') + self.assertEqual(fn('///C|/path'), 'C:\\path') + # No DOS drive + self.assertEqual(fn("///C/test/"), '\\C\\test\\') + self.assertEqual(fn("////C/test/"), '\\\\C\\test\\') + # DOS drive paths + self.assertEqual(fn('c:/path/to/file'), 'c:\\path\\to\\file') + self.assertEqual(fn('C:/path/to/file'), 'C:\\path\\to\\file') + self.assertEqual(fn('C:/path/to/file/'), 'C:\\path\\to\\file\\') + self.assertEqual(fn('C:/path/to//file'), 'C:\\path\\to\\\\file') + self.assertEqual(fn('C|/path/to/file'), 'C:\\path\\to\\file') + self.assertEqual(fn('/C|/path/to/file'), 'C:\\path\\to\\file') + self.assertEqual(fn('///C|/path/to/file'), 'C:\\path\\to\\file') + self.assertEqual(fn("///C|/foo/bar/spam.foo"), 'C:\\foo\\bar\\spam.foo') + # Colons in URI + self.assertEqual(fn('///\u00e8|/'), '\u00e8:\\') + self.assertEqual(fn('//host/share/spam.txt:eggs'), '\\\\host\\share\\spam.txt:eggs') + self.assertEqual(fn('///c:/spam.txt:eggs'), 'c:\\spam.txt:eggs') + # UNC paths + self.assertEqual(fn('//server/path/to/file'), '\\\\server\\path\\to\\file') + self.assertEqual(fn('////server/path/to/file'), '\\\\server\\path\\to\\file') + self.assertEqual(fn('/////server/path/to/file'), '\\\\server\\path\\to\\file') + # Localhost paths + self.assertEqual(fn('//localhost/C:/path/to/file'), 'C:\\path\\to\\file') + self.assertEqual(fn('//localhost/C|/path/to/file'), 'C:\\path\\to\\file') + self.assertEqual(fn('//localhost/path/to/file'), '\\path\\to\\file') + self.assertEqual(fn('//localhost//server/path/to/file'), '\\\\server\\path\\to\\file') + # Percent-encoded forward slashes are preserved for backwards compatibility + self.assertEqual(fn('C:/foo%2fbar'), 'C:\\foo/bar') + self.assertEqual(fn('//server/share/foo%2fbar'), '\\\\server\\share\\foo/bar') + # Round-tripping + paths = ['C:', + r'\C\test\\', + r'C:\foo\bar\spam.foo'] + for path in paths: + self.assertEqual(fn(nturl2path.pathname2url(path)), path) + + +if __name__ == '__main__': + unittest.main() diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 44e6af8c6b6868..088ee4c4f90803 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -44,10 +44,6 @@ def test___all__(self): context = {} exec('from urllib.%s import *' % module, context) del context['__builtins__'] - if module == 'request' and os.name == 'nt': - u, p = context.pop('url2pathname'), context.pop('pathname2url') - self.assertEqual(u.__module__, 'nturl2path') - self.assertEqual(p.__module__, 'nturl2path') for k, v in context.items(): self.assertEqual(v.__module__, 'urllib.%s' % module, "%r is exposed in 'urllib.%s' but defined in %r" % diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 0d1b594b8cf20b..f22dc56af2f428 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1646,36 +1646,62 @@ def data_open(self, req): # Code move from the old urllib module -# Helper for non-unix systems -if os.name == 'nt': - from nturl2path import url2pathname, pathname2url -else: - def url2pathname(pathname): - """OS-specific conversion from a relative URL of the 'file' scheme - to a file system path; not recommended for general use.""" - if pathname[:3] == '///': - # URL has an empty authority section, so the path begins on the - # third character. - pathname = pathname[2:] - elif pathname[:12] == '//localhost/': - # Skip past 'localhost' authority. - pathname = pathname[11:] - encoding = sys.getfilesystemencoding() - errors = sys.getfilesystemencodeerrors() - return unquote(pathname, encoding=encoding, errors=errors) - - def pathname2url(pathname): - """OS-specific conversion from a file system path to a relative URL - of the 'file' scheme; not recommended for general use.""" - if pathname[:1] == '/': - # Add explicitly empty authority to absolute path. If the path - # starts with exactly one slash then this change is mostly - # cosmetic, but if it begins with two or more slashes then this - # avoids interpreting the path as a URL authority. - pathname = '//' + pathname - encoding = sys.getfilesystemencoding() - errors = sys.getfilesystemencodeerrors() - return quote(pathname, encoding=encoding, errors=errors) +def url2pathname(url): + """OS-specific conversion from a relative URL of the 'file' scheme + to a file system path; not recommended for general use.""" + if url[:3] == '///': + # Empty authority section, so the path begins on the third character. + url = url[2:] + elif url[:12] == '//localhost/': + # Skip past 'localhost' authority. + url = url[11:] + + if os.name == 'nt': + if url[:3] == '///': + # Skip past extra slash before UNC drive in URL path. + url = url[1:] + else: + if url[:1] == '/' and url[2:3] in (':', '|'): + # Skip past extra slash before DOS drive in URL path. + url = url[1:] + if url[1:2] == '|': + # Older URLs use a pipe after a drive letter + url = url[:1] + ':' + url[2:] + url = url.replace('/', '\\') + encoding = sys.getfilesystemencoding() + errors = sys.getfilesystemencodeerrors() + return unquote(url, encoding=encoding, errors=errors) + + +def pathname2url(pathname): + """OS-specific conversion from a file system path to a relative URL + of the 'file' scheme; not recommended for general use.""" + if os.name == 'nt': + pathname = pathname.replace('\\', '/') + encoding = sys.getfilesystemencoding() + errors = sys.getfilesystemencodeerrors() + drive, root, tail = os.path.splitroot(pathname) + if drive: + # First, clean up some special forms. We are going to sacrifice the + # additional information anyway + if drive[:4] == '//?/': + drive = drive[4:] + if drive[:4].upper() == 'UNC/': + drive = '//' + drive[4:] + if drive[1:] == ':': + # DOS drive specified. Add three slashes to the start, producing + # an authority section with a zero-length authority, and a path + # section starting with a single slash. + drive = '///' + drive + drive = quote(drive, encoding=encoding, errors=errors, safe='/:') + elif root: + # Add explicitly empty authority to absolute path. If the path + # starts with exactly one slash then this change is mostly + # cosmetic, but if it begins with two or more slashes then this + # avoids interpreting the path as a URL authority. + root = '//' + root + tail = quote(tail, encoding=encoding, errors=errors) + return drive + root + tail # Utility functions diff --git a/Misc/NEWS.d/next/Library/2025-03-19-00-09-15.gh-issue-125866.sIIJ5N.rst b/Misc/NEWS.d/next/Library/2025-03-19-00-09-15.gh-issue-125866.sIIJ5N.rst new file mode 100644 index 00000000000000..da3a448fc9185d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-03-19-00-09-15.gh-issue-125866.sIIJ5N.rst @@ -0,0 +1,3 @@ +Deprecate the :mod:`!nturl2path` module. Call +:func:`urllib.request.url2pathname` and :func:`~urllib.request.pathname2url` +instead. _______________________________________________ Python-checkins mailing list -- python-checkins@python.org To unsubscribe send an email to python-checkins-le...@python.org https://mail.python.org/mailman3/lists/python-checkins.python.org/ Member address: arch...@mail-archive.com