[Python-checkins] GH-125866: Deprecate `nturl2path` module (#131432)

barneygale Sat, 05 Apr 2025 10:24:54 -0700

https://github.com/python/cpython/commit/8abfaba5a67a99c446f0c13253ee0ce97bf6fa5c
commit: 8abfaba5a67a99c446f0c13253ee0ce97bf6fa5c
branch: main
author: Barney Gale <barney.g...@gmail.com>
committer: barneygale <barney.g...@gmail.com>
date: 2025-03-19T19:33:01Z
summary:


GH-125866: Deprecate `nturl2path` module (#131432)

Deprecate the `nturl2path` module. Its functionality is merged into
`urllib.request`.

Add `tests.test_nturl2path` to exercise `nturl2path`, as it's no longer
covered by `test_urllib`.

files:
A Lib/test/test_nturl2path.py
A Misc/NEWS.d/next/Library/2025-03-19-00-09-15.gh-issue-125866.sIIJ5N.rst
M Doc/whatsnew/3.14.rst
M Lib/nturl2path.py
M Lib/test/test_urllib2.py
M Lib/urllib/request.py

diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index 303d7922d1708d..04b6eb1099b4fd 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -1148,6 +1148,11 @@ Deprecated
   or *sequence* as keyword arguments is now deprecated.
   (Contributed by Kirill Podoprigora in :gh:`121676`.)
 
+* :mod:`!nturl2path`: This module is now deprecated. Call
+  :func:`urllib.request.url2pathname` and :func:`~urllib.request.pathname2url`
+  instead.
+  (Contributed by Barney Gale in :gh:`125866`.)
+
 * :mod:`os`:
   :term:`Soft deprecate <soft deprecated>` :func:`os.popen` and
   :func:`os.spawn* <os.spawnl>` functions. They should no longer be used to
diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py
index 7b5b82068e989f..57c7858dff0b81 100644
--- a/Lib/nturl2path.py
+++ b/Lib/nturl2path.py
@@ -3,7 +3,15 @@
 This module only exists to provide OS-specific code
 for urllib.requests, thus do not use directly.
 """
-# Testing is done through test_urllib.
+# Testing is done through test_nturl2path.
+
+import warnings
+
+
+warnings._deprecated(
+    __name__,
+    message=f"{warnings._DEPRECATED_MSG}; use 'urllib.request' instead",
+    remove=(3, 19))
 
 def url2pathname(url):
     """OS-specific conversion from a relative URL of the 'file' scheme
diff --git a/Lib/test/test_nturl2path.py b/Lib/test/test_nturl2path.py
new file mode 100644
index 00000000000000..a6a3422a0f75b2
--- /dev/null
+++ b/Lib/test/test_nturl2path.py
@@ -0,0 +1,107 @@
+import unittest
+
+from test.support import warnings_helper
+
+
+nturl2path = warnings_helper.import_deprecated("nturl2path")
+
+
+class NTURL2PathTest(unittest.TestCase):
+    """Test pathname2url() and url2pathname()"""
+
+    def test_basic(self):
+        # Make sure simple tests pass
+        expected_path = r"parts\of\a\path"
+        expected_url = "parts/of/a/path"
+        result = nturl2path.pathname2url(expected_path)
+        self.assertEqual(expected_url, result,
+                         "pathname2url() failed; %s != %s" %
+                         (result, expected_url))
+        result = nturl2path.url2pathname(expected_url)
+        self.assertEqual(expected_path, result,
+                         "url2pathame() failed; %s != %s" %
+                         (result, expected_path))
+
+    def test_pathname2url(self):
+        # Test special prefixes are correctly handled in pathname2url()
+        fn = nturl2path.pathname2url
+        self.assertEqual(fn('\\\\?\\C:\\dir'), '///C:/dir')
+        self.assertEqual(fn('\\\\?\\unc\\server\\share\\dir'), '//server/share/dir')
+        self.assertEqual(fn("C:"), '///C:')
+        self.assertEqual(fn("C:\\"), '///C:/')
+        self.assertEqual(fn('c:\\a\\b.c'), '///c:/a/b.c')
+        self.assertEqual(fn('C:\\a\\b.c'), '///C:/a/b.c')
+        self.assertEqual(fn('C:\\a\\b.c\\'), '///C:/a/b.c/')
+        self.assertEqual(fn('C:\\a\\\\b.c'), '///C:/a//b.c')
+        self.assertEqual(fn('C:\\a\\b%#c'), '///C:/a/b%25%23c')
+        self.assertEqual(fn('C:\\a\\b\xe9'), '///C:/a/b%C3%A9')
+        self.assertEqual(fn('C:\\foo\\bar\\spam.foo'), "///C:/foo/bar/spam.foo")
+        # NTFS alternate data streams
+        self.assertEqual(fn('C:\\foo:bar'), '///C:/foo%3Abar')
+        self.assertEqual(fn('foo:bar'), 'foo%3Abar')
+        # No drive letter
+        self.assertEqual(fn("\\folder\\test\\"), '///folder/test/')
+        self.assertEqual(fn("\\\\folder\\test\\"), '//folder/test/')
+        self.assertEqual(fn("\\\\\\folder\\test\\"), '///folder/test/')
+        self.assertEqual(fn('\\\\some\\share\\'), '//some/share/')
+        self.assertEqual(fn('\\\\some\\share\\a\\b.c'), '//some/share/a/b.c')
+        self.assertEqual(fn('\\\\some\\share\\a\\b%#c\xe9'), '//some/share/a/b%25%23c%C3%A9')
+        # Alternate path separator
+        self.assertEqual(fn('C:/a/b.c'), '///C:/a/b.c')
+        self.assertEqual(fn('//some/share/a/b.c'), '//some/share/a/b.c')
+        self.assertEqual(fn('//?/C:/dir'), '///C:/dir')
+        self.assertEqual(fn('//?/unc/server/share/dir'), '//server/share/dir')
+        # Round-tripping
+        urls = ['///C:',
+                '///folder/test/',
+                '///C:/foo/bar/spam.foo']
+        for url in urls:
+            self.assertEqual(fn(nturl2path.url2pathname(url)), url)
+
+    def test_url2pathname(self):
+        fn = nturl2path.url2pathname
+        self.assertEqual(fn('/'), '\\')
+        self.assertEqual(fn('/C:/'), 'C:\\')
+        self.assertEqual(fn("///C|"), 'C:')
+        self.assertEqual(fn("///C:"), 'C:')
+        self.assertEqual(fn('///C:/'), 'C:\\')
+        self.assertEqual(fn('/C|//'), 'C:\\\\')
+        self.assertEqual(fn('///C|/path'), 'C:\\path')
+        # No DOS drive
+        self.assertEqual(fn("///C/test/"), '\\C\\test\\')
+        self.assertEqual(fn("////C/test/"), '\\\\C\\test\\')
+        # DOS drive paths
+        self.assertEqual(fn('c:/path/to/file'), 'c:\\path\\to\\file')
+        self.assertEqual(fn('C:/path/to/file'), 'C:\\path\\to\\file')
+        self.assertEqual(fn('C:/path/to/file/'), 'C:\\path\\to\\file\\')
+        self.assertEqual(fn('C:/path/to//file'), 'C:\\path\\to\\\\file')
+        self.assertEqual(fn('C|/path/to/file'), 'C:\\path\\to\\file')
+        self.assertEqual(fn('/C|/path/to/file'), 'C:\\path\\to\\file')
+        self.assertEqual(fn('///C|/path/to/file'), 'C:\\path\\to\\file')
+        self.assertEqual(fn("///C|/foo/bar/spam.foo"), 'C:\\foo\\bar\\spam.foo')
+        # Colons in URI
+        self.assertEqual(fn('///\u00e8|/'), '\u00e8:\\')
+        self.assertEqual(fn('//host/share/spam.txt:eggs'), '\\\\host\\share\\spam.txt:eggs')
+        self.assertEqual(fn('///c:/spam.txt:eggs'), 'c:\\spam.txt:eggs')
+        # UNC paths
+        self.assertEqual(fn('//server/path/to/file'), '\\\\server\\path\\to\\file')
+        self.assertEqual(fn('////server/path/to/file'), '\\\\server\\path\\to\\file')
+        self.assertEqual(fn('/////server/path/to/file'), '\\\\server\\path\\to\\file')
+        # Localhost paths
+        self.assertEqual(fn('//localhost/C:/path/to/file'), 'C:\\path\\to\\file')
+        self.assertEqual(fn('//localhost/C|/path/to/file'), 'C:\\path\\to\\file')
+        self.assertEqual(fn('//localhost/path/to/file'), '\\path\\to\\file')
+        self.assertEqual(fn('//localhost//server/path/to/file'), '\\\\server\\path\\to\\file')
+        # Percent-encoded forward slashes are preserved for backwards compatibility
+        self.assertEqual(fn('C:/foo%2fbar'), 'C:\\foo/bar')
+        self.assertEqual(fn('//server/share/foo%2fbar'), '\\\\server\\share\\foo/bar')
+        # Round-tripping
+        paths = ['C:',
+                 r'\C\test\\',
+                 r'C:\foo\bar\spam.foo']
+        for path in paths:
+            self.assertEqual(fn(nturl2path.pathname2url(path)), path)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
index 44e6af8c6b6868..088ee4c4f90803 100644
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -44,10 +44,6 @@ def test___all__(self):
             context = {}
             exec('from urllib.%s import *' % module, context)
             del context['__builtins__']
-            if module == 'request' and os.name == 'nt':
-                u, p = context.pop('url2pathname'), context.pop('pathname2url')
-                self.assertEqual(u.__module__, 'nturl2path')
-                self.assertEqual(p.__module__, 'nturl2path')
             for k, v in context.items():
                 self.assertEqual(v.__module__, 'urllib.%s' % module,
                     "%r is exposed in 'urllib.%s' but defined in %r" %
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 0d1b594b8cf20b..f22dc56af2f428 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -1646,36 +1646,62 @@ def data_open(self, req):
 
 # Code move from the old urllib module
 
-# Helper for non-unix systems
-if os.name == 'nt':
-    from nturl2path import url2pathname, pathname2url
-else:
-    def url2pathname(pathname):
-        """OS-specific conversion from a relative URL of the 'file' scheme
-        to a file system path; not recommended for general use."""
-        if pathname[:3] == '///':
-            # URL has an empty authority section, so the path begins on the
-            # third character.
-            pathname = pathname[2:]
-        elif pathname[:12] == '//localhost/':
-            # Skip past 'localhost' authority.
-            pathname = pathname[11:]
-        encoding = sys.getfilesystemencoding()
-        errors = sys.getfilesystemencodeerrors()
-        return unquote(pathname, encoding=encoding, errors=errors)
-
-    def pathname2url(pathname):
-        """OS-specific conversion from a file system path to a relative URL
-        of the 'file' scheme; not recommended for general use."""
-        if pathname[:1] == '/':
-            # Add explicitly empty authority to absolute path. If the path
-            # starts with exactly one slash then this change is mostly
-            # cosmetic, but if it begins with two or more slashes then this
-            # avoids interpreting the path as a URL authority.
-            pathname = '//' + pathname
-        encoding = sys.getfilesystemencoding()
-        errors = sys.getfilesystemencodeerrors()
-        return quote(pathname, encoding=encoding, errors=errors)
+def url2pathname(url):
+    """OS-specific conversion from a relative URL of the 'file' scheme
+    to a file system path; not recommended for general use."""
+    if url[:3] == '///':
+        # Empty authority section, so the path begins on the third character.
+        url = url[2:]
+    elif url[:12] == '//localhost/':
+        # Skip past 'localhost' authority.
+        url = url[11:]
+
+    if os.name == 'nt':
+        if url[:3] == '///':
+            # Skip past extra slash before UNC drive in URL path.
+            url = url[1:]
+        else:
+            if url[:1] == '/' and url[2:3] in (':', '|'):
+                # Skip past extra slash before DOS drive in URL path.
+                url = url[1:]
+            if url[1:2] == '|':
+                # Older URLs use a pipe after a drive letter
+                url = url[:1] + ':' + url[2:]
+        url = url.replace('/', '\\')
+    encoding = sys.getfilesystemencoding()
+    errors = sys.getfilesystemencodeerrors()
+    return unquote(url, encoding=encoding, errors=errors)
+
+
+def pathname2url(pathname):
+    """OS-specific conversion from a file system path to a relative URL
+    of the 'file' scheme; not recommended for general use."""
+    if os.name == 'nt':
+        pathname = pathname.replace('\\', '/')
+    encoding = sys.getfilesystemencoding()
+    errors = sys.getfilesystemencodeerrors()
+    drive, root, tail = os.path.splitroot(pathname)
+    if drive:
+        # First, clean up some special forms. We are going to sacrifice the
+        # additional information anyway
+        if drive[:4] == '//?/':
+            drive = drive[4:]
+            if drive[:4].upper() == 'UNC/':
+                drive = '//' + drive[4:]
+        if drive[1:] == ':':
+            # DOS drive specified. Add three slashes to the start, producing
+            # an authority section with a zero-length authority, and a path
+            # section starting with a single slash.
+            drive = '///' + drive
+        drive = quote(drive, encoding=encoding, errors=errors, safe='/:')
+    elif root:
+        # Add explicitly empty authority to absolute path. If the path
+        # starts with exactly one slash then this change is mostly
+        # cosmetic, but if it begins with two or more slashes then this
+        # avoids interpreting the path as a URL authority.
+        root = '//' + root
+    tail = quote(tail, encoding=encoding, errors=errors)
+    return drive + root + tail
 
 
 # Utility functions
diff --git a/Misc/NEWS.d/next/Library/2025-03-19-00-09-15.gh-issue-125866.sIIJ5N.rst b/Misc/NEWS.d/next/Library/2025-03-19-00-09-15.gh-issue-125866.sIIJ5N.rst
new file mode 100644
index 00000000000000..da3a448fc9185d
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-03-19-00-09-15.gh-issue-125866.sIIJ5N.rst
@@ -0,0 +1,3 @@
+Deprecate the :mod:`!nturl2path` module. Call
+:func:`urllib.request.url2pathname` and :func:`~urllib.request.pathname2url`
+instead.

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

[Python-checkins] GH-125866: Deprecate `nturl2path` module (#131432)

Reply via email to