[Python-checkins] gh-133036: Deprecate codecs.open (#133038)

methane Tue, 29 Apr 2025 18:11:27 -0700

https://github.com/python/cpython/commit/4e294f6feb3193854d23e0e8be487213a80b232f
commit: 4e294f6feb3193854d23e0e8be487213a80b232f
branch: main
author: Inada Naoki <songofaca...@gmail.com>
committer: methane <songofaca...@gmail.com>
date: 2025-04-30T10:11:09+09:00
summary:


gh-133036: Deprecate codecs.open (#133038)

Co-authored-by: Hugo van Kemenade <1324225+hug...@users.noreply.github.com>
Co-authored-by: Victor Stinner <vstin...@python.org>

files:
A Misc/NEWS.d/next/Library/2025-04-27-15-21-05.gh-issue-133036.HCNYA7.rst
M Doc/deprecations/pending-removal-in-future.rst
M Doc/library/codecs.rst
M Doc/whatsnew/3.14.rst
M Lib/_pyio.py
M Lib/codecs.py
M Lib/test/test_codecs.py
M Lib/test/test_multibytecodec.py
M Lib/test/test_sax.py
M Modules/_io/textio.c
M Python/codecs.c

diff --git a/Doc/deprecations/pending-removal-in-future.rst 
b/Doc/deprecations/pending-removal-in-future.rst
index 563f994c04b17b..4c4a368baca955 100644
--- a/Doc/deprecations/pending-removal-in-future.rst
+++ b/Doc/deprecations/pending-removal-in-future.rst
@@ -47,6 +47,8 @@ although there is currently no date scheduled for their 
removal.
   :data:`calendar.FEBRUARY`.
   (Contributed by Prince Roshan in :gh:`103636`.)
 
+* :mod:`codecs`: use :func:`open` instead of :func:`codecs.open`. 
(:gh:`133038`)
+
 * :attr:`codeobject.co_lnotab`: use the :meth:`codeobject.co_lines` method
   instead.
 
diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst
index f63148a9bd2a31..14f6547e4e0522 100644
--- a/Doc/library/codecs.rst
+++ b/Doc/library/codecs.rst
@@ -208,6 +208,10 @@ wider range of codecs when working with binary files:
    .. versionchanged:: 3.11
       The ``'U'`` mode has been removed.
 
+   .. deprecated:: next
+
+      :func:`codecs.open` has been superseded by :func:`open`.
+
 
 .. function:: EncodedFile(file, data_encoding, file_encoding=None, 
errors='strict')
 
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index 185a4670d8517d..9e6b69fbc05273 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -1597,6 +1597,10 @@ Deprecated
   as a single positional argument.
   (Contributed by Serhiy Storchaka in :gh:`109218`.)
 
+* :mod:`codecs`:
+  :func:`codecs.open` is now deprecated. Use :func:`open` instead.
+  (Contributed by Inada Naoki in :gh:`133036`.)
+
 * :mod:`functools`:
   Calling the Python implementation of :func:`functools.reduce` with *function*
   or *sequence* as keyword arguments is now deprecated.
diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index b875103bee441c..a870de5b532542 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -2056,8 +2056,7 @@ def __init__(self, buffer, encoding=None, errors=None, 
newline=None,
             raise ValueError("invalid encoding: %r" % encoding)
 
         if not codecs.lookup(encoding)._is_text_encoding:
-            msg = ("%r is not a text encoding; "
-                   "use codecs.open() to handle arbitrary codecs")
+            msg = "%r is not a text encoding"
             raise LookupError(msg % encoding)
 
         if errors is None:
diff --git a/Lib/codecs.py b/Lib/codecs.py
index e365e6cf22929f..fc38e922257644 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -884,7 +884,6 @@ def __reduce_ex__(self, proto):
 ### Shortcuts
 
 def open(filename, mode='r', encoding=None, errors='strict', buffering=-1):
-
     """ Open an encoded file using the given mode and return
         a wrapped version providing transparent encoding/decoding.
 
@@ -912,8 +911,11 @@ def open(filename, mode='r', encoding=None, 
errors='strict', buffering=-1):
         .encoding which allows querying the used encoding. This
         attribute is only available if an encoding was specified as
         parameter.
-
     """
+    import warnings
+    warnings.warn("codecs.open() is deprecated. Use open() instead.",
+                  DeprecationWarning, stacklevel=2)
+
     if encoding is not None and \
        'b' not in mode:
         # Force opening of the file in binary mode
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index e51f7e0ee12b1f..94fcf98e75721f 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -7,6 +7,7 @@
 import unittest
 import encodings
 from unittest import mock
+import warnings
 
 from test import support
 from test.support import os_helper
@@ -20,13 +21,12 @@
 except ImportError:
     _testinternalcapi = None
 
-try:
-    import ctypes
-except ImportError:
-    ctypes = None
-    SIZEOF_WCHAR_T = -1
-else:
-    SIZEOF_WCHAR_T = ctypes.sizeof(ctypes.c_wchar)
+
+def codecs_open_no_warn(*args, **kwargs):
+    """Call codecs.open(*args, **kwargs) ignoring DeprecationWarning."""
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        return codecs.open(*args, **kwargs)
 
 def coding_checker(self, coder):
     def check(input, expect):
@@ -35,13 +35,13 @@ def check(input, expect):
 
 # On small versions of Windows like Windows IoT or Windows Nano Server not all 
codepages are present
 def is_code_page_present(cp):
-    from ctypes import POINTER, WINFUNCTYPE, WinDLL
+    from ctypes import POINTER, WINFUNCTYPE, WinDLL, Structure
     from ctypes.wintypes import BOOL, BYTE, WCHAR, UINT, DWORD
 
     MAX_LEADBYTES = 12  # 5 ranges, 2 bytes ea., 0 term.
     MAX_DEFAULTCHAR = 2 # single or double byte
     MAX_PATH = 260
-    class CPINFOEXW(ctypes.Structure):
+    class CPINFOEXW(Structure):
         _fields_ = [("MaxCharSize", UINT),
                     ("DefaultChar", BYTE*MAX_DEFAULTCHAR),
                     ("LeadByte", BYTE*MAX_LEADBYTES),
@@ -719,19 +719,19 @@ def test_bug691291(self):
         self.addCleanup(os_helper.unlink, os_helper.TESTFN)
         with open(os_helper.TESTFN, 'wb') as fp:
             fp.write(s)
-        with codecs.open(os_helper.TESTFN, 'r',
+        with codecs_open_no_warn(os_helper.TESTFN, 'r',
                          encoding=self.encoding) as reader:
             self.assertEqual(reader.read(), s1)
 
     def test_invalid_modes(self):
         for mode in ('U', 'rU', 'r+U'):
             with self.assertRaises(ValueError) as cm:
-                codecs.open(os_helper.TESTFN, mode, encoding=self.encoding)
+                codecs_open_no_warn(os_helper.TESTFN, mode, 
encoding=self.encoding)
             self.assertIn('invalid mode', str(cm.exception))
 
         for mode in ('rt', 'wt', 'at', 'r+t'):
             with self.assertRaises(ValueError) as cm:
-                codecs.open(os_helper.TESTFN, mode, encoding=self.encoding)
+                codecs_open_no_warn(os_helper.TESTFN, mode, 
encoding=self.encoding)
             self.assertIn("can't have text and binary mode at once",
                           str(cm.exception))
 
@@ -1844,9 +1844,9 @@ def test_all(self):
     def test_open(self):
         self.addCleanup(os_helper.unlink, os_helper.TESTFN)
         for mode in ('w', 'r', 'r+', 'w+', 'a', 'a+'):
-            with self.subTest(mode), \
-                    codecs.open(os_helper.TESTFN, mode, 'ascii') as file:
-                self.assertIsInstance(file, codecs.StreamReaderWriter)
+            with self.subTest(mode), self.assertWarns(DeprecationWarning):
+                with codecs.open(os_helper.TESTFN, mode, 'ascii') as file:
+                    self.assertIsInstance(file, codecs.StreamReaderWriter)
 
     def test_undefined(self):
         self.assertRaises(UnicodeError, codecs.encode, 'abc', 'undefined')
@@ -1863,7 +1863,7 @@ def test_file_closes_if_lookup_error_raised(self):
         mock_open = mock.mock_open()
         with mock.patch('builtins.open', mock_open) as file:
             with self.assertRaises(LookupError):
-                codecs.open(os_helper.TESTFN, 'wt', 'invalid-encoding')
+                codecs_open_no_warn(os_helper.TESTFN, 'wt', 'invalid-encoding')
 
             file().close.assert_called()
 
@@ -2883,7 +2883,7 @@ def test_seek0(self):
         self.addCleanup(os_helper.unlink, os_helper.TESTFN)
         for encoding in tests:
             # Check if the BOM is written only once
-            with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f:
+            with codecs_open_no_warn(os_helper.TESTFN, 'w+', 
encoding=encoding) as f:
                 f.write(data)
                 f.write(data)
                 f.seek(0)
@@ -2892,7 +2892,7 @@ def test_seek0(self):
                 self.assertEqual(f.read(), data * 2)
 
             # Check that the BOM is written after a seek(0)
-            with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f:
+            with codecs_open_no_warn(os_helper.TESTFN, 'w+', 
encoding=encoding) as f:
                 f.write(data[0])
                 self.assertNotEqual(f.tell(), 0)
                 f.seek(0)
@@ -2901,7 +2901,7 @@ def test_seek0(self):
                 self.assertEqual(f.read(), data)
 
             # (StreamWriter) Check that the BOM is written after a seek(0)
-            with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f:
+            with codecs_open_no_warn(os_helper.TESTFN, 'w+', 
encoding=encoding) as f:
                 f.writer.write(data[0])
                 self.assertNotEqual(f.writer.tell(), 0)
                 f.writer.seek(0)
@@ -2911,7 +2911,7 @@ def test_seek0(self):
 
             # Check that the BOM is not written after a seek() at a position
             # different than the start
-            with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f:
+            with codecs_open_no_warn(os_helper.TESTFN, 'w+', 
encoding=encoding) as f:
                 f.write(data)
                 f.seek(f.tell())
                 f.write(data)
@@ -2920,7 +2920,7 @@ def test_seek0(self):
 
             # (StreamWriter) Check that the BOM is not written after a seek()
             # at a position different than the start
-            with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f:
+            with codecs_open_no_warn(os_helper.TESTFN, 'w+', 
encoding=encoding) as f:
                 f.writer.write(data)
                 f.writer.seek(f.writer.tell())
                 f.writer.write(data)
diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py
index 1b55f1e70b32f5..d7a233377bdb02 100644
--- a/Lib/test/test_multibytecodec.py
+++ b/Lib/test/test_multibytecodec.py
@@ -314,7 +314,8 @@ def test_bug1728403(self):
                 f.write(b'\xa1')
             finally:
                 f.close()
-            f = codecs.open(TESTFN, encoding='cp949')
+            with self.assertWarns(DeprecationWarning):
+                f = codecs.open(TESTFN, encoding='cp949')
             try:
                 self.assertRaises(UnicodeDecodeError, f.read, 2)
             finally:
diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py
index 0d0f86c145b499..5c10bcedc69bc6 100644
--- a/Lib/test/test_sax.py
+++ b/Lib/test/test_sax.py
@@ -1,5 +1,4 @@
 # regression test for SAX 2.0
-# $Id$
 
 from xml.sax import make_parser, ContentHandler, \
                     SAXException, SAXReaderNotAvailable, SAXParseException
@@ -832,8 +831,9 @@ class StreamReaderWriterXmlgenTest(XmlgenTest, 
unittest.TestCase):
     fname = os_helper.TESTFN + '-codecs'
 
     def ioclass(self):
-        writer = codecs.open(self.fname, 'w', encoding='ascii',
-                             errors='xmlcharrefreplace', buffering=0)
+        with self.assertWarns(DeprecationWarning):
+            writer = codecs.open(self.fname, 'w', encoding='ascii',
+                                errors='xmlcharrefreplace', buffering=0)
         def cleanup():
             writer.close()
             os_helper.unlink(self.fname)
diff --git 
a/Misc/NEWS.d/next/Library/2025-04-27-15-21-05.gh-issue-133036.HCNYA7.rst 
b/Misc/NEWS.d/next/Library/2025-04-27-15-21-05.gh-issue-133036.HCNYA7.rst
new file mode 100644
index 00000000000000..46b1f5575d0d2e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-04-27-15-21-05.gh-issue-133036.HCNYA7.rst
@@ -0,0 +1,2 @@
+:func:`codecs.open` is now deprecated. Use :func:`open` instead. Contributed
+by Inada Naoki.
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index e77d8448310fba..a5b2ca7240a55f 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -1185,7 +1185,7 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject 
*buffer,
     }
 
     /* Check we have been asked for a real text encoding */
-    codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
+    codec_info = _PyCodec_LookupTextEncoding(encoding, NULL);
     if (codec_info == NULL) {
         Py_CLEAR(self->encoding);
         goto error;
@@ -1324,8 +1324,7 @@ textiowrapper_change_encoding(textio *self, PyObject 
*encoding,
     }
 
     // Create new encoder & decoder
-    PyObject *codec_info = _PyCodec_LookupTextEncoding(
-        c_encoding, "codecs.open()");
+    PyObject *codec_info = _PyCodec_LookupTextEncoding(c_encoding, NULL);
     if (codec_info == NULL) {
         Py_DECREF(encoding);
         Py_DECREF(errors);
diff --git a/Python/codecs.c b/Python/codecs.c
index 265f5214e5bad2..caf8d9d5f3c188 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -540,11 +540,19 @@ PyObject * _PyCodec_LookupTextEncoding(const char 
*encoding,
             Py_DECREF(attr);
             if (is_text_codec <= 0) {
                 Py_DECREF(codec);
-                if (!is_text_codec)
-                    PyErr_Format(PyExc_LookupError,
-                                 "'%.400s' is not a text encoding; "
-                                 "use %s to handle arbitrary codecs",
-                                 encoding, alternate_command);
+                if (!is_text_codec) {
+                    if (alternate_command != NULL) {
+                        PyErr_Format(PyExc_LookupError,
+                                     "'%.400s' is not a text encoding; "
+                                     "use %s to handle arbitrary codecs",
+                                     encoding, alternate_command);
+                    }
+                    else {
+                        PyErr_Format(PyExc_LookupError,
+                                     "'%.400s' is not a text encoding",
+                                     encoding);
+                    }
+                }
                 return NULL;
             }
         }

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

[Python-checkins] gh-133036: Deprecate codecs.open (#133038)

Reply via email to