https://github.com/python/cpython/commit/4e294f6feb3193854d23e0e8be487213a80b232f commit: 4e294f6feb3193854d23e0e8be487213a80b232f branch: main author: Inada Naoki <songofaca...@gmail.com> committer: methane <songofaca...@gmail.com> date: 2025-04-30T10:11:09+09:00 summary:
gh-133036: Deprecate codecs.open (#133038) Co-authored-by: Hugo van Kemenade <1324225+hug...@users.noreply.github.com> Co-authored-by: Victor Stinner <vstin...@python.org> files: A Misc/NEWS.d/next/Library/2025-04-27-15-21-05.gh-issue-133036.HCNYA7.rst M Doc/deprecations/pending-removal-in-future.rst M Doc/library/codecs.rst M Doc/whatsnew/3.14.rst M Lib/_pyio.py M Lib/codecs.py M Lib/test/test_codecs.py M Lib/test/test_multibytecodec.py M Lib/test/test_sax.py M Modules/_io/textio.c M Python/codecs.c diff --git a/Doc/deprecations/pending-removal-in-future.rst b/Doc/deprecations/pending-removal-in-future.rst index 563f994c04b17b..4c4a368baca955 100644 --- a/Doc/deprecations/pending-removal-in-future.rst +++ b/Doc/deprecations/pending-removal-in-future.rst @@ -47,6 +47,8 @@ although there is currently no date scheduled for their removal. :data:`calendar.FEBRUARY`. (Contributed by Prince Roshan in :gh:`103636`.) +* :mod:`codecs`: use :func:`open` instead of :func:`codecs.open`. (:gh:`133038`) + * :attr:`codeobject.co_lnotab`: use the :meth:`codeobject.co_lines` method instead. diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index f63148a9bd2a31..14f6547e4e0522 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -208,6 +208,10 @@ wider range of codecs when working with binary files: .. versionchanged:: 3.11 The ``'U'`` mode has been removed. + .. deprecated:: next + + :func:`codecs.open` has been superseded by :func:`open`. + .. function:: EncodedFile(file, data_encoding, file_encoding=None, errors='strict') diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 185a4670d8517d..9e6b69fbc05273 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1597,6 +1597,10 @@ Deprecated as a single positional argument. (Contributed by Serhiy Storchaka in :gh:`109218`.) +* :mod:`codecs`: + :func:`codecs.open` is now deprecated. Use :func:`open` instead. + (Contributed by Inada Naoki in :gh:`133036`.) + * :mod:`functools`: Calling the Python implementation of :func:`functools.reduce` with *function* or *sequence* as keyword arguments is now deprecated. diff --git a/Lib/_pyio.py b/Lib/_pyio.py index b875103bee441c..a870de5b532542 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -2056,8 +2056,7 @@ def __init__(self, buffer, encoding=None, errors=None, newline=None, raise ValueError("invalid encoding: %r" % encoding) if not codecs.lookup(encoding)._is_text_encoding: - msg = ("%r is not a text encoding; " - "use codecs.open() to handle arbitrary codecs") + msg = "%r is not a text encoding" raise LookupError(msg % encoding) if errors is None: diff --git a/Lib/codecs.py b/Lib/codecs.py index e365e6cf22929f..fc38e922257644 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -884,7 +884,6 @@ def __reduce_ex__(self, proto): ### Shortcuts def open(filename, mode='r', encoding=None, errors='strict', buffering=-1): - """ Open an encoded file using the given mode and return a wrapped version providing transparent encoding/decoding. @@ -912,8 +911,11 @@ def open(filename, mode='r', encoding=None, errors='strict', buffering=-1): .encoding which allows querying the used encoding. This attribute is only available if an encoding was specified as parameter. - """ + import warnings + warnings.warn("codecs.open() is deprecated. Use open() instead.", + DeprecationWarning, stacklevel=2) + if encoding is not None and \ 'b' not in mode: # Force opening of the file in binary mode diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index e51f7e0ee12b1f..94fcf98e75721f 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -7,6 +7,7 @@ import unittest import encodings from unittest import mock +import warnings from test import support from test.support import os_helper @@ -20,13 +21,12 @@ except ImportError: _testinternalcapi = None -try: - import ctypes -except ImportError: - ctypes = None - SIZEOF_WCHAR_T = -1 -else: - SIZEOF_WCHAR_T = ctypes.sizeof(ctypes.c_wchar) + +def codecs_open_no_warn(*args, **kwargs): + """Call codecs.open(*args, **kwargs) ignoring DeprecationWarning.""" + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + return codecs.open(*args, **kwargs) def coding_checker(self, coder): def check(input, expect): @@ -35,13 +35,13 @@ def check(input, expect): # On small versions of Windows like Windows IoT or Windows Nano Server not all codepages are present def is_code_page_present(cp): - from ctypes import POINTER, WINFUNCTYPE, WinDLL + from ctypes import POINTER, WINFUNCTYPE, WinDLL, Structure from ctypes.wintypes import BOOL, BYTE, WCHAR, UINT, DWORD MAX_LEADBYTES = 12 # 5 ranges, 2 bytes ea., 0 term. MAX_DEFAULTCHAR = 2 # single or double byte MAX_PATH = 260 - class CPINFOEXW(ctypes.Structure): + class CPINFOEXW(Structure): _fields_ = [("MaxCharSize", UINT), ("DefaultChar", BYTE*MAX_DEFAULTCHAR), ("LeadByte", BYTE*MAX_LEADBYTES), @@ -719,19 +719,19 @@ def test_bug691291(self): self.addCleanup(os_helper.unlink, os_helper.TESTFN) with open(os_helper.TESTFN, 'wb') as fp: fp.write(s) - with codecs.open(os_helper.TESTFN, 'r', + with codecs_open_no_warn(os_helper.TESTFN, 'r', encoding=self.encoding) as reader: self.assertEqual(reader.read(), s1) def test_invalid_modes(self): for mode in ('U', 'rU', 'r+U'): with self.assertRaises(ValueError) as cm: - codecs.open(os_helper.TESTFN, mode, encoding=self.encoding) + codecs_open_no_warn(os_helper.TESTFN, mode, encoding=self.encoding) self.assertIn('invalid mode', str(cm.exception)) for mode in ('rt', 'wt', 'at', 'r+t'): with self.assertRaises(ValueError) as cm: - codecs.open(os_helper.TESTFN, mode, encoding=self.encoding) + codecs_open_no_warn(os_helper.TESTFN, mode, encoding=self.encoding) self.assertIn("can't have text and binary mode at once", str(cm.exception)) @@ -1844,9 +1844,9 @@ def test_all(self): def test_open(self): self.addCleanup(os_helper.unlink, os_helper.TESTFN) for mode in ('w', 'r', 'r+', 'w+', 'a', 'a+'): - with self.subTest(mode), \ - codecs.open(os_helper.TESTFN, mode, 'ascii') as file: - self.assertIsInstance(file, codecs.StreamReaderWriter) + with self.subTest(mode), self.assertWarns(DeprecationWarning): + with codecs.open(os_helper.TESTFN, mode, 'ascii') as file: + self.assertIsInstance(file, codecs.StreamReaderWriter) def test_undefined(self): self.assertRaises(UnicodeError, codecs.encode, 'abc', 'undefined') @@ -1863,7 +1863,7 @@ def test_file_closes_if_lookup_error_raised(self): mock_open = mock.mock_open() with mock.patch('builtins.open', mock_open) as file: with self.assertRaises(LookupError): - codecs.open(os_helper.TESTFN, 'wt', 'invalid-encoding') + codecs_open_no_warn(os_helper.TESTFN, 'wt', 'invalid-encoding') file().close.assert_called() @@ -2883,7 +2883,7 @@ def test_seek0(self): self.addCleanup(os_helper.unlink, os_helper.TESTFN) for encoding in tests: # Check if the BOM is written only once - with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f: + with codecs_open_no_warn(os_helper.TESTFN, 'w+', encoding=encoding) as f: f.write(data) f.write(data) f.seek(0) @@ -2892,7 +2892,7 @@ def test_seek0(self): self.assertEqual(f.read(), data * 2) # Check that the BOM is written after a seek(0) - with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f: + with codecs_open_no_warn(os_helper.TESTFN, 'w+', encoding=encoding) as f: f.write(data[0]) self.assertNotEqual(f.tell(), 0) f.seek(0) @@ -2901,7 +2901,7 @@ def test_seek0(self): self.assertEqual(f.read(), data) # (StreamWriter) Check that the BOM is written after a seek(0) - with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f: + with codecs_open_no_warn(os_helper.TESTFN, 'w+', encoding=encoding) as f: f.writer.write(data[0]) self.assertNotEqual(f.writer.tell(), 0) f.writer.seek(0) @@ -2911,7 +2911,7 @@ def test_seek0(self): # Check that the BOM is not written after a seek() at a position # different than the start - with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f: + with codecs_open_no_warn(os_helper.TESTFN, 'w+', encoding=encoding) as f: f.write(data) f.seek(f.tell()) f.write(data) @@ -2920,7 +2920,7 @@ def test_seek0(self): # (StreamWriter) Check that the BOM is not written after a seek() # at a position different than the start - with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f: + with codecs_open_no_warn(os_helper.TESTFN, 'w+', encoding=encoding) as f: f.writer.write(data) f.writer.seek(f.writer.tell()) f.writer.write(data) diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py index 1b55f1e70b32f5..d7a233377bdb02 100644 --- a/Lib/test/test_multibytecodec.py +++ b/Lib/test/test_multibytecodec.py @@ -314,7 +314,8 @@ def test_bug1728403(self): f.write(b'\xa1') finally: f.close() - f = codecs.open(TESTFN, encoding='cp949') + with self.assertWarns(DeprecationWarning): + f = codecs.open(TESTFN, encoding='cp949') try: self.assertRaises(UnicodeDecodeError, f.read, 2) finally: diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index 0d0f86c145b499..5c10bcedc69bc6 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -1,5 +1,4 @@ # regression test for SAX 2.0 -# $Id$ from xml.sax import make_parser, ContentHandler, \ SAXException, SAXReaderNotAvailable, SAXParseException @@ -832,8 +831,9 @@ class StreamReaderWriterXmlgenTest(XmlgenTest, unittest.TestCase): fname = os_helper.TESTFN + '-codecs' def ioclass(self): - writer = codecs.open(self.fname, 'w', encoding='ascii', - errors='xmlcharrefreplace', buffering=0) + with self.assertWarns(DeprecationWarning): + writer = codecs.open(self.fname, 'w', encoding='ascii', + errors='xmlcharrefreplace', buffering=0) def cleanup(): writer.close() os_helper.unlink(self.fname) diff --git a/Misc/NEWS.d/next/Library/2025-04-27-15-21-05.gh-issue-133036.HCNYA7.rst b/Misc/NEWS.d/next/Library/2025-04-27-15-21-05.gh-issue-133036.HCNYA7.rst new file mode 100644 index 00000000000000..46b1f5575d0d2e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-04-27-15-21-05.gh-issue-133036.HCNYA7.rst @@ -0,0 +1,2 @@ +:func:`codecs.open` is now deprecated. Use :func:`open` instead. Contributed +by Inada Naoki. diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index e77d8448310fba..a5b2ca7240a55f 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1185,7 +1185,7 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, } /* Check we have been asked for a real text encoding */ - codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()"); + codec_info = _PyCodec_LookupTextEncoding(encoding, NULL); if (codec_info == NULL) { Py_CLEAR(self->encoding); goto error; @@ -1324,8 +1324,7 @@ textiowrapper_change_encoding(textio *self, PyObject *encoding, } // Create new encoder & decoder - PyObject *codec_info = _PyCodec_LookupTextEncoding( - c_encoding, "codecs.open()"); + PyObject *codec_info = _PyCodec_LookupTextEncoding(c_encoding, NULL); if (codec_info == NULL) { Py_DECREF(encoding); Py_DECREF(errors); diff --git a/Python/codecs.c b/Python/codecs.c index 265f5214e5bad2..caf8d9d5f3c188 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -540,11 +540,19 @@ PyObject * _PyCodec_LookupTextEncoding(const char *encoding, Py_DECREF(attr); if (is_text_codec <= 0) { Py_DECREF(codec); - if (!is_text_codec) - PyErr_Format(PyExc_LookupError, - "'%.400s' is not a text encoding; " - "use %s to handle arbitrary codecs", - encoding, alternate_command); + if (!is_text_codec) { + if (alternate_command != NULL) { + PyErr_Format(PyExc_LookupError, + "'%.400s' is not a text encoding; " + "use %s to handle arbitrary codecs", + encoding, alternate_command); + } + else { + PyErr_Format(PyExc_LookupError, + "'%.400s' is not a text encoding", + encoding); + } + } return NULL; } } _______________________________________________ Python-checkins mailing list -- python-checkins@python.org To unsubscribe send an email to python-checkins-le...@python.org https://mail.python.org/mailman3/lists/python-checkins.python.org/ Member address: arch...@mail-archive.com