https://github.com/python/cpython/commit/fe08cdf2657bfde80aeed436fe49fee233651837
commit: fe08cdf2657bfde80aeed436fe49fee233651837
branch: 3.13
author: Miss Islington (bot) <31488909+miss-isling...@users.noreply.github.com>
committer: serhiy-storchaka <storch...@gmail.com>
date: 2024-12-17T11:45:29Z
summary:

[3.13] gh-126742: Add _PyErr_SetLocaleString, use it for gdbm & dlerror 
messages (GH-126746) (GH-128023)

- Add a helper to set an error from locale-encoded `char*`
- Use the helper for gdbm & dlerror messages
(cherry picked from commit 7303f06846b69016a075bca7ad7c6055f29ad024)

Co-authored-by: Bénédikt Tran <10796600+picn...@users.noreply.github.com>
Co-authored-by: Victor Stinner <vstin...@python.org>

files:
M Include/internal/pycore_pyerrors.h
M Lib/test/test_ctypes/test_dlerror.py
M Lib/test/test_dbm_gnu.py
M Modules/_ctypes/_ctypes.c
M Modules/_ctypes/callproc.c
M Modules/_gdbmmodule.c
M Modules/_hashopenssl.c
M Modules/_sqlite/util.c
M Modules/_testcapi/exceptions.c
M Modules/main.c
M Modules/pyexpat.c
M Python/errors.c

diff --git a/Include/internal/pycore_pyerrors.h 
b/Include/internal/pycore_pyerrors.h
index 15071638203457..615cc23ec93528 100644
--- a/Include/internal/pycore_pyerrors.h
+++ b/Include/internal/pycore_pyerrors.h
@@ -125,6 +125,18 @@ PyAPI_FUNC(void) _PyErr_SetString(
     PyObject *exception,
     const char *string);
 
+/*
+ * Set an exception with the error message decoded from the current locale
+ * encoding (LC_CTYPE).
+ *
+ * Exceptions occurring in decoding take priority over the desired exception.
+ *
+ * Exported for '_ctypes' shared extensions.
+ */
+PyAPI_FUNC(void) _PyErr_SetLocaleString(
+    PyObject *exception,
+    const char *string);
+
 PyAPI_FUNC(PyObject*) _PyErr_Format(
     PyThreadState *tstate,
     PyObject *exception,
diff --git a/Lib/test/test_ctypes/test_dlerror.py 
b/Lib/test/test_ctypes/test_dlerror.py
index 4441e30cd7a2a7..c3c34d43481d36 100644
--- a/Lib/test/test_ctypes/test_dlerror.py
+++ b/Lib/test/test_ctypes/test_dlerror.py
@@ -1,7 +1,12 @@
+import _ctypes
 import os
+import platform
 import sys
+import test.support
 import unittest
-import platform
+from ctypes import CDLL, c_int
+from ctypes.util import find_library
+
 
 FOO_C = r"""
 #include <unistd.h>
@@ -26,7 +31,7 @@
 
 
 @unittest.skipUnless(sys.platform.startswith('linux'),
-                     'Test only valid for Linux')
+                     'test requires GNU IFUNC support')
 class TestNullDlsym(unittest.TestCase):
     """GH-126554: Ensure that we catch NULL dlsym return values
 
@@ -53,14 +58,6 @@ def test_null_dlsym(self):
         import subprocess
         import tempfile
 
-        # To avoid ImportErrors on Windows, where _ctypes does not have
-        # dlopen and dlsym,
-        # import here, i.e., inside the test function.
-        # The skipUnless('linux') decorator ensures that we're on linux
-        # if we're executing these statements.
-        from ctypes import CDLL, c_int
-        from _ctypes import dlopen, dlsym
-
         retcode = subprocess.call(["gcc", "--version"],
                                   stdout=subprocess.DEVNULL,
                                   stderr=subprocess.DEVNULL)
@@ -111,6 +108,8 @@ def test_null_dlsym(self):
             self.assertEqual(os.read(pipe_r, 2), b'OK')
 
             # Case #3: Test 'py_dl_sym' from Modules/_ctypes/callproc.c
+            dlopen = test.support.get_attribute(_ctypes, 'dlopen')
+            dlsym = test.support.get_attribute(_ctypes, 'dlsym')
             L = dlopen(dstname)
             with self.assertRaisesRegex(OSError, "symbol 'foo' not found"):
                 dlsym(L, "foo")
@@ -119,5 +118,66 @@ def test_null_dlsym(self):
             self.assertEqual(os.read(pipe_r, 2), b'OK')
 
 
+@unittest.skipUnless(os.name != 'nt', 'test requires dlerror() calls')
+class TestLocalization(unittest.TestCase):
+
+    @staticmethod
+    def configure_locales(func):
+        return test.support.run_with_locale(
+            'LC_ALL',
+            'fr_FR.iso88591', 'ja_JP.sjis', 'zh_CN.gbk',
+            'fr_FR.utf8', 'en_US.utf8',
+            '',
+        )(func)
+
+    @classmethod
+    def setUpClass(cls):
+        cls.libc_filename = find_library("c")
+
+    @configure_locales
+    def test_localized_error_from_dll(self):
+        dll = CDLL(self.libc_filename)
+        with self.assertRaises(AttributeError) as cm:
+            dll.this_name_does_not_exist
+        if sys.platform.startswith('linux'):
+            # On macOS, the filename is not reported by dlerror().
+            self.assertIn(self.libc_filename, str(cm.exception))
+
+    @configure_locales
+    def test_localized_error_in_dll(self):
+        dll = CDLL(self.libc_filename)
+        with self.assertRaises(ValueError) as cm:
+            c_int.in_dll(dll, 'this_name_does_not_exist')
+        if sys.platform.startswith('linux'):
+            # On macOS, the filename is not reported by dlerror().
+            self.assertIn(self.libc_filename, str(cm.exception))
+
+    @unittest.skipUnless(hasattr(_ctypes, 'dlopen'),
+                         'test requires _ctypes.dlopen()')
+    @configure_locales
+    def test_localized_error_dlopen(self):
+        missing_filename = b'missing\xff.so'
+        # Depending whether the locale, we may encode '\xff' differently
+        # but we are only interested in avoiding a UnicodeDecodeError
+        # when reporting the dlerror() error message which contains
+        # the localized filename.
+        filename_pattern = r'missing.*?\.so'
+        with self.assertRaisesRegex(OSError, filename_pattern):
+            _ctypes.dlopen(missing_filename, 2)
+
+    @unittest.skipUnless(hasattr(_ctypes, 'dlopen'),
+                         'test requires _ctypes.dlopen()')
+    @unittest.skipUnless(hasattr(_ctypes, 'dlsym'),
+                         'test requires _ctypes.dlsym()')
+    @configure_locales
+    def test_localized_error_dlsym(self):
+        dll = _ctypes.dlopen(self.libc_filename)
+        with self.assertRaises(OSError) as cm:
+            _ctypes.dlsym(dll, 'this_name_does_not_exist')
+        if sys.platform.startswith('linux'):
+            # On macOS, the filename is not reported by dlerror().
+            self.assertIn(self.libc_filename, str(cm.exception))
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Lib/test/test_dbm_gnu.py b/Lib/test/test_dbm_gnu.py
index e20addf1f04f1b..66268c42a300b5 100644
--- a/Lib/test/test_dbm_gnu.py
+++ b/Lib/test/test_dbm_gnu.py
@@ -1,10 +1,11 @@
-from test import support
-from test.support import import_helper, cpython_only
-gdbm = import_helper.import_module("dbm.gnu") #skip if not supported
-import unittest
 import os
-from test.support.os_helper import TESTFN, TESTFN_NONASCII, unlink, FakePath
+import unittest
+from test import support
+from test.support import cpython_only, import_helper
+from test.support.os_helper import (TESTFN, TESTFN_NONASCII, FakePath,
+                                    create_empty_file, temp_dir, unlink)
 
+gdbm = import_helper.import_module("dbm.gnu")  # skip if not supported
 
 filename = TESTFN
 
@@ -205,6 +206,16 @@ def test_clear(self):
                 self.assertNotIn(k, db)
             self.assertEqual(len(db), 0)
 
+    @support.run_with_locale(
+        'LC_ALL',
+        'fr_FR.iso88591', 'ja_JP.sjis', 'zh_CN.gbk',
+        'fr_FR.utf8', 'en_US.utf8',
+        '',
+    )
+    def test_localized_error(self):
+        with temp_dir() as d:
+            create_empty_file(os.path.join(d, 'test'))
+            self.assertRaises(gdbm.error, gdbm.open, filename, 'r')
 
 
 if __name__ == '__main__':
diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c
index 1509cee6022d3a..3d7cb1b1164843 100644
--- a/Modules/_ctypes/_ctypes.c
+++ b/Modules/_ctypes/_ctypes.c
@@ -986,15 +986,8 @@ CDataType_in_dll_impl(PyObject *type, PyTypeObject *cls, 
PyObject *dll,
     #ifdef USE_DLERROR
     const char *dlerr = dlerror();
     if (dlerr) {
-        PyObject *message = PyUnicode_DecodeLocale(dlerr, "surrogateescape");
-        if (message) {
-            PyErr_SetObject(PyExc_ValueError, message);
-            Py_DECREF(message);
-            return NULL;
-        }
-        // Ignore errors from PyUnicode_DecodeLocale,
-        // fall back to the generic error below.
-        PyErr_Clear();
+        _PyErr_SetLocaleString(PyExc_ValueError, dlerr);
+        return NULL;
     }
     #endif
 #undef USE_DLERROR
@@ -3809,21 +3802,14 @@ PyCFuncPtr_FromDll(PyTypeObject *type, PyObject *args, 
PyObject *kwds)
     address = (PPROC)dlsym(handle, name);
 
     if (!address) {
-       #ifdef USE_DLERROR
+    #ifdef USE_DLERROR
         const char *dlerr = dlerror();
         if (dlerr) {
-            PyObject *message = PyUnicode_DecodeLocale(dlerr, 
"surrogateescape");
-            if (message) {
-                PyErr_SetObject(PyExc_AttributeError, message);
-                Py_DECREF(ftuple);
-                Py_DECREF(message);
-                return NULL;
-            }
-            // Ignore errors from PyUnicode_DecodeLocale,
-            // fall back to the generic error below.
-            PyErr_Clear();
+            _PyErr_SetLocaleString(PyExc_AttributeError, dlerr);
+            Py_DECREF(ftuple);
+            return NULL;
         }
-       #endif
+    #endif
         PyErr_Format(PyExc_AttributeError, "function '%s' not found", name);
         Py_DECREF(ftuple);
         return NULL;
diff --git a/Modules/_ctypes/callproc.c b/Modules/_ctypes/callproc.c
index b5511f06e3904a..f9864ebb735ddf 100644
--- a/Modules/_ctypes/callproc.c
+++ b/Modules/_ctypes/callproc.c
@@ -1579,10 +1579,11 @@ static PyObject *py_dl_open(PyObject *self, PyObject 
*args)
     Py_XDECREF(name2);
     if (!handle) {
         const char *errmsg = dlerror();
-        if (!errmsg)
-            errmsg = "dlopen() error";
-        PyErr_SetString(PyExc_OSError,
-                               errmsg);
+        if (errmsg) {
+            _PyErr_SetLocaleString(PyExc_OSError, errmsg);
+            return NULL;
+        }
+        PyErr_SetString(PyExc_OSError, "dlopen() error");
         return NULL;
     }
     return PyLong_FromVoidPtr(handle);
@@ -1595,8 +1596,12 @@ static PyObject *py_dl_close(PyObject *self, PyObject 
*args)
     if (!PyArg_ParseTuple(args, "O&:dlclose", &_parse_voidp, &handle))
         return NULL;
     if (dlclose(handle)) {
-        PyErr_SetString(PyExc_OSError,
-                               dlerror());
+        const char *errmsg = dlerror();
+        if (errmsg) {
+            _PyErr_SetLocaleString(PyExc_OSError, errmsg);
+            return NULL;
+        }
+        PyErr_SetString(PyExc_OSError, "dlclose() error");
         return NULL;
     }
     Py_RETURN_NONE;
@@ -1630,21 +1635,14 @@ static PyObject *py_dl_sym(PyObject *self, PyObject 
*args)
     if (ptr) {
         return PyLong_FromVoidPtr(ptr);
     }
-       #ifdef USE_DLERROR
-    const char *dlerr = dlerror();
-    if (dlerr) {
-        PyObject *message = PyUnicode_DecodeLocale(dlerr, "surrogateescape");
-        if (message) {
-            PyErr_SetObject(PyExc_OSError, message);
-            Py_DECREF(message);
-            return NULL;
-        }
-        // Ignore errors from PyUnicode_DecodeLocale,
-        // fall back to the generic error below.
-        PyErr_Clear();
+    #ifdef USE_DLERROR
+    const char *errmsg = dlerror();
+    if (errmsg) {
+        _PyErr_SetLocaleString(PyExc_OSError, errmsg);
+        return NULL;
     }
-       #endif
-       #undef USE_DLERROR
+    #endif
+    #undef USE_DLERROR
     PyErr_Format(PyExc_OSError, "symbol '%s' not found", name);
     return NULL;
 }
diff --git a/Modules/_gdbmmodule.c b/Modules/_gdbmmodule.c
index df7fba67810ed0..ea4fe247987e9d 100644
--- a/Modules/_gdbmmodule.c
+++ b/Modules/_gdbmmodule.c
@@ -8,10 +8,11 @@
 #endif
 
 #include "Python.h"
+#include "pycore_pyerrors.h"        // _PyErr_SetLocaleString()
 #include "gdbm.h"
 
 #include <fcntl.h>
-#include <stdlib.h>               // free()
+#include <stdlib.h>                 // free()
 #include <sys/stat.h>
 #include <sys/types.h>
 
@@ -33,6 +34,24 @@ get_gdbm_state(PyObject *module)
     return (_gdbm_state *)state;
 }
 
+/*
+ * Set the gdbm error obtained by gdbm_strerror(gdbm_errno).
+ *
+ * If no error message exists, a generic (UTF-8) error message
+ * is used instead.
+ */
+static void
+set_gdbm_error(_gdbm_state *state, const char *generic_error)
+{
+    const char *gdbm_errmsg = gdbm_strerror(gdbm_errno);
+    if (gdbm_errmsg) {
+        _PyErr_SetLocaleString(state->gdbm_error, gdbm_errmsg);
+    }
+    else {
+        PyErr_SetString(state->gdbm_error, generic_error);
+    }
+}
+
 /*[clinic input]
 module _gdbm
 class _gdbm.gdbm "gdbmobject *" "&Gdbmtype"
@@ -91,7 +110,7 @@ newgdbmobject(_gdbm_state *state, const char *file, int 
flags, int mode)
             PyErr_SetFromErrnoWithFilename(state->gdbm_error, file);
         }
         else {
-            PyErr_SetString(state->gdbm_error, gdbm_strerror(gdbm_errno));
+            set_gdbm_error(state, "gdbm_open() error");
         }
         Py_DECREF(dp);
         return NULL;
@@ -136,7 +155,7 @@ gdbm_length(gdbmobject *dp)
                 PyErr_SetFromErrno(state->gdbm_error);
             }
             else {
-                PyErr_SetString(state->gdbm_error, gdbm_strerror(gdbm_errno));
+                set_gdbm_error(state, "gdbm_count() error");
             }
             return -1;
         }
@@ -286,7 +305,7 @@ gdbm_ass_sub(gdbmobject *dp, PyObject *v, PyObject *w)
                 PyErr_SetObject(PyExc_KeyError, v);
             }
             else {
-                PyErr_SetString(state->gdbm_error, gdbm_strerror(gdbm_errno));
+                set_gdbm_error(state, "gdbm_delete() error");
             }
             return -1;
         }
@@ -297,11 +316,12 @@ gdbm_ass_sub(gdbmobject *dp, PyObject *v, PyObject *w)
         }
         errno = 0;
         if (gdbm_store(dp->di_dbm, krec, drec, GDBM_REPLACE) < 0) {
-            if (errno != 0)
+            if (errno != 0) {
                 PyErr_SetFromErrno(state->gdbm_error);
-            else
-                PyErr_SetString(state->gdbm_error,
-                                gdbm_strerror(gdbm_errno));
+            }
+            else {
+                set_gdbm_error(state, "gdbm_store() error");
+            }
             return -1;
         }
     }
@@ -534,10 +554,12 @@ _gdbm_gdbm_reorganize_impl(gdbmobject *self, PyTypeObject 
*cls)
     check_gdbmobject_open(self, state->gdbm_error);
     errno = 0;
     if (gdbm_reorganize(self->di_dbm) < 0) {
-        if (errno != 0)
+        if (errno != 0) {
             PyErr_SetFromErrno(state->gdbm_error);
-        else
-            PyErr_SetString(state->gdbm_error, gdbm_strerror(gdbm_errno));
+        }
+        else {
+            set_gdbm_error(state, "gdbm_reorganize() error");
+        }
         return NULL;
     }
     Py_RETURN_NONE;
diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c
index 14d9c186151232..d2e7630c22e7e9 100644
--- a/Modules/_hashopenssl.c
+++ b/Modules/_hashopenssl.c
@@ -320,6 +320,7 @@ _setException(PyObject *exc, const char* altmsg, ...)
     va_end(vargs);
     ERR_clear_error();
 
+    /* ERR_ERROR_STRING(3) ensures that the messages below are ASCII */
     lib = ERR_lib_error_string(errcode);
     func = ERR_func_error_string(errcode);
     reason = ERR_reason_error_string(errcode);
diff --git a/Modules/_sqlite/util.c b/Modules/_sqlite/util.c
index 9e8613ef67916e..b0622e66928f47 100644
--- a/Modules/_sqlite/util.c
+++ b/Modules/_sqlite/util.c
@@ -134,6 +134,7 @@ _pysqlite_seterror(pysqlite_state *state, sqlite3 *db)
 
     /* Create and set the exception. */
     int extended_errcode = sqlite3_extended_errcode(db);
+    // sqlite3_errmsg() always returns an UTF-8 encoded message
     const char *errmsg = sqlite3_errmsg(db);
     raise_exception(exc_class, extended_errcode, errmsg);
     return extended_errcode;
diff --git a/Modules/_testcapi/exceptions.c b/Modules/_testcapi/exceptions.c
index 42a9915143e6fa..d73625c453f6c4 100644
--- a/Modules/_testcapi/exceptions.c
+++ b/Modules/_testcapi/exceptions.c
@@ -3,6 +3,7 @@
 
 #include "parts.h"
 #include "util.h"
+
 #include "clinic/exceptions.c.h"
 
 
diff --git a/Modules/main.c b/Modules/main.c
index 15ea49a1bad19e..3bf2241f2837a3 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -374,6 +374,7 @@ pymain_run_file_obj(PyObject *program_name, PyObject 
*filename,
     if (fp == NULL) {
         // Ignore the OSError
         PyErr_Clear();
+        // TODO(picnixz): strerror() is locale dependent but not 
PySys_FormatStderr().
         PySys_FormatStderr("%S: can't open file %R: [Errno %d] %s\n",
                            program_name, filename, errno, strerror(errno));
         return 2;
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index fafb5ce4106111..e131389805bf3d 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -1782,7 +1782,12 @@ add_error(PyObject *errors_module, PyObject *codes_dict,
      *       with the other uses of the XML_ErrorString function
      *       elsewhere within this file.  pyexpat's copy of the messages
      *       only acts as a fallback in case of outdated runtime libexpat,
-     *       where it returns NULL. */
+     *       where it returns NULL.
+     *
+     *       In addition, XML_ErrorString is assumed to return UTF-8 encoded
+     *       strings (in conv_string_to_unicode, we decode them using 'strict'
+     *       error handling).
+     */
     const char *error_string = XML_ErrorString(error_code);
     if (error_string == NULL) {
         error_string = error_info_of[error_index].description;
diff --git a/Python/errors.c b/Python/errors.c
index bf1ddfa8cfb4dc..f19c030b1be114 100644
--- a/Python/errors.c
+++ b/Python/errors.c
@@ -299,6 +299,15 @@ PyErr_SetString(PyObject *exception, const char *string)
     _PyErr_SetString(tstate, exception, string);
 }
 
+void
+_PyErr_SetLocaleString(PyObject *exception, const char *string)
+{
+    PyObject *value = PyUnicode_DecodeLocale(string, "surrogateescape");
+    if (value != NULL) {
+        PyErr_SetObject(exception, value);
+        Py_DECREF(value);
+    }
+}
 
 PyObject* _Py_HOT_FUNCTION
 PyErr_Occurred(void)

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to