https://github.com/python/cpython/commit/aee45fd03f23c29aac57ecf747f9f1ee52235e72
commit: aee45fd03f23c29aac57ecf747f9f1ee52235e72
branch: main
author: Bénédikt Tran <10796600+picn...@users.noreply.github.com>
committer: picnixz <10796600+picn...@users.noreply.github.com>
date: 2025-06-08T12:34:57Z
summary:

gh-134531: refactor `_hashlib` logic for handling NIDs and EVP_MDs (#135254)

files:
M Modules/_hashopenssl.c

diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c
index 42821ebe9f6a54..50cf3c57491049 100644
--- a/Modules/_hashopenssl.c
+++ b/Modules/_hashopenssl.c
@@ -368,41 +368,83 @@ notify_ssl_error_occurred(void)
 }
 /* LCOV_EXCL_STOP */
 
-static const char *
-get_openssl_evp_md_utf8name(const EVP_MD *md)
-{
-    assert(md != NULL);
-    int nid = EVP_MD_nid(md);
-    const char *name = NULL;
-    const py_hashentry_t *h;
+/*
+ * OpenSSL provides a way to go from NIDs to digest names for hash functions
+ * but lacks this granularity for MAC objects where it is not possible to get
+ * the underlying digest name (only the block size and digest size are allowed
+ * to be recovered).
+ *
+ * In addition, OpenSSL aliases pollute the list of known digest names
+ * as OpenSSL appears to have its own definition of alias. In particular,
+ * the resulting list still contains duplicate and alternate names for several
+ * algorithms.
+ *
+ * Therefore, digest names, whether they are used by hash functions or HMAC,
+ * are handled through EVP_MD objects or directly by using some NID.
+ */
 
-    for (h = py_hashes; h->py_name != NULL; h++) {
+/* Get a cached entry by OpenSSL NID. */
+static const py_hashentry_t *
+get_hashentry_by_nid(int nid)
+{
+    for (const py_hashentry_t *h = py_hashes; h->py_name != NULL; h++) {
         if (h->ossl_nid == nid) {
-            name = h->py_name;
-            break;
+            return h;
         }
     }
+    return NULL;
+}
+
+/*
+ * Convert the NID to a string via OBJ_nid2*() functions.
+ *
+ * If 'nid' cannot be resolved, set an exception and return NULL.
+ */
+static const char *
+get_asn1_utf8name_by_nid(int nid)
+{
+    const char *name = OBJ_nid2ln(nid);
     if (name == NULL) {
-        /* Ignore aliased names and only use long, lowercase name. The aliases
-         * pollute the list and OpenSSL appears to have its own definition of
-         * alias as the resulting list still contains duplicate and alternate
-         * names for several algorithms.
-         */
-        name = OBJ_nid2ln(nid);
-        if (name == NULL)
-            name = OBJ_nid2sn(nid);
+        // In OpenSSL 3.0 and later, OBJ_nid*() are thread-safe and may raise.
+        assert(ERR_peek_last_error() != 0);
+        if (ERR_GET_REASON(ERR_peek_last_error()) != OBJ_R_UNKNOWN_NID) {
+            notify_ssl_error_occurred();
+            return NULL;
+        }
+        // fallback to short name and unconditionally propagate errors
+        name = OBJ_nid2sn(nid);
+        if (name == NULL) {
+            raise_ssl_error(PyExc_ValueError, "cannot resolve NID %d", nid);
+        }
     }
     return name;
 }
 
-static PyObject *
-get_openssl_evp_md_name(const EVP_MD *md)
+/*
+ * Convert the NID to an OpenSSL digest name.
+ *
+ * On error, set an exception and return NULL.
+ */
+static const char *
+get_hashlib_utf8name_by_nid(int nid)
+{
+    const py_hashentry_t *e = get_hashentry_by_nid(nid);
+    return e ? e->py_name : get_asn1_utf8name_by_nid(nid);
+}
+
+/* Same as get_hashlib_utf8name_by_nid() but using an EVP_MD object. */
+static const char *
+get_hashlib_utf8name_by_evp_md(const EVP_MD *md)
 {
-    const char *name = get_openssl_evp_md_utf8name(md);
-    return PyUnicode_FromString(name);
+    assert(md != NULL);
+    return get_hashlib_utf8name_by_nid(EVP_MD_nid(md));
 }
 
-/* Get EVP_MD by HID and purpose */
+/*
+ * Get a new reference to an EVP_MD object described by name and purpose.
+ *
+ * If 'name' is an OpenSSL indexed name, the return value is cached.
+ */
 static PY_EVP_MD *
 get_openssl_evp_md_by_utf8name(PyObject *module, const char *name,
                                Py_hash_type py_ht)
@@ -471,42 +513,46 @@ get_openssl_evp_md_by_utf8name(PyObject *module, const char *name,
     return digest;
 }
 
-/* Get digest EVP_MD from object
+/*
+ * Raise an exception indicating that 'digestmod' is not supported.
+ */
+static void
+raise_unsupported_digestmod_error(PyObject *module, PyObject *digestmod)
+{
+    _hashlibstate *state = get_hashlib_state(module);
+    PyErr_Format(state->unsupported_digestmod_error,
+                 "Unsupported digestmod %R", digestmod);
+}
+
+/*
+ * Get a new reference to an EVP_MD described by 'digestmod' and purpose.
+ *
+ * On error, set an exception and return NULL.
  *
- * * string
- * * _hashopenssl builtin function
+ * Parameters
  *
- * on error returns NULL with exception set.
+ *      digestmod   A digest name or a _hashopenssl builtin function
+ *      py_ht       The message digest purpose.
  */
 static PY_EVP_MD *
-get_openssl_evp_md(PyObject *module, PyObject *digestmod,
-                   Py_hash_type py_ht)
+get_openssl_evp_md(PyObject *module, PyObject *digestmod, Py_hash_type py_ht)
 {
-    PyObject *name_obj = NULL;
     const char *name;
-
     if (PyUnicode_Check(digestmod)) {
-        name_obj = digestmod;
-    } else {
-        _hashlibstate *state = get_hashlib_state(module);
-        // borrowed ref
-        name_obj = PyDict_GetItemWithError(state->constructs, digestmod);
+        name = PyUnicode_AsUTF8(digestmod);
     }
-    if (name_obj == NULL) {
-        if (!PyErr_Occurred()) {
-            _hashlibstate *state = get_hashlib_state(module);
-            PyErr_Format(
-                state->unsupported_digestmod_error,
-                "Unsupported digestmod %R", digestmod);
-        }
-        return NULL;
+    else {
+        PyObject *dict = get_hashlib_state(module)->constructs;
+        assert(dict != NULL);
+        PyObject *borrowed_ref = PyDict_GetItemWithError(dict, digestmod);
+        name = borrowed_ref == NULL ? NULL : PyUnicode_AsUTF8(borrowed_ref);
     }
-
-    name = PyUnicode_AsUTF8(name_obj);
     if (name == NULL) {
+        if (!PyErr_Occurred()) {
+            raise_unsupported_digestmod_error(module, digestmod);
+        }
         return NULL;
     }
-
     return get_openssl_evp_md_by_utf8name(module, name, py_ht);
 }
 
@@ -745,7 +791,9 @@ _hashlib_HASH_get_name(PyObject *op, void *Py_UNUSED(closure))
         notify_ssl_error_occurred();
         return NULL;
     }
-    return get_openssl_evp_md_name(md);
+    const char *name = get_hashlib_utf8name_by_evp_md(md);
+    assert(name != NULL || PyErr_Occurred());
+    return name == NULL ? NULL : PyUnicode_FromString(name);
 }
 
 static PyGetSetDef HASH_getsets[] = {
@@ -1775,20 +1823,15 @@ _hmac_dealloc(PyObject *op)
 static PyObject *
 _hmac_repr(PyObject *op)
 {
+    const char *digest_name;
     HMACobject *self = HMACobject_CAST(op);
     const EVP_MD *md = _hashlib_hmac_get_md(self);
-    if (md == NULL) {
-        return NULL;
-    }
-    PyObject *digest_name = get_openssl_evp_md_name(md);
+    digest_name = md == NULL ? NULL : get_hashlib_utf8name_by_evp_md(md);
     if (digest_name == NULL) {
+        assert(PyErr_Occurred());
         return NULL;
     }
-    PyObject *repr = PyUnicode_FromFormat(
-        "<%U HMAC object @ %p>", digest_name, self
-    );
-    Py_DECREF(digest_name);
-    return repr;
+    return PyUnicode_FromFormat("<%s HMAC object @ %p>", digest_name, self);
 }
 
 /*[clinic input]
@@ -1900,13 +1943,12 @@ _hashlib_hmac_get_name(PyObject *op, void *Py_UNUSED(closure))
     if (md == NULL) {
         return NULL;
     }
-    PyObject *digest_name = get_openssl_evp_md_name(md);
+    const char *digest_name = get_hashlib_utf8name_by_evp_md(md);
     if (digest_name == NULL) {
+        assert(PyErr_Occurred());
         return NULL;
     }
-    PyObject *name = PyUnicode_FromFormat("hmac-%U", digest_name);
-    Py_DECREF(digest_name);
-    return name;
+    return PyUnicode_FromFormat("hmac-%s", digest_name);
 }
 
 static PyMethodDef HMAC_methods[] = {
@@ -1982,7 +2024,9 @@ _openssl_hash_name_mapper(const EVP_MD *md, const char *from,
         return;
     }
 
-    py_name = get_openssl_evp_md_name(md);
+    const char *name = get_hashlib_utf8name_by_evp_md(md);
+    assert(name != NULL || PyErr_Occurred());
+    py_name = name == NULL ? NULL : PyUnicode_FromString(name);
     if (py_name == NULL) {
         state->error = 1;
     } else {

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: arch...@mail-archive.com

Reply via email to