https://github.com/python/cpython/commit/b865871486987e7622a2059981cc8d708f9b04b0
commit: b865871486987e7622a2059981cc8d708f9b04b0
branch: main
author: Bénédikt Tran <10796600+picn...@users.noreply.github.com>
committer: picnixz <10796600+picn...@users.noreply.github.com>
date: 2025-04-07T09:31:17+02:00
summary:

gh-99108: add support for SIMD-accelerated HMAC-BLAKE2 (#132120)

files:
M Modules/hmacmodule.c

diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c
index faf4e0a023147e..f75854c6ef5c91 100644
--- a/Modules/hmacmodule.c
+++ b/Modules/hmacmodule.c
@@ -20,6 +20,21 @@
 #include "pycore_hashtable.h"
 #include "pycore_strhex.h"              // _Py_strhex()
 
+/*
+ * Taken from blake2module.c. In the future, detection of SIMD support
+ * should be delegated to https://github.com/python/cpython/pull/125011.
+ */
+#if defined(__x86_64__) && defined(__GNUC__)
+#  include <cpuid.h>
+#elif defined(_M_X64)
+#  include <intrin.h>
+#endif
+
+#if defined(__APPLE__) && defined(__arm64__)
+#  undef HACL_CAN_COMPILE_SIMD128
+#  undef HACL_CAN_COMPILE_SIMD256
+#endif
+
 // Small mismatch between the variable names Python defines as part of configure
 // at the ones HACL* expects to be set in order to enable those headers.
 #define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128
@@ -1667,17 +1682,73 @@ hmacmodule_init_strings(hmacmodule_state *state)
 static void
 hmacmodule_init_cpu_features(hmacmodule_state *state)
 {
+    int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0;
+    int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0;
+#if defined(__x86_64__) && defined(__GNUC__)
+    __cpuid_count(1, 0, eax1, ebx1, ecx1, edx1);
+    __cpuid_count(7, 0, eax7, ebx7, ecx7, edx7);
+#elif defined(_M_X64)
+    int info1[4] = { 0 };
+    __cpuidex(info1, 1, 0);
+    eax1 = info1[0], ebx1 = info1[1], ecx1 = info1[2], edx1 = info1[3];
+
+    int info7[4] = { 0 };
+    __cpuidex(info7, 7, 0);
+    eax7 = info7[0], ebx7 = info7[1], ecx7 = info7[2], edx7 = info7[3];
+#endif
+    // fmt: off
+    (void)eax1; (void)ebx1; (void)ecx1; (void)edx1;
+    (void)eax7; (void)ebx7; (void)ecx7; (void)edx7;
+    // fmt: on
+
+#define EBX_AVX2 (1 << 5)
+#define ECX_SSE3 (1 << 0)
+#define ECX_SSSE3 (1 << 9)
+#define ECX_SSE4_1 (1 << 19)
+#define ECX_SSE4_2 (1 << 20)
+#define ECX_AVX (1 << 28)
+#define EDX_SSE (1 << 25)
+#define EDX_SSE2 (1 << 26)
+#define EDX_CMOV (1 << 15)
+
+    bool avx = (ecx1 & ECX_AVX) != 0;
+    bool avx2 = (ebx7 & EBX_AVX2) != 0;
+
+    bool sse = (edx1 & EDX_SSE) != 0;
+    bool sse2 = (edx1 & EDX_SSE2) != 0;
+    bool cmov = (edx1 & EDX_CMOV) != 0;
+
+    bool sse3 = (ecx1 & ECX_SSE3) != 0;
+    bool sse41 = (ecx1 & ECX_SSE4_1) != 0;
+    bool sse42 = (ecx1 & ECX_SSE4_2) != 0;
+
+#undef EDX_CMOV
+#undef EDX_SSE2
+#undef EDX_SSE
+#undef ECX_AVX
+#undef ECX_SSE4_2
+#undef ECX_SSE4_1
+#undef ECX_SSSE3
+#undef ECX_SSE3
+#undef EBX_AVX2
+
 #if HACL_CAN_COMPILE_SIMD128
-    // TODO: use py_cpuid_features (gh-125022) to deduce what we want
-    state->can_run_simd128 = false;
+    // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection
+    state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov;
 #else
+    // fmt: off
+    (void)sse; (void)sse2; (void)sse3; (void)sse41; (void)sse42; (void)cmov;
+    // fmt: on
     state->can_run_simd128 = false;
 #endif
 
 #if HACL_CAN_COMPILE_SIMD256
-    // TODO: use py_cpuid_features (gh-125022) to deduce what we want
-    state->can_run_simd256 = false;
+    // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection
+    state->can_run_simd256 = state->can_run_simd128 && avx && avx2;
 #else
+    // fmt: off
+    (void)avx; (void)avx2;
+    // fmt: on
     state->can_run_simd256 = false;
 #endif
 }

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to