https://github.com/python/cpython/commit/b865871486987e7622a2059981cc8d708f9b04b0
commit: b865871486987e7622a2059981cc8d708f9b04b0
branch: main
author: Bénédikt Tran <10796600+picn...@users.noreply.github.com>
committer: picnixz <10796600+picn...@users.noreply.github.com>
date: 2025-04-07T09:31:17+02:00
summary:

gh-99108: add support for SIMD-accelerated HMAC-BLAKE2 (#132120)

files:
M Modules/hmacmodule.c

diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c
index faf4e0a023147e..f75854c6ef5c91 100644
--- a/Modules/hmacmodule.c
+++ b/Modules/hmacmodule.c
@@ -20,6 +20,21 @@
 #include "pycore_hashtable.h"
 #include "pycore_strhex.h" // _Py_strhex()
 
+/*
+ * Taken from blake2module.c. In the future, detection of SIMD support
+ * should be delegated to https://github.com/python/cpython/pull/125011.
+ */
+#if defined(__x86_64__) && defined(__GNUC__)
+# include <cpuid.h>
+#elif defined(_M_X64)
+# include <intrin.h>
+#endif
+
+#if defined(__APPLE__) && defined(__arm64__)
+# undef HACL_CAN_COMPILE_SIMD128
+# undef HACL_CAN_COMPILE_SIMD256
+#endif
+
 // Small mismatch between the variable names Python defines as part of configure
 // at the ones HACL* expects to be set in order to enable those headers.
 #define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128
@@ -1667,17 +1682,73 @@ hmacmodule_init_strings(hmacmodule_state *state)
 static void
 hmacmodule_init_cpu_features(hmacmodule_state *state)
 {
+    int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0;
+    int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0;
+#if defined(__x86_64__) && defined(__GNUC__)
+    __cpuid_count(1, 0, eax1, ebx1, ecx1, edx1);
+    __cpuid_count(7, 0, eax7, ebx7, ecx7, edx7);
+#elif defined(_M_X64)
+    int info1[4] = { 0 };
+    __cpuidex(info1, 1, 0);
+    eax1 = info1[0], ebx1 = info1[1], ecx1 = info1[2], edx1 = info1[3];
+
+    int info7[4] = { 0 };
+    __cpuidex(info7, 7, 0);
+    eax7 = info7[0], ebx7 = info7[1], ecx7 = info7[2], edx7 = info7[3];
+#endif
+    // fmt: off
+    (void)eax1; (void)ebx1; (void)ecx1; (void)edx1;
+    (void)eax7; (void)ebx7; (void)ecx7; (void)edx7;
+    // fmt: on
+
+#define EBX_AVX2 (1 << 5)
+#define ECX_SSE3 (1 << 0)
+#define ECX_SSSE3 (1 << 9)
+#define ECX_SSE4_1 (1 << 19)
+#define ECX_SSE4_2 (1 << 20)
+#define ECX_AVX (1 << 28)
+#define EDX_SSE (1 << 25)
+#define EDX_SSE2 (1 << 26)
+#define EDX_CMOV (1 << 15)
+
+    bool avx = (ecx1 & ECX_AVX) != 0;
+    bool avx2 = (ebx7 & EBX_AVX2) != 0;
+
+    bool sse = (edx1 & EDX_SSE) != 0;
+    bool sse2 = (edx1 & EDX_SSE2) != 0;
+    bool cmov = (edx1 & EDX_CMOV) != 0;
+
+    bool sse3 = (ecx1 & ECX_SSE3) != 0;
+    bool sse41 = (ecx1 & ECX_SSE4_1) != 0;
+    bool sse42 = (ecx1 & ECX_SSE4_2) != 0;
+
+#undef EDX_CMOV
+#undef EDX_SSE2
+#undef EDX_SSE
+#undef ECX_AVX
+#undef ECX_SSE4_2
+#undef ECX_SSE4_1
+#undef ECX_SSSE3
+#undef ECX_SSE3
+#undef EBX_AVX2
+
 #if HACL_CAN_COMPILE_SIMD128
-    // TODO: use py_cpuid_features (gh-125022) to deduce what we want
-    state->can_run_simd128 = false;
+    // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection
+    state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov;
 #else
+    // fmt: off
+    (void)sse; (void)sse2; (void)sse3; (void)sse41; (void)sse42; (void)cmov;
+    // fmt: on
     state->can_run_simd128 = false;
 #endif
 
 #if HACL_CAN_COMPILE_SIMD256
-    // TODO: use py_cpuid_features (gh-125022) to deduce what we want
-    state->can_run_simd256 = false;
+    // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection
+    state->can_run_simd256 = state->can_run_simd128 && avx && avx2;
 #else
+    // fmt: off
+    (void)avx; (void)avx2;
+    // fmt: on
     state->can_run_simd256 = false;
 #endif
 }

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com