* tests/bench-slope.c (vone): New. (auto_ghz_bench): Remove memory barrier usage; Generate constant values from volatile variable. --
Alderlake P-core is able to fuse "immediate to register addition" instructions with other instructions. This feature broke auto-GHZ detection in bench-slope. This patch forces generation of "register to register addition" by generating the constant values from a volatile source, which prevents the compiler from knowing the values held in the constant-value variables. Signed-off-by: Jussi Kivilinna <jussi.kivili...@iki.fi> --- tests/bench-slope.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/tests/bench-slope.c b/tests/bench-slope.c index b8818aed..71bc97d0 100644 --- a/tests/bench-slope.c +++ b/tests/bench-slope.c @@ -515,6 +515,8 @@ err_free: /********************************************* CPU frequency auto-detection. */ +static volatile size_t vone = 1; + static int auto_ghz_init (struct bench_obj *obj) { @@ -535,6 +537,9 @@ auto_ghz_free (struct bench_obj *obj) static void auto_ghz_bench (struct bench_obj *obj, void *buf, size_t buflen) { + size_t one = vone; + size_t two = one + vone; + (void)obj; (void)buf; @@ -544,21 +549,12 @@ auto_ghz_bench (struct bench_obj *obj, void *buf, size_t buflen) * function will give cycles/iteration result 1024.0 on high-end CPUs. * With turbo, result will be less and can be used detect turbo-clock. */ -#ifdef HAVE_GCC_ASM_VOLATILE_MEMORY - /* Auto-ghz operation takes two CPU cycles to perform. Memory barriers - * are used to prevent compiler from optimizing this loop away. */ - #define AUTO_GHZ_OPERATION \ - asm volatile ("":"+r"(buflen)::"memory"); \ - buflen ^= 1; \ - asm volatile ("":"+r"(buflen)::"memory"); \ - buflen -= 2 -#else - /* TODO: Needs alternative way of preventing compiler optimizations. - * Mix of XOR and subtraction appears to do the trick for now. */ - #define AUTO_GHZ_OPERATION \ - buflen ^= 1; \ - buflen -= 2 -#endif + /* Auto-ghz operation takes two CPU cycles to perform. 
Variables are + * generated through volatile object and therefore compiler is unable + * to optimize these operations away. */ +#define AUTO_GHZ_OPERATION \ + buflen ^= one; \ + buflen -= two #define AUTO_GHZ_OPERATION_2 \ AUTO_GHZ_OPERATION; \ -- 2.40.1 _______________________________________________ Gcrypt-devel mailing list Gcrypt-devel@gnupg.org https://lists.gnupg.org/mailman/listinfo/gcrypt-devel