https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122319
Bug ID: 122319
Summary: gcc does not know _tzcnt_u64 <= 64
Product: gcc
Version: 16.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: c++
Assignee: unassigned at gcc dot gnu.org
Reporter: rockeet at gmail dot com
Target Milestone: ---
```
#include <stddef.h>
#include <stdint.h>
#include <immintrin.h>
// Reproducer for the GCC missed optimization: compares the first n bytes
// (n <= 64) of x and y using AVX-512 masked loads.
// Returns true when, at the lowest byte index where the two inputs differ,
// the "less than" compare bit is set; returns false when all compared bytes
// are equal.
// NOTE(review): _mm512_cmplt_epi8_mask is the signed 8-bit compare, while the
// parameters are unsigned char -- confirm whether _mm512_cmplt_epu8_mask was
// intended. Left unchanged so the assembly in this report still matches.
bool avx512cmplt(const unsigned char* x, const unsigned char* y, size_t n) {
// Caller contract: at most 64 bytes (one 512-bit vector) are compared.
[[assume(n<=64)]];
// return x < y;
// Mask with the low n bits set (all 64 bits when n == 64).
__mmask64 msk = _bzhi_u64(-1, n);
// Zero-masking loads: byte lanes at index >= n are zeroed in both vectors,
// so they compare equal and never contribute to neq or lt.
__m512i xxx = _mm512_maskz_loadu_epi8(msk, x);
__m512i yyy = _mm512_maskz_loadu_epi8(msk, y);
// Per-byte result masks: bit i of neq is set where byte i differs, bit i of
// lt where byte i of xxx compares less than byte i of yyy.
__mmask64 neq = _mm512_cmpneq_epi8_mask(xxx, yyy);
__mmask64 lt = _mm512_cmplt_epi8_mask(xxx, yyy);
// Index of the first differing byte.
auto pos = _tzcnt_u64(neq); // pos = 64 when neq is 0
// Subject of this report: the range fact below always holds for _tzcnt_u64,
// but GCC does not derive it on its own (clang does).
//[[assume(pos<=64)]]; // gcc does not know this, clang knows
// A plain shift is not usable here: shifting a 64-bit value by 64 is
// undefined, and pos == 64 occurs when the inputs are equal.
//return (lt >> pos & 1) != 0; // maybe pos == 64 so this is wrong
// Build a one-bit mask at position pos with BEXTR and test that bit of lt.
return (_bextr_u64(-1, pos, 1) & lt) != 0;
// _bextr_u64(-1, pos, 1) == 0 when pos is 64
}
```
GCC generates the following code (https://godbolt.org/z/P6rYjGdnb):
; GCC output for the reproducer above (Intel syntax).
; The loads use rdi and rsi as the two input pointers; rdx holds n.
avx512cmplt(unsigned char const*, unsigned char const*, unsigned long):
; rcx = all ones; used as the source operand of both bzhi and bextr below.
mov rcx, -1
; msk = _bzhi_u64(-1, n)
bzhi rax, rcx, rdx
kmovq k1, rax
; Zero-masked loads of xxx and yyy under msk.
vmovdqu8 zmm0{k1}{z}, ZMMWORD PTR [rdi]
vmovdqu8 zmm1{k1}{z}, ZMMWORD PTR [rsi]
; neq = _mm512_cmpneq_epi8_mask(xxx, yyy) (predicate 4 = not equal)
vpcmpb k3, zmm0, zmm1, 4
kmovq rax, k3
; pos = _tzcnt_u64(neq)
tzcnt rax, rax
movzx eax, al ; Redundant: tzcnt yields at most 64, so rax already fits in al. GCC emits it when the [[assume(pos<=64)]] is absent.
; Set the bextr length field (bits 15:8 of the control operand) to 1.
or ah, 1
; One-bit mask at position pos: _bextr_u64(-1, pos, 1)
bextr rcx, rcx, rax
kmovq k2, rcx
; Signed less-than compare (predicate 1), restricted to the bit selected in k2.
vpcmpb k0{k2}, zmm0, zmm1, 1
; Return value (al) is 1 when any bit of k0 is set.
kortestq k0, k0
setne al
vzeroupper
ret