https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122319

            Bug ID: 122319
           Summary: gcc does not know _tzcnt_u64 <= 64
           Product: gcc
           Version: 16.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: rockeet at gmail dot com
  Target Milestone: ---

```
#include <stddef.h>
#include <stdint.h>
#include <immintrin.h>
// Reproducer for the bug report: compares up to 64 bytes of x and y with
// AVX-512 byte-mask compares and returns whether the tested bit of the
// "less-than" mask is set.
//
// x, y : byte buffers; only the lanes selected by the length mask are loaded.
// n    : number of bytes to compare; the function assumes n <= 64.
// Returns true when the bit of `lt` selected by the final expression is set,
// false otherwise (including when no byte differs, i.e. pos == 64).
bool avx512cmplt(const unsigned char* x, const unsigned char* y, size_t n) {
    [[assume(n<=64)]]; // tells the optimizer that n never exceeds 64
    // return x < y;
    __mmask64 msk = _bzhi_u64(-1, n); // low n bits set: one mask bit per byte lane to load
    __m512i   xxx = _mm512_maskz_loadu_epi8(msk, x); // lanes not selected by msk are zeroed
    __m512i   yyy = _mm512_maskz_loadu_epi8(msk, y);
    __mmask64 neq = _mm512_cmpneq_epi8_mask(xxx, yyy); // bit i set where byte i differs
    // NOTE(review): the epi8 compare treats bytes as signed while the inputs
    // are unsigned char -- confirm this is intended (an epu8 variant exists).
    __mmask64 lt  = _mm512_cmplt_epi8_mask(xxx, yyy);
    auto pos = _tzcnt_u64(neq); // pos = 64 when neq is 0
    //[[assume(pos<=64)]]; // gcc does not know this, clang knows
  //return (lt >> pos & 1) != 0; // maybe pos == 64 so this is wrong
    // NOTE(review): BEXTR returns the extracted field right-aligned at bit 0,
    // so _bextr_u64(-1, pos, 1) is 1 for pos < 64; the AND below therefore
    // appears to test bit 0 of lt rather than bit pos -- confirm
    // (_bextr_u64(lt, pos, 1) may be what was intended).
    return (_bextr_u64(-1, pos, 1) & lt) != 0;
    //      _bextr_u64(-1, pos, 1) == 0 when pos is 64
}
```

GCC generates the following code (https://godbolt.org/z/P6rYjGdnb):

avx512cmplt(unsigned char const*, unsigned char const*, unsigned long):
        mov     rcx, -1
        bzhi    rax, rcx, rdx
        kmovq   k1, rax
        vmovdqu8        zmm0{k1}{z}, ZMMWORD PTR [rdi]
        vmovdqu8        zmm1{k1}{z}, ZMMWORD PTR [rsi]
        vpcmpb  k3, zmm0, zmm1, 4
        kmovq   rax, k3
        tzcnt   rax, rax
        movzx   eax, al ; This is redundant if there is no assume
        or      ah, 1
        bextr   rcx, rcx, rax
        kmovq   k2, rcx
        vpcmpb  k0{k2}, zmm0, zmm1, 1
        kortestq        k0, k0
        setne   al
        vzeroupper
        ret

Reply via email to