https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122074

            Bug ID: 122074
           Summary: Wrong code for avx512 intrinsic
           Product: gcc
           Version: 16.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: rockeet at gmail dot com
  Target Milestone: ---

#include <immintrin.h>
using byte_t = unsigned char;
// Reproducer: searches the first `len` bytes at `data` for the first byte that
// compares greater-than-or-equal to `key`, and returns its index.
//
// How the expression is built (innermost first):
//   - _bzhi_u32(-1, len) produces a 32-bit mask whose low `len` bits are set;
//     it is used as the write-mask so only the first `len` lanes can match.
//   - *(__m256i_u*)data reads 32 bytes from `data` as an unaligned vector.
//     The cast also drops the `const` qualifier of `data`.
//   - _mm256_mask_cmpge_epi8_mask compares each byte lane against the
//     broadcast `key` and yields one result bit per lane, zeroed where the
//     write-mask bit is 0.
//   - _tzcnt_u32 returns the position of the lowest set bit, i.e. the index of
//     the first matching lane; when no bit is set it returns 32.
//
// NOTE(review): the `epi8` compare is a signed byte comparison although
// byte_t is unsigned char -- confirm whether `epu8` was intended.
// NOTE(review): the 32-byte dereference is written as a plain (unmasked) load
// in the source, independent of `len`; presumably a masked load such as
// _mm256_maskz_loadu_epi8 would be needed to guarantee that bytes beyond
// `len` are never touched -- confirm against the intrinsics documentation.
// NOTE(review): the function name suggests `len` is expected to be <= 32;
// nothing here checks that.
size_t avx512_search_byte_max32(const byte_t* data, size_t len, byte_t key) {
        return _tzcnt_u32(_mm256_mask_cmpge_epi8_mask(_bzhi_u32(-1, len),
                            *(__m256i_u*)data, _mm256_set1_epi8(key)));
}
See https://godbolt.org/z/Ef157Pj3n . GCC generates:

"avx512_search_byte_max32(unsigned char const*, unsigned long, unsigned char)":
        vmovdqu ymm1, YMMWORD PTR [rdi]
        vpbroadcastb    ymm0, edx
        mov     eax, -1
        bzhi    eax, eax, esi
        kmovd   k1, eax
        vpcmpb  k0{k1}, ymm1, ymm0, 5
        kmovd   eax, k0
        tzcnt   eax, eax
        mov     eax, eax
        vzeroupper
        ret

Intel ICC generates:

avx512_search_byte_max32(unsigned char const*, unsigned long, unsigned char):
..B3.1:                         # Preds ..B3.0
        mov       eax, -1                                       #20.20
        vpbroadcastb ymm0, edx                                  #20.20
        bzhi      ecx, eax, esi                                 #20.20
        kmovd     k1, ecx                                       #20.20
        vpcmpb    k0{k1}, ymm0, YMMWORD PTR [rdi], 2            #20.20
        kmovd     r8d, k0                                       #20.20
        tzcnt     eax, r8d                                      #20.9
        vzeroupper                                              #20.9
        ret                                                     #20.9
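The issue is: if [data, data+32) spans a page boundary and the bytes whose
mask bits are 0 lie at an invalid address (one whose access raises a segfault
or bus error), the code generated by GCC will crash with a core dump, because
it loads all 32 bytes with a separate, unmasked vmovdqu before the masked
compare, whereas ICC reads memory only through the memory operand of the
masked vpcmpb.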

Reply via email to