https://gcc.gnu.org/bugzilla/show_bug.cgi?id=29776

--- Comment #25 from Adam Warner <adam.warner.nz at gmail dot com> ---
Documenting a workaround I've found for the unnecessary sign extension. I'm
still perplexed at the improbability of this appearing to work!

workaround_bsr_sign_extension.c:

#include <stdint.h>

/*
 * Baseline variant: index of the most significant set bit of `a`, computed
 * as 63 minus the leading-zero count.
 *
 * The literal 63 is a plain int and GCC documents the __builtin_clz* family
 * as returning int, so the subtraction is done in int and only then
 * converted to the uint64_t return type. In the listing quoted in this
 * message that conversion shows up as a CDQE after the BSR.
 *
 * Caveats:
 *  - GCC documents the builtins' result as undefined for a == 0.
 *  - If neither unsigned long nor unsigned long long is 8 bytes wide,
 *    control reaches the end of the function without a return statement.
 */
uint64_t bsr_u64(uint64_t a) {
  /* Pick the builtin whose operand type is 64 bits wide on this target. */
  if (sizeof(unsigned long) == 8) return 63 - __builtin_clzl(a);
  if (sizeof(unsigned long long) == 8) return 63 - __builtin_clzll(a);
}

/*
 * Workaround variant: same computation as bsr_u64, but the constant is
 * written as UINT64_C(63). That makes the left operand an unsigned 64-bit
 * constant, so the builtin's int result is converted to the 64-bit unsigned
 * type before the subtraction rather than after it.
 *
 * In the listing quoted in this message this variant compiles to a bare
 * BSR with no CDQE.
 *
 * Caveats:
 *  - GCC documents the builtins' result as undefined for a == 0.
 *  - If neither unsigned long nor unsigned long long is 8 bytes wide,
 *    control reaches the end of the function without a return statement.
 */
uint64_t bsr_u64_alt(uint64_t a) {
  /* Pick the builtin whose operand type is 64 bits wide on this target. */
  if (sizeof(unsigned long) == 8) return UINT64_C(63) - __builtin_clzl(a);
  if (sizeof(unsigned long long) == 8) return UINT64_C(63) -
__builtin_clzll(a);
}

/* Empty entry point: lets the file link into an executable (a.out) that can be disassembled. */
int main(void) {return 0;}

$ gcc -O3 workaround_bsr_sign_extension.c && objdump -d -m i386:x86-64:intel a.out | less

Relevant output:

0000000000001140 <bsr_u64>:
    1140:       48 0f bd c7             bsr    rax,rdi
    1144:       48 98                   cdqe
    1146:       c3                      ret
    1147:       66 0f 1f 84 00 00 00    nop    WORD PTR [rax+rax*1+0x0]
    114e:       00 00 

0000000000001150 <bsr_u64_alt>:
    1150:       48 0f bd c7             bsr    rax,rdi
    1154:       c3                      ret

In the alternative implementation the superfluous 32- to 64-bit sign extension
instruction CDQE is no longer generated.

Reply via email to