https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122294

            Bug ID: 122294
           Summary: Suboptimal code for several implementations of
                    memalignment
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: rtl-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: peter0x44 at disroot dot org
  Target Milestone: ---

For the following code:

#include <cstdint>
#include <stdlib.h>

// Returns the lowest set bit of the address held in ptr, i.e. the largest
// power of two that divides that address. The result is 0 when the pointer
// converts to the integer value 0.
size_t memalignment1(void *ptr)
{
  // x & -x: unsigned negation keeps the lowest set bit and inverts every bit
  // above it, so the AND leaves only that single bit.
  return (size_t)ptr & -(size_t)ptr;
}

// Returns 0 for a NULL pointer; otherwise returns the lowest set bit of the
// address held in ptr, computed as x & -x.
// NOTE(review): where a null pointer converts to the integer 0, x & -x is
// already 0, so the explicit check looks redundant -- presumably that is the
// missed simplification this test case is meant to expose.
size_t memalignment2(void *ptr)
{
  if (ptr == NULL)
    return 0;
  // Unsigned negation keeps the lowest set bit and inverts all higher bits,
  // so the AND isolates that bit.
  return (size_t)ptr & -(size_t)ptr;
}

// Returns 0 for a NULL pointer; otherwise returns 1 shifted left by the
// number of trailing zero bits in the address held in ptr, i.e. the lowest
// set bit of that address as a power of two.
size_t memalignment3(void *ptr)
{
  uintptr_t addr;
  int trailing_zeros;

  // Guard before the count below: __builtin_ctzll has an undefined result
  // for a zero argument (per the GCC builtin documentation).
  if (ptr == NULL)
    return 0;

  addr = (uintptr_t)ptr;

  // Count of zero bits below the lowest set bit of the address.
  trailing_zeros = __builtin_ctzll((unsigned long long)addr);
  // Rebuild the power of two corresponding to that bit position.
  return (size_t)1 << trailing_zeros;
}

gcc emits at -Os:

memalignment1(void*):
        mov     rax, rdi
        neg     rax
        and     rax, rdi
        ret
memalignment2(void*):
        mov     rax, rdi
        xor     edx, edx
        neg     rax
        and     rax, rdi
        test    rdi, rdi
        cmove   rax, rdx
        ret
memalignment3(void*):
        bsf     rcx, rdi
        mov     eax, 1
        xor     edx, edx
        sal     rax, cl
        test    rdi, rdi
        cmove   rax, rdx
        ret

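Only the one-liner implementation (memalignment1) generates optimal code. The
other two keep a redundant test/cmove for the null check, and memalignment3
additionally uses bsf/sal instead of being reduced to the neg/and form.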

For all 3 implementations, clang generates code identical to what gcc emits
for the one-liner.

https://gcc.godbolt.org/z/qKhr5T8aq

Reply via email to