https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108710

            Bug ID: 108710
           Summary: Recognizing "rounding down to the nearest power of
                    two"
           Product: gcc
           Version: 13.0
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: tkoenig at gcc dot gnu.org
  Target Milestone: ---

In the code

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

uint64_t foo (uint64_t x)
{
  x = x | (x >> 1);
  x = x | (x >> 2);
  x = x | (x >> 4);
  x = x | (x >> 8);
  x = x | (x >> 16);
  x = x | (x >> 32);
  return x - (x >> 1);
}

uint64_t bar (uint64_t x)
{
  if (x == 0)
    return 0;
  else
    return 1ul << (63 - __builtin_clzl(x));
}

void tst (uint64_t a)
{
  uint64_t r_foo, r_bar;
  r_foo = foo(a);
  r_bar = bar(a);
  printf ("%20lu %20lu %20lu\n", a, r_foo, r_bar);
  if (r_foo != r_bar)
    abort();
}

int main()
{
  tst(0ul);
  for (uint64_t i = 1; i<64; i++) {
    for (uint64_t j = 0; j<i; j++) {
      uint64_t a, b, c;
      b = 1ul << i;
      a = b - (1ul << j);
      c = b + (1ul << j);
      tst(a);
      tst(b);
      tst(c);
    }
  }
  return 0;
}

the nearest power of two, roundnihg down, is calculated, using the two
methods from chapter 3-2 in Hacker's Delight.  The method used in foo,
using only standard C, is used, for example, in the embench benchmkark.

Code for recent trunk is 

foo:
.LFB39:
        .cfi_startproc
        endbr64
        movq    %rdi, %rax
        shrq    %rax
        orq     %rax, %rdi
        movq    %rdi, %rax
        shrq    $2, %rax
        orq     %rdi, %rax
        movq    %rax, %rdi
        shrq    $4, %rdi
        orq     %rdi, %rax
        movq    %rax, %rdi
        shrq    $8, %rdi
        orq     %rax, %rdi
        movq    %rdi, %rax
        shrq    $16, %rax
        orq     %rax, %rdi
        movq    %rdi, %rax
        shrq    $32, %rax
        orq     %rdi, %rax
        movq    %rax, %rdx
        shrq    %rdx
        subq    %rdx, %rax
        ret

(same with -O3 and -Os) and for bar is

bar:
.LFB23:
        .cfi_startproc
        movq    %rdi, %rax
        testq   %rdi, %rdi
        je      .L4
        movabsq $-9223372036854775808, %rax
        bsrq    %rdi, %rcx
        xorq    $63, %rcx
        shrq    %cl, %rax
.L4:
        ret

which is more compact and should have a well-predicted branch.

It would be good for the idiom to be recognized (same as for the
round up to the nearest power of two).  Also, the number of register
moves in the original version could be removed by using more registers,
at least for -Os.

Reply via email to