[Bug rtl-optimization/86627] New: Inefficient division of 128-bit ints by small constant integers

tkoenig at gcc dot gnu.org Sun, 22 Jul 2018 03:20:02 -0700

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86627


            Bug ID: 86627
           Summary: Inefficient division of 128-bit ints by small constant
                    integers
           Product: gcc
           Version: 9.0
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P3
         Component: rtl-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: tkoenig at gcc dot gnu.org
  Target Milestone: ---

For division by small constant integers, gcc uses quite a few tricks
when the dividend fits into a register. For 128-bit integers, a library
function is called instead, which takes much longer. Even a division by
two calls the library function.

Test case:

/* Reproducer: signed division of a `long` by the constant 2.
   In the assembly listed in this report, this compiles to inline
   shift/add instructions with no call. */
long half_64 (long a)
{
  return a/2;
}

/* Reproducer: signed division of a `long` by the constant 3.
   In the assembly listed in this report, this compiles to an inline
   multiply by a constant plus a sign correction, with no call. */
long onethird_64 (long a)
{
  return a/3;
}

/* Reproducer: signed division of a 128-bit integer by the constant 2.
   In the assembly listed in this report, this compiles to a call to
   __divti3 -- the inefficiency this report is about. */
__int128_t half_128 (__int128_t a)
{
  return a/2;
}

/* Reproducer: signed division of a 128-bit integer by the constant 3.
   In the assembly listed in this report, this compiles to a call to
   __divti3 with the divisor 3 passed as a 128-bit value. */
__int128_t onethird_128 (__int128_t a)
{
  return a/3;
}

This gets translated on x86_64-pc-linux-gnu into

        .file   "halb.c"
        .text
        .p2align 4
        .globl  half_64
        .type   half_64, @function
half_64:
        # long half_64(long a)  -- returns a / 2, rounded toward zero.
        # In:  %rdi = a        Out: %rax = a / 2
        # A plain arithmetic shift would round toward -infinity, so 1 is
        # added to negative inputs before shifting.
.LFB0:
        .cfi_startproc
        movq    %rdi, %rax              # rax = a
        shrq    $63, %rax               # rax = sign bit of a (1 if a < 0, else 0)
        addq    %rdi, %rax              # rax = a + (a < 0 ? 1 : 0)
        sarq    %rax                    # arithmetic shift right by 1 => a / 2
        ret
        .cfi_endproc
.LFE0:
        .size   half_64, .-half_64
        .p2align 4
        .globl  onethird_64
        .type   onethird_64, @function
onethird_64:
        # long onethird_64(long a)  -- returns a / 3, rounded toward zero.
        # In:  %rdi = a        Out: %rax = a / 3
        # Division is replaced by a multiplication with the constant
        # 6148914691236517206 = 0x5555555555555556 = (2^64 + 2) / 3,
        # keeping only the high 64 bits of the 128-bit product.
.LFB1:
        .cfi_startproc
        movabsq $6148914691236517206, %rdx  # rdx = (2^64 + 2) / 3
        movq    %rdi, %rax              # rax = a
        sarq    $63, %rdi               # rdi = -1 if a < 0, else 0
        imulq   %rdx                    # signed rdx:rax = rax * rdx
        movq    %rdx, %rax              # rax = high 64 bits of the product
        subq    %rdi, %rax              # subtracting -1 adds 1 for negative a
        ret
        .cfi_endproc
.LFE1:
        .size   onethird_64, .-onethird_64
        .globl  __divti3
        .p2align 4
        .globl  half_128
        .type   half_128, @function
half_128:
        # __int128_t half_128(__int128_t a)  -- returns a / 2.
        # No inline expansion here: the division is delegated to __divti3.
        # The incoming argument registers holding `a` are not modified, so
        # `a` is forwarded as the first argument exactly as it arrived.
        # The 128-bit divisor 2 is built in %rdx (value 2) and %rcx (value 0)
        # -- presumably low half in %rdx, high half in %rcx per the SysV ABI.
        # Whatever __divti3 returns is returned unchanged to the caller.
.LFB2:
        .cfi_startproc
        subq    $8, %rsp                # keep %rsp 16-byte aligned at the call
        .cfi_def_cfa_offset 16
        movl    $2, %edx                # rdx = 2 (32-bit write zeroes the upper half)
        xorl    %ecx, %ecx              # rcx = 0
        call    __divti3
        addq    $8, %rsp                # undo the alignment adjustment
        .cfi_def_cfa_offset 8
        ret
        .cfi_endproc
.LFE2:
        .size   half_128, .-half_128
        .p2align 4
        .globl  onethird_128
        .type   onethird_128, @function
onethird_128:
        # __int128_t onethird_128(__int128_t a)  -- returns a / 3.
        # No inline expansion here: the division is delegated to __divti3.
        # The incoming argument registers holding `a` are not modified, so
        # `a` is forwarded as the first argument exactly as it arrived.
        # The 128-bit divisor 3 is built in %rdx (value 3) and %rcx (value 0)
        # -- presumably low half in %rdx, high half in %rcx per the SysV ABI.
        # Whatever __divti3 returns is returned unchanged to the caller.
.LFB3:
        .cfi_startproc
        subq    $8, %rsp                # keep %rsp 16-byte aligned at the call
        .cfi_def_cfa_offset 16
        movl    $3, %edx                # rdx = 3 (32-bit write zeroes the upper half)
        xorl    %ecx, %ecx              # rcx = 0
        call    __divti3
        addq    $8, %rsp                # undo the alignment adjustment
        .cfi_def_cfa_offset 8
        ret
        .cfi_endproc
.LFE3:
        .size   onethird_128, .-onethird_128
        .ident  "GCC: (GNU) 9.0.0 20180711 (experimental)"
        .section        .note.GNU-stack,"",@progbits

[Bug rtl-optimization/86627] New: Inefficient division of 128-bit ints by small constant integers

Reply via email to