http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50696

             Bug #: 50696
           Summary: [x32] Unnecessary lea
    Classification: Unclassified
           Product: gcc
           Version: 4.7.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
        AssignedTo: unassig...@gcc.gnu.org
        ReportedBy: hjl.to...@gmail.com
                CC: ubiz...@gmail.com


[hjl@gnu-mic-2 pr50633]$ cat x.i
struct s { int val[16]; };

extern double f (struct s pb, double pc);

int main ()
{
  struct s x;
  int i;

  for (i = 0; i < 16; i++)
    x.val[i] = i + 1;
  if (f (x, 10000.0L) != 10136.0L)
    __builtin_abort ();
  return 0;
}
[hjl@gnu-mic-2 pr50633]$ make x.s
/export/build/gnu/gcc-x32/build-x86_64-linux/gcc/xgcc
-B/export/build/gnu/gcc-x32/build-x86_64-linux/gcc/ -mx32 -O -S x.i
[hjl@gnu-mic-2 pr50633]$ cat x.s
    .file    "x.i"
    .text
    .globl    main
    .type    main, @function
main:
.LFB0:
    .cfi_startproc
    subq    $136, %rsp
    .cfi_def_cfa_offset 144
    movl    $0, %eax
    movl    %esp, %ecx
    addl    $60, %ecx
.L2:
    addl    $1, %eax
    leal    (%rcx,%rax,4), %edx
    movl    %eax, (%edx)
    cmpl    $16, %eax
    jne    .L2
    movq    64(%rsp), %rax
    movq    %rax, (%rsp)
    movq    72(%rsp), %rax
    movq    %rax, 8(%rsp)
    movq    80(%rsp), %rax
    movq    %rax, 16(%rsp)
    movq    88(%rsp), %rax
    movq    %rax, 24(%rsp)
    movq    96(%rsp), %rax
    movq    %rax, 32(%rsp)
    movq    104(%rsp), %rax
    movq    %rax, 40(%rsp)
    movq    112(%rsp), %rax
    movq    %rax, 48(%rsp)
    movq    120(%rsp), %rax
    movq    %rax, 56(%rsp)
    movsd    .LC0(%rip), %xmm0
    call    f
    ucomisd    .LC1(%rip), %xmm0
    jp    .L5
    je    .L7
.L5:
    call    abort
.L7:
    movl    $0, %eax
    addq    $136, %rsp
    .cfi_def_cfa_offset 8
    ret
    .cfi_endproc
.LFE0:
    .size    main, .-main

    leal    (%rcx,%rax,4), %edx
    movl    %eax, (%edx)

can be combined into

       movl    %eax, (%ecx,%eax,4)

[reply] [-] Comment 4 H.J. Lu 2011-10-06 19:19:23 UTC

Combine failed:

(set (mem:SI (and:DI (plus:DI (subreg:DI (mult:SI (reg/v:SI 84 [ i ])
                        (const_int 4 [0x4])) 0)
                (subreg:DI (reg:SI 106) 0)) 
            (const_int 4294967292 [0xfffffffc])) [3 MEM[symbol: x, index:
D.2741_12, step: 4, offset: 4294967292B]+0 S4 A32])
    (reg/v:SI 84 [ i ])) 

for

(insn 37 35 39 3 (set (reg:SI 90)
        (plus:SI (mult:SI (reg/v:SI 84 [ i ])
                (const_int 4 [0x4]))
            (reg:SI 106))) x.i:11 247 {*leasi_2}
     (nil))

(insn 39 37 41 3 (set (mem:SI (zero_extend:DI (reg:SI 90)) [3 MEM[symbol: x,
index: D.2741_12, step: 4, offset: 4294967292B]+0 S4 A32])
        (reg/v:SI 84 [ i ])) x.i:11 64 {*movsi_internal}
     (expr_list:REG_DEAD (reg:SI 90)
        (nil)))

Since the address is 32-bit aligned, masking it with 0xfffffffc is the same as
masking it with 0xffffffff.  But we don't have this information.

It is unclear why combine creates:

Failed to match this instruction:
(set (mem:SI (and:DI (plus:DI (subreg:DI (mult:SI (reg/v:SI 85 [ i ])
                        (const_int 4 [0x4])) 0)
                (subreg:DI (reg:SI 106) 0))
            (const_int 4294967292 [0xfffffffc])) [0 MEM[symbol: x, index:
D.2741_1, step: 4, offset: 4294967292B]+0 S4 A32])
    (reg/v:SI 85 [ i ]))

Considering that this is in fact a zero-extension, the "optimized" pattern is
worse than applying the subreg to the whole address, i.e.

(and:DI (subreg:DI (plus:SI (mult:SI (reg/v:SI 85 [ i ]) (const_int 4 [0x4]))
                            (reg:SI 106)) 0)
        (const_int 4294967295 [0xffffffff]))

Please note that we have registers in two different modes in the former
pattern. The latter pattern would be recognized by the i386.c code.

Reply via email to