http://gcc.gnu.org/bugzilla/show_bug.cgi?id=60172

--- Comment #3 from Richard Biener <rguenth at gcc dot gnu.org> ---
I can't really interpret the asm differences but it seems we need more
registers?

Forwprop applies the association transform (the same one that fold-const.c
already performs when presented with large enough GENERIC trees) - it
transforms (p +p off1) +p off2 to (p +p (off1 + off2)), that is, it
reassociates so that the offsets are added together first, using unsigned
integer arithmetic, and only then applied to the pointer.  That enables the
reassociation pass to process the offset expression and simplify it (that
pass cannot handle a pointer addition chain).

This happens in forwprop4 only - thus does -fdisable-tree-forwprop4 fix the
regression?

I really can't see a fundamental difference (other than the reassociated
adds) in the resulting code.  So I wonder which RTL transform does / does not
trigger with one of the variants.

On x86_64 the code difference with -O2 [-fdisable-tree-forwprop4] is

@@ -11,22 +11,25 @@
        .cfi_startproc
        leal    5(%rdx), %r8d
        movslq  %edx, %rdx
+       salq    $2, %rdx
        movslq  %r8d, %rax
        leaq    0(,%rax,4), %r9
-       addq    %r9, %rax
        leaq    (%rdi,%r9), %r10
-       leaq    (%rax,%rax,4), %rax
+       addq    %r9, %rax
        movl    %ecx, (%r10)
        movl    %ecx, 4(%rdi,%r9)
-       leaq    (%rsi,%rax,4), %rax
+       leaq    (%rax,%rax,4), %rcx
        movl    %r8d, 60(%rdi,%r9)
-       leaq    (%rax,%rdx,4), %rax
+       salq    $2, %rcx
+       leaq    (%rdx,%rcx), %rax
+       addq    %rsi, %rax
        addl    $1, 16(%rax)
        movl    %r8d, 20(%rax)
        movl    %r8d, 24(%rax)
-       movl    (%r10), %edx
+       movl    (%r10), %edi
+       leaq    1000(%rsi,%rcx), %rax
        movl    $5, Int_Glob(%rip)
-       movl    %edx, 1020(%rax)
+       movl    %edi, 20(%rdx,%rax)
        ret
        .cfi_endproc

If we look at immediate uses before RTL expansion, the relevant changes
(single-use -> non-single-use change or vice-versa - this is what enables
combine/fwprop) are

-_32 : --> single use.
+_32 : -->2 uses.
+_16 = _41 + _32;
 _33 = Arr_2_Par_Ref_22(D) + _32;

which happens when associating

   _32 = pretmp_20 + 1000;
   _33 = Arr_2_Par_Ref_22(D) + _32;
   _34 = *_8;
-  _51 = _33 + _41;
+  _16 = _41 + _32;
+  _51 = Arr_2_Par_Ref_22(D) + _16;
   MEM[(int[25] *)_51 + 20B] = _34;

but _33 is dead after the transform.

+_33 : --> no uses

so that's a spurious difference.  Stmts with no uses are not expanded,
but the dead stmt still counts as a use of _32 (making it non-single-use),
which seems to change what TER does.  Hmm.

-_32 replace with --> _32 = pretmp_20 + 1000;
-

Killing dead stmts with

Index: gcc/tree-outof-ssa.c
===================================================================
--- gcc/tree-outof-ssa.c        (revision 207757)
+++ gcc/tree-outof-ssa.c        (working copy)
@@ -876,6 +876,21 @@ eliminate_useless_phis (void)
            }
        }
     }
+
+  for (unsigned i = 1; i < num_ssa_names; ++i)
+    {
+      tree name = ssa_name (i);
+      if (!name || !has_zero_uses (name) || virtual_operand_p (name))
+       continue;
+      gimple def_stmt = SSA_NAME_DEF_STMT (name);
+      if (!is_gimple_assign (def_stmt)
+         || gimple_has_side_effects (def_stmt)
+         || stmt_could_throw_p (def_stmt))
+       continue;
+      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
+      gsi_remove (&gsi, true);
+      release_defs (def_stmt);
+    }
 }


fixes that (hack alert).  With that we get strictly more TER.  Does
-fno-tree-ter also make the testcase regress, even with
-fdisable-tree-forwprop4?

Reply via email to