This time with a simplified testcase - the remaining issue with
the original one is if-conversion looking at DR_REF and nothing
hoisting the address compute of a[i+1] in both arms making it
see they are the same (yeah, SCEV to the rescue, but not in this
patch).

This patch re-organizes things so that we apply if conversion
of stores when it is safe (and not only when -ftree-loop-if-convert-stores
is given).  And when it is not safe (aka store-data-races) we rely
on -ftree-loop-if-convert-stores - which I'm going to change in a followup
patch to use --param allow-store-data-races like we do for LIM
(and incidentially enable for -Ofast).

The patch changes the flow throughout if-conversion to remember
if we if-converted a store (because that needs to trigger some
different code-paths) and re-uses any_mask_load_store for that
which also means we apply versioning if we if-convert a store.
IMHO a good thing to avoid spurious store-data-races (if requested)
in non-vectorized code.  if-conversion of stores in scalar
code should remain to RTL if-conversion.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

I've verified that SPEC CPU 2006 still builds and runs properly
on a AVX2 machine with flag_tree_loop_if_convert_stores changed
to PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES) and -Ofast -march=native.

AFAICS if-conversion still has some unguarded store data races
with respect to the handled-components it strips from the
accesses before determining them as "same".  So I suppose
that strictly speaking we need to split the hash map into
two when we don't allow store data races (one for reads
doing the stripping and one for stores not doing it).  We could
also simply keep a flag "all DR_REFs same".

That said, I'd like to get rid of the separate if-conversion flag
for stores.

Richard.

2015-12-09  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/68583
        * tree-if-conv.c (if_convertible_phi_p): Drop
        flag_tree_loop_if_convert_stores check in favor of the
        existing any_mask_load_store check.
        (insert_gimplified_predicates): Likewise.
        (combine_blocks): Likewise.
        (tree_if_conversion): Likewise.
        (ifcvt_memrefs_wont_trap): Properly check
        flag_tree_loop_if_convert_stores in all places that can end
        up introducing store-data-races.
        (if_convertible_gimple_assign_stmt_p): Remove restriction
        on flag_tree_loop_if_convert_stores for stores we can if-convert
        without introducing store-data-races.  Force versioning for
        all if-converted stores.

        * gcc.dg/tree-ssa/ifc-pr68583.c: New testcase.
        * gcc.dg/vect/vect-72.c: Adjust.
        * gcc.dg/vect/vect-cselim-2.c: Likewise.
        * gcc.dg/vect/vect-strided-store-a-u8-i2.c: Likewise.

Index: gcc/tree-if-conv.c
===================================================================
*** gcc/tree-if-conv.c  (revision 231444)
--- gcc/tree-if-conv.c  (working copy)
*************** bb_with_exit_edge_p (struct loop *loop,
*** 517,523 ****
     PHI is not if-convertible if:
     - it has more than 2 arguments.
  
!    When the flag_tree_loop_if_convert_stores is not set, PHI is not
     if-convertible if:
     - a virtual PHI is immediately used in another PHI node,
     - there is a virtual PHI in a BB other than the loop->header.
--- 517,523 ----
     PHI is not if-convertible if:
     - it has more than 2 arguments.
  
!    When we didn't see if-convertible stores, PHI is not
     if-convertible if:
     - a virtual PHI is immediately used in another PHI node,
     - there is a virtual PHI in a BB other than the loop->header.
*************** if_convertible_phi_p (struct loop *loop,
*** 545,554 ****
          }
      }
  
!   if (flag_tree_loop_if_convert_stores || any_mask_load_store)
      return true;
  
!   /* When the flag_tree_loop_if_convert_stores is not set, check
       that there are no memory writes in the branches of the loop to be
       if-converted.  */
    if (virtual_operand_p (gimple_phi_result (phi)))
--- 545,554 ----
          }
      }
  
!   if (any_mask_load_store)
      return true;
  
!   /* When there were no if-convertible stores, check
       that there are no memory writes in the branches of the loop to be
       if-converted.  */
    if (virtual_operand_p (gimple_phi_result (phi)))
*************** ifcvt_memrefs_wont_trap (gimple *stmt, v
*** 713,728 ****
           to unconditionally.  */
        if (base_master_dr
          && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
!       return true;
        else
        {
          /* or the base is know to be not readonly.  */
          tree base_tree = get_base_address (DR_REF (a));
          if (DECL_P (base_tree)
              && decl_binds_to_current_def_p (base_tree)
!             && flag_tree_loop_if_convert_stores
!             && !TREE_READONLY (base_tree))
!           return true;
        }
      }
    return false;
--- 713,727 ----
           to unconditionally.  */
        if (base_master_dr
          && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
!       return flag_tree_loop_if_convert_stores;
        else
        {
          /* or the base is know to be not readonly.  */
          tree base_tree = get_base_address (DR_REF (a));
          if (DECL_P (base_tree)
              && decl_binds_to_current_def_p (base_tree)
!             && ! TREE_READONLY (base_tree))
!           return flag_tree_loop_if_convert_stores;
        }
      }
    return false;
*************** if_convertible_gimple_assign_stmt_p (gim
*** 791,797 ****
                                     bool *any_mask_load_store)
  {
    tree lhs = gimple_assign_lhs (stmt);
-   basic_block bb;
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
--- 790,795 ----
*************** if_convertible_gimple_assign_stmt_p (gim
*** 835,862 ****
        return false;
      }
  
!   if (flag_tree_loop_if_convert_stores)
!     return true;
! 
!   bb = gimple_bb (stmt);
! 
!   if (TREE_CODE (lhs) != SSA_NAME
!       && bb != bb->loop_father->header
!       && !bb_with_exit_edge_p (bb->loop_father, bb))
!     {
!       if (ifcvt_can_use_mask_load_store (stmt))
!       {
!         gimple_set_plf (stmt, GF_PLF_2, true);
!         *any_mask_load_store = true;
!         return true;
!       }
!       if (dump_file && (dump_flags & TDF_DETAILS))
!       {
!         fprintf (dump_file, "LHS is not var\n");
!         print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
!       }
!       return false;
!     }
  
    return true;
  }
--- 833,842 ----
        return false;
      }
  
!   /* When if-converting stores force versioning, likewise if we
!      ended up generating store data races.  */
!   if (gimple_vdef (stmt))
!     *any_mask_load_store = true;
  
    return true;
  }
*************** insert_gimplified_predicates (loop_p loo
*** 1851,1858 ****
        stmts = bb_predicate_gimplified_stmts (bb);
        if (stmts)
        {
!         if (flag_tree_loop_if_convert_stores
!             || any_mask_load_store)
            {
              /* Insert the predicate of the BB just after the label,
                 as the if-conversion of memory writes will use this
--- 1831,1837 ----
        stmts = bb_predicate_gimplified_stmts (bb);
        if (stmts)
        {
!         if (any_mask_load_store)
            {
              /* Insert the predicate of the BB just after the label,
                 as the if-conversion of memory writes will use this
*************** combine_blocks (struct loop *loop, bool
*** 2174,2180 ****
    insert_gimplified_predicates (loop, any_mask_load_store);
    predicate_all_scalar_phis (loop);
  
!   if (flag_tree_loop_if_convert_stores || any_mask_load_store)
      predicate_mem_writes (loop);
  
    /* Merge basic blocks: first remove all the edges in the loop,
--- 2153,2159 ----
    insert_gimplified_predicates (loop, any_mask_load_store);
    predicate_all_scalar_phis (loop);
  
!   if (any_mask_load_store)
      predicate_mem_writes (loop);
  
    /* Merge basic blocks: first remove all the edges in the loop,
*************** tree_if_conversion (struct loop *loop)
*** 2691,2697 ****
      }
  
    todo |= TODO_cleanup_cfg;
!   if (flag_tree_loop_if_convert_stores || any_mask_load_store)
      {
        mark_virtual_operands_for_renaming (cfun);
        todo |= TODO_update_ssa_only_virtuals;
--- 2670,2676 ----
      }
  
    todo |= TODO_cleanup_cfg;
!   if (any_mask_load_store)
      {
        mark_virtual_operands_for_renaming (cfun);
        todo |= TODO_update_ssa_only_virtuals;
Index: gcc/testsuite/gcc.dg/tree-ssa/ifc-pr68583.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/ifc-pr68583.c (revision 0)
--- gcc/testsuite/gcc.dg/tree-ssa/ifc-pr68583.c (working copy)
***************
*** 0 ****
--- 1,23 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O3 -fdump-tree-ifcvt" } */
+ 
+ void foo (long *a)
+ {
+   int i;
+   for (i = 0; i < 100; i+=2)
+     {
+       long *p = &a[i+1];
+       if (a[i] == 0)
+       {
+         *p = 2;
+         a[i] = 3;
+       }
+       else
+       {
+         *p = 3;
+         a[i] = 4;
+       }
+     }
+ }
+ 
+ /* { dg-final { scan-tree-dump "Applying if-conversion" "ifcvt" } } */
Index: gcc/testsuite/gcc.dg/vect/vect-72.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/vect-72.c (revision 231444)
+++ gcc/testsuite/gcc.dg/vect/vect-72.c (working copy)
@@ -19,9 +19,10 @@ int main1 ()
   for (i=0; i < N+1; i++)
     {
       ib[i] = i;
-      /* Avoid vectorization.  */
       if (i%3 == 0)
         ib[i] = 5;
+      /* Avoid vectorization.  */
+      __asm__ volatile ("" : : : "memory");
     }
 
   for (i = 1; i < N+1; i++)
Index: gcc/testsuite/gcc.dg/vect/vect-cselim-2.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/vect-cselim-2.c   (revision 231444)
+++ gcc/testsuite/gcc.dg/vect/vect-cselim-2.c   (working copy)
@@ -1,4 +1,7 @@
 /* { dg-require-effective-target vect_int } */
+/* We now if-convert the loop unconditonally as the memory locations
+   are always stored to.  */
+/* { dg-additional-options "-fno-tree-loop-if-convert" } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
Index: gcc/testsuite/gcc.dg/vect/vect-strided-store-a-u8-i2.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/vect-strided-store-a-u8-i2.c      (revision 
231444)
+++ gcc/testsuite/gcc.dg/vect/vect-strided-store-a-u8-i2.c      (working copy)
@@ -26,6 +26,8 @@ main1 ()
       b[i] = i * 2;
       if (i%3 == 0)
         a[i] = 10; 
+      /* Prevent vectorization.  */
+      __asm__ volatile ("" : : : "memory");
     }
 
   for (i = 0; i < N; i++)

Reply via email to