This time with a simplified testcase - the remaining issue with the original one is if-conversion looking at DR_REF and nothing hoisting the address compute of a[i+1] in both arms making it see they are the same (yeah, SCEV to the rescue, but not in this patch).
This patch re-organizes things so that we apply if conversion of stores when it is safe (and not only when -ftree-loop-if-convert-stores is given). And when it is not safe (aka store-data-races) we rely on -ftree-loop-if-convert-stores - which I'm going to change in a followup patch to use --param allow-store-data-races like we do for LIM (and incidentially enable for -Ofast). The patch changes the flow throughout if-conversion to remember if we if-converted a store (because that needs to trigger some different code-paths) and re-uses any_mask_load_store for that which also means we apply versioning if we if-convert a store. IMHO a good thing to avoid spurious store-data-races (if requested) in non-vectorized code. if-conversion of stores in scalar code should remain to RTL if-conversion. Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. I've verified that SPEC CPU 2006 still builds and runs properly on a AVX2 machine with flag_tree_loop_if_convert_stores changed to PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES) and -Ofast -march=native. AFAICS if-conversion still has some unguarded store data races with respect to the handled-components it strips from the accesses before determining them as "same". So I suppose that strictly speaking we need to split the hash map into two when we don't allow store data races (one for reads doing the stripping and one for stores not doing it). We could also simply keep a flag "all DR_REFs same". That said, I'd like to get rid of the separate if-conversion flag for stores. Richard. 2015-12-09 Richard Biener <rguent...@suse.de> PR tree-optimization/68583 * tree-if-conv.c (if_convertible_phi_p): Drop flag_tree_loop_if_convert_stores check in favor of the existing any_mask_load_store check. (insert_gimplified_predicates): Likewise. (combine_blocks): Likewise. (tree_if_conversion): Likewise. (ifcvt_memrefs_wont_trap): Properly check flag_tree_loop_if_convert_stores in all places that can end up introducing store-data-races. (if_convertible_gimple_assign_stmt_p): Remove restriction on flag_tree_loop_if_convert_stores for stores we can if-convert without introducing store-data-races. Force versioning for all if-converted stores. * gcc.dg/tree-ssa/ifc-pr68583.c: New testcase. * gcc.dg/vect/vect-72.c: Adjust. * gcc.dg/vect/vect-cselim-2.c: Likewise. * gcc.dg/vect/vect-strided-store-a-u8-i2.c: Likewise. Index: gcc/tree-if-conv.c =================================================================== *** gcc/tree-if-conv.c (revision 231444) --- gcc/tree-if-conv.c (working copy) *************** bb_with_exit_edge_p (struct loop *loop, *** 517,523 **** PHI is not if-convertible if: - it has more than 2 arguments. ! When the flag_tree_loop_if_convert_stores is not set, PHI is not if-convertible if: - a virtual PHI is immediately used in another PHI node, - there is a virtual PHI in a BB other than the loop->header. --- 517,523 ---- PHI is not if-convertible if: - it has more than 2 arguments. ! When we didn't see if-convertible stores, PHI is not if-convertible if: - a virtual PHI is immediately used in another PHI node, - there is a virtual PHI in a BB other than the loop->header. *************** if_convertible_phi_p (struct loop *loop, *** 545,554 **** } } ! if (flag_tree_loop_if_convert_stores || any_mask_load_store) return true; ! /* When the flag_tree_loop_if_convert_stores is not set, check that there are no memory writes in the branches of the loop to be if-converted. */ if (virtual_operand_p (gimple_phi_result (phi))) --- 545,554 ---- } } ! if (any_mask_load_store) return true; ! /* When there were no if-convertible stores, check that there are no memory writes in the branches of the loop to be if-converted. */ if (virtual_operand_p (gimple_phi_result (phi))) *************** ifcvt_memrefs_wont_trap (gimple *stmt, v *** 713,728 **** to unconditionally. */ if (base_master_dr && DR_BASE_W_UNCONDITIONALLY (*base_master_dr)) ! return true; else { /* or the base is know to be not readonly. */ tree base_tree = get_base_address (DR_REF (a)); if (DECL_P (base_tree) && decl_binds_to_current_def_p (base_tree) ! && flag_tree_loop_if_convert_stores ! && !TREE_READONLY (base_tree)) ! return true; } } return false; --- 713,727 ---- to unconditionally. */ if (base_master_dr && DR_BASE_W_UNCONDITIONALLY (*base_master_dr)) ! return flag_tree_loop_if_convert_stores; else { /* or the base is know to be not readonly. */ tree base_tree = get_base_address (DR_REF (a)); if (DECL_P (base_tree) && decl_binds_to_current_def_p (base_tree) ! && ! TREE_READONLY (base_tree)) ! return flag_tree_loop_if_convert_stores; } } return false; *************** if_convertible_gimple_assign_stmt_p (gim *** 791,797 **** bool *any_mask_load_store) { tree lhs = gimple_assign_lhs (stmt); - basic_block bb; if (dump_file && (dump_flags & TDF_DETAILS)) { --- 790,795 ---- *************** if_convertible_gimple_assign_stmt_p (gim *** 835,862 **** return false; } ! if (flag_tree_loop_if_convert_stores) ! return true; ! ! bb = gimple_bb (stmt); ! ! if (TREE_CODE (lhs) != SSA_NAME ! && bb != bb->loop_father->header ! && !bb_with_exit_edge_p (bb->loop_father, bb)) ! { ! if (ifcvt_can_use_mask_load_store (stmt)) ! { ! gimple_set_plf (stmt, GF_PLF_2, true); ! *any_mask_load_store = true; ! return true; ! } ! if (dump_file && (dump_flags & TDF_DETAILS)) ! { ! fprintf (dump_file, "LHS is not var\n"); ! print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); ! } ! return false; ! } return true; } --- 833,842 ---- return false; } ! /* When if-converting stores force versioning, likewise if we ! ended up generating store data races. */ ! if (gimple_vdef (stmt)) ! *any_mask_load_store = true; return true; } *************** insert_gimplified_predicates (loop_p loo *** 1851,1858 **** stmts = bb_predicate_gimplified_stmts (bb); if (stmts) { ! if (flag_tree_loop_if_convert_stores ! || any_mask_load_store) { /* Insert the predicate of the BB just after the label, as the if-conversion of memory writes will use this --- 1831,1837 ---- stmts = bb_predicate_gimplified_stmts (bb); if (stmts) { ! if (any_mask_load_store) { /* Insert the predicate of the BB just after the label, as the if-conversion of memory writes will use this *************** combine_blocks (struct loop *loop, bool *** 2174,2180 **** insert_gimplified_predicates (loop, any_mask_load_store); predicate_all_scalar_phis (loop); ! if (flag_tree_loop_if_convert_stores || any_mask_load_store) predicate_mem_writes (loop); /* Merge basic blocks: first remove all the edges in the loop, --- 2153,2159 ---- insert_gimplified_predicates (loop, any_mask_load_store); predicate_all_scalar_phis (loop); ! if (any_mask_load_store) predicate_mem_writes (loop); /* Merge basic blocks: first remove all the edges in the loop, *************** tree_if_conversion (struct loop *loop) *** 2691,2697 **** } todo |= TODO_cleanup_cfg; ! if (flag_tree_loop_if_convert_stores || any_mask_load_store) { mark_virtual_operands_for_renaming (cfun); todo |= TODO_update_ssa_only_virtuals; --- 2670,2676 ---- } todo |= TODO_cleanup_cfg; ! if (any_mask_load_store) { mark_virtual_operands_for_renaming (cfun); todo |= TODO_update_ssa_only_virtuals; Index: gcc/testsuite/gcc.dg/tree-ssa/ifc-pr68583.c =================================================================== *** gcc/testsuite/gcc.dg/tree-ssa/ifc-pr68583.c (revision 0) --- gcc/testsuite/gcc.dg/tree-ssa/ifc-pr68583.c (working copy) *************** *** 0 **** --- 1,23 ---- + /* { dg-do compile } */ + /* { dg-options "-O3 -fdump-tree-ifcvt" } */ + + void foo (long *a) + { + int i; + for (i = 0; i < 100; i+=2) + { + long *p = &a[i+1]; + if (a[i] == 0) + { + *p = 2; + a[i] = 3; + } + else + { + *p = 3; + a[i] = 4; + } + } + } + + /* { dg-final { scan-tree-dump "Applying if-conversion" "ifcvt" } } */ Index: gcc/testsuite/gcc.dg/vect/vect-72.c =================================================================== --- gcc/testsuite/gcc.dg/vect/vect-72.c (revision 231444) +++ gcc/testsuite/gcc.dg/vect/vect-72.c (working copy) @@ -19,9 +19,10 @@ int main1 () for (i=0; i < N+1; i++) { ib[i] = i; - /* Avoid vectorization. */ if (i%3 == 0) ib[i] = 5; + /* Avoid vectorization. */ + __asm__ volatile ("" : : : "memory"); } for (i = 1; i < N+1; i++) Index: gcc/testsuite/gcc.dg/vect/vect-cselim-2.c =================================================================== --- gcc/testsuite/gcc.dg/vect/vect-cselim-2.c (revision 231444) +++ gcc/testsuite/gcc.dg/vect/vect-cselim-2.c (working copy) @@ -1,4 +1,7 @@ /* { dg-require-effective-target vect_int } */ +/* We now if-convert the loop unconditonally as the memory locations + are always stored to. */ +/* { dg-additional-options "-fno-tree-loop-if-convert" } */ #include <stdarg.h> #include "tree-vect.h" Index: gcc/testsuite/gcc.dg/vect/vect-strided-store-a-u8-i2.c =================================================================== --- gcc/testsuite/gcc.dg/vect/vect-strided-store-a-u8-i2.c (revision 231444) +++ gcc/testsuite/gcc.dg/vect/vect-strided-store-a-u8-i2.c (working copy) @@ -26,6 +26,8 @@ main1 () b[i] = i * 2; if (i%3 == 0) a[i] = 10; + /* Prevent vectorization. */ + __asm__ volatile ("" : : : "memory"); } for (i = 0; i < N; i++)