[PATCH] Fix PR68583
Well, not really in the end - the actual testcase would need code hoisting to make the refs equal enough for ifcvt. Making the testcase simpler doesn't seem to capture the issue so for now without one (I'll keep trying). Bootstrapped and tested on x86_64-unknown-linux-gnu. Richard. 2015-12-09 Richard BienerPR tree-optimization/68583 * tree-if-conv.c (ifc_dr): Make flags bool, add w_unconditionally flag and rename predicates to w_predicate, rw_predicate and base_w_predicate. (DR_WRITTEN_AT_LEAST_ONCE): Rename to ... (DR_BASE_W_UNCONDITIONALLY): ... this. (DR_W_UNCONDITIONALLY): Add. (hash_memrefs_baserefs_and_store_DRs_read): Adjust. Compute unconditionally written separately from read or written. (ifcvt_memrefs_wont_trap): Properly treat reads. (ifcvt_could_trap_p): Inline ... (if_convertible_gimple_assign_stmt_p): ... here. Refactor to avoid code duplication. (if_convertible_loop_p_1): Adjust and properly initialize predicates. Index: gcc/tree-if-conv.c === *** gcc/tree-if-conv.c (revision 231396) --- gcc/tree-if-conv.c (working copy) *** if_convertible_phi_p (struct loop *loop, *** 582,601 each DR->aux field. */ struct ifc_dr { ! /* -1 when not initialized, 0 when false, 1 when true. */ ! int written_at_least_once; ! ! /* -1 when not initialized, 0 when false, 1 when true. */ ! int rw_unconditionally; ! ! tree predicate; ! ! tree base_predicate; }; #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux) ! #define DR_WRITTEN_AT_LEAST_ONCE(DR) (IFC_DR (DR)->written_at_least_once) #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally) /* Iterates over DR's and stores refs, DR and base refs, DR pairs in HASH tables. While storing them in HASH table, it checks if the --- 582,600 each DR->aux field. */ struct ifc_dr { ! bool rw_unconditionally; ! bool w_unconditionally; ! bool written_at_least_once; ! ! tree rw_predicate; ! tree w_predicate; ! tree base_w_predicate; }; #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux) ! #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once) #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally) + #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally) /* Iterates over DR's and stores refs, DR and base refs, DR pairs in HASH tables. While storing them in HASH table, it checks if the *** hash_memrefs_baserefs_and_store_DRs_read *** 623,660 ref = TREE_OPERAND (ref, 0); master_dr = _DR_map->get_or_insert (ref, ); - if (!exist1) ! { ! IFC_DR (a)->predicate = ca; ! *master_dr = a; ! } ! else ! IFC_DR (*master_dr)->predicate ! = fold_or_predicates ! (EXPR_LOCATION (IFC_DR (*master_dr)->predicate), !ca, IFC_DR (*master_dr)->predicate); ! if (is_true_predicate (IFC_DR (*master_dr)->predicate)) ! DR_RW_UNCONDITIONALLY (*master_dr) = 1; if (DR_IS_WRITE (a)) { base_master_dr = _DR_map->get_or_insert (base_ref, ); - if (!exist2) ! { ! IFC_DR (a)->base_predicate = ca; ! *base_master_dr = a; ! } ! else ! IFC_DR (*base_master_dr)->base_predicate ! = fold_or_predicates ! (EXPR_LOCATION (IFC_DR (*base_master_dr)->base_predicate), ! ca, IFC_DR (*base_master_dr)->base_predicate); ! ! if (is_true_predicate (IFC_DR (*base_master_dr)->base_predicate)) ! DR_WRITTEN_AT_LEAST_ONCE (*base_master_dr) = 1; } } --- 622,654 ref = TREE_OPERAND (ref, 0); master_dr = _DR_map->get_or_insert (ref, ); if (!exist1) ! *master_dr = a; ! if (DR_IS_WRITE (a)) ! { ! IFC_DR (*master_dr)->w_predicate ! = fold_or_predicates (UNKNOWN_LOCATION, ca, ! IFC_DR (*master_dr)->w_predicate); ! if (is_true_predicate (IFC_DR (*master_dr)->w_predicate)) ! DR_W_UNCONDITIONALLY (*master_dr) = true; ! } ! IFC_DR (*master_dr)->rw_predicate ! = fold_or_predicates (UNKNOWN_LOCATION, ca, ! IFC_DR (*master_dr)->rw_predicate); ! if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate)) ! DR_RW_UNCONDITIONALLY (*master_dr) = true; if (DR_IS_WRITE (a)) { base_master_dr = _DR_map->get_or_insert (base_ref, ); if (!exist2) ! *base_master_dr = a; ! IFC_DR (*base_master_dr)->base_w_predicate ! = fold_or_predicates (UNKNOWN_LOCATION, ca, ! IFC_DR (*base_master_dr)->base_w_predicate); ! if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate)) ! DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true; } } ***
[PATCH] Fix PR68583 some more
This time with a simplified testcase - the remaining issue with the original one is if-conversion looking at DR_REF and nothing hoisting the address compute of a[i+1] in both arms making it see they are the same (yeah, SCEV to the rescue, but not in this patch). This patch re-organizes things so that we apply if conversion of stores when it is safe (and not only when -ftree-loop-if-convert-stores is given). And when it is not safe (aka store-data-races) we rely on -ftree-loop-if-convert-stores - which I'm going to change in a followup patch to use --param allow-store-data-races like we do for LIM (and incidentially enable for -Ofast). The patch changes the flow throughout if-conversion to remember if we if-converted a store (because that needs to trigger some different code-paths) and re-uses any_mask_load_store for that which also means we apply versioning if we if-convert a store. IMHO a good thing to avoid spurious store-data-races (if requested) in non-vectorized code. if-conversion of stores in scalar code should remain to RTL if-conversion. Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. I've verified that SPEC CPU 2006 still builds and runs properly on a AVX2 machine with flag_tree_loop_if_convert_stores changed to PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES) and -Ofast -march=native. AFAICS if-conversion still has some unguarded store data races with respect to the handled-components it strips from the accesses before determining them as "same". So I suppose that strictly speaking we need to split the hash map into two when we don't allow store data races (one for reads doing the stripping and one for stores not doing it). We could also simply keep a flag "all DR_REFs same". That said, I'd like to get rid of the separate if-conversion flag for stores. Richard. 2015-12-09 Richard BienerPR tree-optimization/68583 * tree-if-conv.c (if_convertible_phi_p): Drop flag_tree_loop_if_convert_stores check in favor of the existing any_mask_load_store check. (insert_gimplified_predicates): Likewise. (combine_blocks): Likewise. (tree_if_conversion): Likewise. (ifcvt_memrefs_wont_trap): Properly check flag_tree_loop_if_convert_stores in all places that can end up introducing store-data-races. (if_convertible_gimple_assign_stmt_p): Remove restriction on flag_tree_loop_if_convert_stores for stores we can if-convert without introducing store-data-races. Force versioning for all if-converted stores. * gcc.dg/tree-ssa/ifc-pr68583.c: New testcase. * gcc.dg/vect/vect-72.c: Adjust. * gcc.dg/vect/vect-cselim-2.c: Likewise. * gcc.dg/vect/vect-strided-store-a-u8-i2.c: Likewise. Index: gcc/tree-if-conv.c === *** gcc/tree-if-conv.c (revision 231444) --- gcc/tree-if-conv.c (working copy) *** bb_with_exit_edge_p (struct loop *loop, *** 517,523 PHI is not if-convertible if: - it has more than 2 arguments. !When the flag_tree_loop_if_convert_stores is not set, PHI is not if-convertible if: - a virtual PHI is immediately used in another PHI node, - there is a virtual PHI in a BB other than the loop->header. --- 517,523 PHI is not if-convertible if: - it has more than 2 arguments. !When we didn't see if-convertible stores, PHI is not if-convertible if: - a virtual PHI is immediately used in another PHI node, - there is a virtual PHI in a BB other than the loop->header. *** if_convertible_phi_p (struct loop *loop, *** 545,554 } } ! if (flag_tree_loop_if_convert_stores || any_mask_load_store) return true; ! /* When the flag_tree_loop_if_convert_stores is not set, check that there are no memory writes in the branches of the loop to be if-converted. */ if (virtual_operand_p (gimple_phi_result (phi))) --- 545,554 } } ! if (any_mask_load_store) return true; ! /* When there were no if-convertible stores, check that there are no memory writes in the branches of the loop to be if-converted. */ if (virtual_operand_p (gimple_phi_result (phi))) *** ifcvt_memrefs_wont_trap (gimple *stmt, v *** 713,728 to unconditionally. */ if (base_master_dr && DR_BASE_W_UNCONDITIONALLY (*base_master_dr)) ! return true; else { /* or the base is know to be not readonly. */ tree base_tree = get_base_address (DR_REF (a)); if (DECL_P (base_tree) && decl_binds_to_current_def_p (base_tree) ! && flag_tree_loop_if_convert_stores ! && !TREE_READONLY (base_tree)) ! return true; } } return false; --- 713,727 to unconditionally.
Re: [PATCH] Fix PR68583
On Wed, 9 Dec 2015, Richard Biener wrote: > > Well, not really in the end - the actual testcase would need code > hoisting to make the refs equal enough for ifcvt. Making the testcase > simpler doesn't seem to capture the issue so for now without one > (I'll keep trying). > > Bootstrapped and tested on x86_64-unknown-linux-gnu. This fixed PR68417, I committed the testcase below. Richard. 2015-12-09 Richard BienerPR tree-optimization/68417 * gcc.dg/vect/pr68417.c: New testcase. Index: gcc/testsuite/gcc.dg/vect/pr68417.c === --- gcc/testsuite/gcc.dg/vect/pr68417.c (revision 0) +++ gcc/testsuite/gcc.dg/vect/pr68417.c (working copy) @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target vect_condition } */ + +#define N 16 + +typedef struct { + int x; + int y; +} Point; + +void +foo(Point *p1, Point *p2, Point *p3, int *data) +{ + int m, m1, m2; + int i; + + for (i = 0; i < N; i++) { +m = *data++; + +m1 = p1->x - m; +m2 = p2->x + m; + +p3->y = (m1 >= m2) ? p1->y : p2->y; + +p1++; +p2++; +p3++; + } +} + +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */