Hi, One issue revealed in tree ifcvt is the pass stores/tracks DRs against its memory references in IR. This causes difficulty in identifying same memory references appearing in different forms. Given below example:
void foo (int a[], int b[]) { int i; for (i = 0; i < 100; i++) { if (a[i] ==0) a[i] = b[i]*4; else a[i] = b[i]*3; } } The gimple dump before tree ifcvt is as: <bb 2>: <bb 3>: # i_24 = PHI <i_19(7), 0(2)> # ivtmp_28 = PHI <ivtmp_23(7), 100(2)> _5 = (long unsigned int) i_24; _6 = _5 * 4; _8 = a_7(D) + _6; _9 = *_8; if (_9 == 0) goto <bb 4>; else goto <bb 5>; <bb 4>: _11 = b_10(D) + _6; _12 = *_11; _13 = _12 * 4; goto <bb 6>; <bb 5>: _15 = b_10(D) + _6; _16 = *_15; _17 = _16 * 3; <bb 6>: # cstore_1 = PHI <_13(4), _17(5)> *_8 = cstore_1; i_19 = i_24 + 1; ivtmp_23 = ivtmp_28 - 1; if (ivtmp_23 != 0) goto <bb 7>; else goto <bb 8>; <bb 7>: goto <bb 3>; <bb 8>: return; The two memory references "*_11" and "*_15" are actually the same thing, but ifcvt failed to discover this because they are recorded in different forms. This patch fixes the issue by recording/tracking memory reference against its innermost_loop_behavior if: the memory reference has innermost_loop_behavior and it is not a compound reference. BTW, PR56625 reported that this case couldn't be vectorized even tree if-conv can handle it. Interesting thing is at current time, it's tree if-conv that could not handle the case. Once it's if-converted with this patch, vectorizer are able to handle it too. Bootstrap and test on x86_64 and AArch64. Is it OK, not sure if it's GCC 7? Thanks, bin 2016-03-11 Bin Cheng <bin.ch...@arm.com> PR tree-optimization/56625 PR tree-optimization/69489 * tree-data-ref.h (innermost_loop_behavior_hash): New class for hashing struct innermost_loop_behavior. (DR_INNERMOST): New macro. * tree-if-conv.c (innermost_DR_map): New map. (ref_DR_map, baseref_DR_map): Revise the comment. (hash_memrefs_baserefs_and_store_DRs_read_written_info): Store DR to innermost_DR_map if it has innermost loop behavior and is not a compound reference. (ifcvt_memrefs_wont_trap): Get DR from innermost_DR_map if it has innermost loop behavior and is not a compound reference. (if_convertible_loop_p_1): Initialize innermost_DR_map. (if_convertible_loop_p): Release innermost_DR_map. gcc/testsuite/ChangeLog 2016-03-11 Bin Cheng <bin.ch...@arm.com> PR tree-optimization/56625 PR tree-optimization/69489 * gcc.dg/tree-ssa/ifc-pr69489-1.c: New test.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-pr69489-1.c b/gcc/testsuite/gcc.dg/tree-ssa/ifc-pr69489-1.c new file mode 100644 index 0000000..05db62b --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-pr69489-1.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-S -O2 -ftree-vectorize -fdump-tree-ifcvt-stats" { target *-*-* } } */ + +void foo (int a[], int b[]) +{ + int i; + for (i = 0; i < 100; i++) + { + if (a[i] ==0) + a[i] = b[i]*4; + else + a[i] = b[i]*3; + } +} + +/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */ diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h index eb24d16..310514a 100644 --- a/gcc/tree-data-ref.h +++ b/gcc/tree-data-ref.h @@ -57,6 +57,61 @@ struct innermost_loop_behavior tree aligned_to; }; +/* Hash for struct innermost_loop_behavior. It depends on the user to + free the memory. */ + +struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior> +{ + static inline hashval_t hash (const value_type &); + static inline bool equal (const value_type &, + const compare_type &); +}; + +inline hashval_t +innermost_loop_behavior_hash::hash (const value_type &e) +{ + hashval_t hash; + + hash = iterative_hash_expr (e->base_address, 0); + hash = iterative_hash_expr (e->offset, hash); + hash = iterative_hash_expr (e->init, hash); + hash = iterative_hash_expr (e->step, hash); + return iterative_hash_expr (e->aligned_to, hash); +} + +inline bool +innermost_loop_behavior_hash::equal (const value_type &e1, + const compare_type &e2) +{ + bool equal_p; + + if ((e1->base_address && !e2->base_address) + || (!e1->base_address && e2->base_address) + || (!e1->offset && e2->offset) + || (e1->offset && !e2->offset) + || (!e1->init && e2->init) + || (e1->init && !e2->init) + || (!e1->step && e2->step) + || (e1->step && !e2->step) + || (!e1->aligned_to && e2->aligned_to) + || (e1->aligned_to && !e2->aligned_to)) + return false; + + equal_p = true; + if (e1->base_address && e2->base_address) + equal_p &= operand_equal_p (e1->base_address, e2->base_address, 0); + if (e1->offset && e2->offset) + equal_p &= operand_equal_p (e1->offset, e2->offset, 0); + if (e1->init && e2->init) + equal_p &= operand_equal_p (e1->init, e2->init, 0); + if (e1->step && e2->step) + equal_p &= operand_equal_p (e1->step, e2->step, 0); + if (e1->aligned_to && e2->aligned_to) + equal_p &= operand_equal_p (e1->aligned_to, e2->aligned_to, 0); + + return equal_p; +} + /* Describes the evolutions of indices of the memory reference. The indices are indices of the ARRAY_REFs, indexes in artificial dimensions added for member selection of records and the operands of MEM_REFs. @@ -144,6 +199,7 @@ struct data_reference #define DR_STEP(DR) (DR)->innermost.step #define DR_PTR_INFO(DR) (DR)->alias.ptr_info #define DR_ALIGNED_TO(DR) (DR)->innermost.aligned_to +#define DR_INNERMOST(DR) (DR)->innermost typedef struct data_reference *data_reference_p; diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c index 9e305c7..1315370 100644 --- a/gcc/tree-if-conv.c +++ b/gcc/tree-if-conv.c @@ -120,10 +120,14 @@ static basic_block *ifc_bbs; /* Apply more aggressive (extended) if-conversion if true. */ static bool aggressive_if_conv; -/* Hash table to store references, DR pairs. */ +/* Hash table to store <references, DR> pairs. */ static hash_map<tree_operand_hash, data_reference_p> *ref_DR_map; -/* Hash table to store base reference, DR pairs. */ +/* Hash table to store <DR's innermost loop behavior, DR> pairs. */ +static hash_map<innermost_loop_behavior_hash, + data_reference_p> *innermost_DR_map; + +/* Hash table to store <base reference, DR> pairs. */ static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map; /* Structure used to predicate basic blocks. This is attached to the @@ -615,15 +619,29 @@ hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a) data_reference_p *master_dr, *base_master_dr; tree ref = DR_REF (a); tree base_ref = DR_BASE_OBJECT (a); + innermost_loop_behavior *innermost = &DR_INNERMOST (a); tree ca = bb_predicate (gimple_bb (DR_STMT (a))); bool exist1, exist2; - while (TREE_CODE (ref) == COMPONENT_REF - || TREE_CODE (ref) == IMAGPART_EXPR - || TREE_CODE (ref) == REALPART_EXPR) - ref = TREE_OPERAND (ref, 0); + /* If reference in DR has innermost loop behavior and it is not + a compound memory reference, we store it to innermost_DR_map, + otherwise to ref_DR_map. */ + if (TREE_CODE (ref) == COMPONENT_REF + || TREE_CODE (ref) == IMAGPART_EXPR + || TREE_CODE (ref) == REALPART_EXPR + || !(DR_BASE_ADDRESS (a) || DR_OFFSET (a) + || DR_INIT (a) || DR_STEP (a) || DR_ALIGNED_TO (a))) + { + while (TREE_CODE (ref) == COMPONENT_REF + || TREE_CODE (ref) == IMAGPART_EXPR + || TREE_CODE (ref) == REALPART_EXPR) + ref = TREE_OPERAND (ref, 0); + + master_dr = &ref_DR_map->get_or_insert (ref, &exist1); + } + else + master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1); - master_dr = &ref_DR_map->get_or_insert (ref, &exist1); if (!exist1) *master_dr = a; @@ -687,15 +705,29 @@ ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs) tree ref_base_a = DR_REF (a); tree base = DR_BASE_OBJECT (a); + innermost_loop_behavior *innermost = &DR_INNERMOST (a); gcc_assert (DR_STMT (a) == stmt); - while (TREE_CODE (ref_base_a) == COMPONENT_REF - || TREE_CODE (ref_base_a) == IMAGPART_EXPR - || TREE_CODE (ref_base_a) == REALPART_EXPR) - ref_base_a = TREE_OPERAND (ref_base_a, 0); + /* If reference in DR has innermost loop behavior and it is not + a compound memory reference, we get it from innermost_DR_map, + otherwise from ref_DR_map. */ + if (TREE_CODE (ref_base_a) == COMPONENT_REF + || TREE_CODE (ref_base_a) == IMAGPART_EXPR + || TREE_CODE (ref_base_a) == REALPART_EXPR + || !(DR_BASE_ADDRESS (a) || DR_OFFSET (a) + || DR_INIT (a) || DR_STEP (a) || DR_ALIGNED_TO (a))) + { + while (TREE_CODE (ref_base_a) == COMPONENT_REF + || TREE_CODE (ref_base_a) == IMAGPART_EXPR + || TREE_CODE (ref_base_a) == REALPART_EXPR) + ref_base_a = TREE_OPERAND (ref_base_a, 0); + + master_dr = ref_DR_map->get (ref_base_a); + } + else + master_dr = innermost_DR_map->get (innermost); - master_dr = ref_DR_map->get (ref_base_a); base_master_dr = baseref_DR_map->get (base); gcc_assert (master_dr != NULL); @@ -1229,6 +1261,8 @@ if_convertible_loop_p_1 (struct loop *loop, data_reference_p dr; ref_DR_map = new hash_map<tree_operand_hash, data_reference_p>; + innermost_DR_map + = new hash_map<innermost_loop_behavior_hash, data_reference_p>; baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>; predicate_bbs (loop); @@ -1341,6 +1375,9 @@ if_convertible_loop_p (struct loop *loop, bool *any_mask_load_store) delete ref_DR_map; ref_DR_map = NULL; + delete innermost_DR_map; + innermost_DR_map = NULL; + delete baseref_DR_map; baseref_DR_map = NULL;