Hi, When vectorizing a reduction that has multiple loop-exit phis, we reduce them to a single vector. However, the result of this reduction is later ignored when the final value is extracted with scalar code. This causes wrong code generation for gfortran.dg/forall_7.f90 with -O3 -funroll-loops on Cell SPU. This patch fixes the problem by using the result of that reduction when extracting the final value with scalar code.
Bootstrapped on powerpc64-suse-linux and tested on powerpc64-suse-linux and on spu-redhat-linux. Committed. Ira ChangeLog: * tree-vect-loop.c (vect_create_epilog_for_reduction): Use the result of multiple results reduction when extracting the final value using scalar code. Index: tree-vect-loop.c =================================================================== --- tree-vect-loop.c (revision 177266) +++ tree-vect-loop.c (working copy) @@ -3683,13 +3683,13 @@ vect_create_epilog_for_reduction (VEC (tree, heap) { tree first_vect = PHI_RESULT (VEC_index (gimple, new_phis, 0)); tree tmp; + gimple new_vec_stmt = NULL; vec_dest = vect_create_destination_var (scalar_dest, vectype); for (k = 1; k < VEC_length (gimple, new_phis); k++) { gimple next_phi = VEC_index (gimple, new_phis, k); tree second_vect = PHI_RESULT (next_phi); - gimple new_vec_stmt; tmp = build2 (code, vectype, first_vect, second_vect); new_vec_stmt = gimple_build_assign (vec_dest, tmp); @@ -3699,6 +3699,11 @@ vect_create_epilog_for_reduction (VEC (tree, heap) } new_phi_result = first_vect; + if (new_vec_stmt) + { + VEC_truncate (gimple, new_phis, 0); + VEC_safe_push (gimple, heap, new_phis, new_vec_stmt); + } } else new_phi_result = PHI_RESULT (VEC_index (gimple, new_phis, 0)); @@ -3809,7 +3814,10 @@ vect_create_epilog_for_reduction (VEC (tree, heap) vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1); FOR_EACH_VEC_ELT (gimple, new_phis, i, new_phi) { - vec_temp = PHI_RESULT (new_phi); + if (gimple_code (new_phi) == GIMPLE_PHI) + vec_temp = PHI_RESULT (new_phi); + else + vec_temp = gimple_assign_lhs (new_phi); rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize, bitsize_zero_node); epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);