Hi, This patch adds a missing exit phi node for the outer loop in the vectorization of a double reduction.
Bootstrapped and tested on powerpc64-suse-linux. Committed. Ira ChangeLog: PR middle-end/51285 * tree-vect-loop.c (vect_create_epilog_for_reduction): Create exit phi nodes for outer loop in case of double reduction. testsuite/ChangeLog: PR middle-end/51285 * gfortran.dg/vect/pr51285.f90: New test. Index: tree-vect-loop.c =================================================================== --- tree-vect-loop.c (revision 181984) +++ tree-vect-loop.c (working copy) @@ -3462,6 +3462,7 @@ vect_create_epilog_for_reduction (VEC (tree, heap) gimple use_stmt, orig_stmt, reduction_phi = NULL; bool nested_in_vect_loop = false; VEC (gimple, heap) *new_phis = NULL; + VEC (gimple, heap) *inner_phis = NULL; enum vect_def_type dt = vect_unknown_def_type; int j, i; VEC (tree, heap) *scalar_results = NULL; @@ -3470,6 +3471,7 @@ vect_create_epilog_for_reduction (VEC (tree, heap) VEC (gimple, heap) *phis; bool slp_reduc = false; tree new_phi_result; + gimple inner_phi = NULL; if (slp_node) group_size = VEC_length (gimple, SLP_TREE_SCALAR_STMTS (slp_node)); @@ -3626,11 +3628,36 @@ vect_create_epilog_for_reduction (VEC (tree, heap) } /* The epilogue is created for the outer-loop, i.e., for the loop being - vectorized. */ + vectorized. Create exit phis for the outer loop. 
*/ if (double_reduc) { loop = outer_loop; exit_bb = single_exit (loop)->dest; + inner_phis = VEC_alloc (gimple, heap, VEC_length (tree, vect_defs)); + FOR_EACH_VEC_ELT (gimple, new_phis, i, phi) + { + gimple outer_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)), + exit_bb); + SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx, + PHI_RESULT (phi)); + set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi, + loop_vinfo, NULL)); + VEC_quick_push (gimple, inner_phis, phi); + VEC_replace (gimple, new_phis, i, outer_phi); + prev_phi_info = vinfo_for_stmt (outer_phi); + while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi))) + { + phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)); + outer_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)), + exit_bb); + SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx, + PHI_RESULT (phi)); + set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi, + loop_vinfo, NULL)); + STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi; + prev_phi_info = vinfo_for_stmt (outer_phi); + } + } } exit_gsi = gsi_after_labels (exit_bb); @@ -4040,6 +4067,8 @@ vect_finalize_reduction: { epilog_stmt = VEC_index (gimple, new_phis, k / ratio); reduction_phi = VEC_index (gimple, reduction_phis, k / ratio); + if (double_reduc) + inner_phi = VEC_index (gimple, inner_phis, k / ratio); } if (slp_reduc) @@ -4123,7 +4152,7 @@ vect_finalize_reduction: vs1 was created previously in this function by a call to vect_get_vec_def_for_operand and is stored in vec_initial_def; - vs2 is defined by EPILOG_STMT, the vectorized EXIT_PHI; + vs2 is defined by INNER_PHI, the vectorized EXIT_PHI; vs0 is created here. */ /* Create vector phi node. 
*/ @@ -4144,7 +4173,7 @@ vect_finalize_reduction: add_phi_arg (vect_phi, vect_phi_init, loop_preheader_edge (outer_loop), UNKNOWN_LOCATION); - add_phi_arg (vect_phi, PHI_RESULT (epilog_stmt), + add_phi_arg (vect_phi, PHI_RESULT (inner_phi), loop_latch_edge (outer_loop), UNKNOWN_LOCATION); if (vect_print_dump_info (REPORT_DETAILS)) { Index: testsuite/gfortran.dg/vect/pr51285.f90 =================================================================== --- testsuite/gfortran.dg/vect/pr51285.f90 (revision 0) +++ testsuite/gfortran.dg/vect/pr51285.f90 (revision 0) @@ -0,0 +1,36 @@ +! { dg-do compile } + + SUBROUTINE smm_dnn_4_10_10_1_1_2_1(A,B,C) + REAL :: C(4,10), B(10,10), A(4,10) + DO j= 1 , 10 , 2 + DO i= 1 , 4 , 1 + DO l= 1 , 10 , 1 + C(i+0,j+0)=C(i+0,j+0)+A(i+0,l+0)*B(l+0,j+0) + C(i+0,j+1)=C(i+0,j+1)+A(i+0,l+0)*B(l+0,j+1) + ENDDO + ENDDO + ENDDO + END SUBROUTINE + SUBROUTINE smm_dnn_4_10_10_6_4_1_1(A,B,C) + REAL :: C(4,10), B(10,10), A(4,10) + DO l= 1 , 10 , 1 + DO j= 1 , 10 , 1 + C(i+0,j+0)=C(i+0,j+0)+A(i+0,l+0)*B(l+0,j+0) + ENDDO + ENDDO + END SUBROUTINE + SUBROUTINE S(A,B,C) + INTEGER :: Nmin=2,Niter=100 + REAL, DIMENSION(:,:), ALLOCATABLE :: A,B,C + DO imin=1,Nmin + DO i=1,Niter + CALL smm_dnn_4_10_10_1_1_2_1(A,B,C) + ENDDO + DO i=1,Niter + CALL smm_dnn_4_10_10_6_4_1_1(A,B,C) + ENDDO + CALL foo() + ENDDO + END SUBROUTINE + +! { dg-final { cleanup-tree-dump "vect" } }