Alejandro Martinez Vicente <alejandro.martinezvice...@arm.com> writes:
> Hi,
>
> This patch fixes bug 90681.  It was caused by trying to SLP vectorize a
> non-grouped load.  We've fixed it by tweaking the implementation a bit:
> masked loads are marked as not vectorizable, but supported as a special
> case.  They are then caught by the existing test for normal non-grouped
> loads.
>
> The bug reproducer now works and the performance test we added is
> still happy.
>
> Alejandro
>
> gcc/ChangeLog:
>
> 2019-05-31  Alejandro Martinez  <alejandro.martinezvice...@arm.com>
>
> 	PR tree-optimization/90681
> 	* internal-fn.c (mask_load_direct): Mark as non-vectorizable again.
> 	* tree-vect-slp.c (vect_build_slp_tree_1): Add masked loads as a
> 	special case for SLP, but fail on non-grouped loads.
>
> gcc/testsuite/ChangeLog:
>
> 2019-05-31  Alejandro Martinez  <alejandro.martinezvice...@arm.com>
>
> 	PR tree-optimization/90681
> 	* gfortran.dg/vect/pr90681.f: Bug reproducer.
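For anyone reading along: the reason the original test missed this case
is that a masked load reaches the vectorizer as an internal-function
call rather than as a plain memory reference, so the old
TREE_CODE_CLASS (rhs_code) == tcc_reference check never classified it
as a load.  A condensed sketch of the classification after the patch
(not a verbatim copy; the "..." elides the other call checks -- see the
diff below for the real context in vect_build_slp_tree_1):

    bool load_p = false;
    if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
      /* .MASK_LOAD is a call, so it has to be recognised explicitly.  */
      load_p = gimple_call_internal_p (call_stmt, IFN_MASK_LOAD);
    else
      /* An ordinary load is a tcc_reference assignment.  */
      load_p = TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
	       == tcc_reference;
    ...
    if (load_p)
      /* Reject the non-grouped load, masked or not.  */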
OK, thanks.

Richard

> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> index 3051a7a..04081f3 100644
> --- a/gcc/internal-fn.c
> +++ b/gcc/internal-fn.c
> @@ -100,7 +100,7 @@ init_internal_fns ()
>  /* Create static initializers for the information returned by
>     direct_internal_fn.  */
>  #define not_direct { -2, -2, false }
> -#define mask_load_direct { -1, 2, true }
> +#define mask_load_direct { -1, 2, false }
>  #define load_lanes_direct { -1, -1, false }
>  #define mask_load_lanes_direct { -1, -1, false }
>  #define gather_load_direct { -1, -1, false }
> diff --git a/gcc/testsuite/gfortran.dg/vect/pr90681.f b/gcc/testsuite/gfortran.dg/vect/pr90681.f
> new file mode 100644
> index 0000000..03d3987
> --- /dev/null
> +++ b/gcc/testsuite/gfortran.dg/vect/pr90681.f
> @@ -0,0 +1,13 @@
> +C { dg-do compile }
> +C { dg-additional-options "-march=armv8.2-a+sve" { target { aarch64*-*-* } } }
> +      SUBROUTINE HMU (H1)
> +      COMMON DD(107)
> +      DIMENSION H1(NORBS,*)
> +      DO 70 J1 = IA,I1
> +      H1(I1,J1) = 0
> +      JO1 = J1
> +      IF (JO1.EQ.1) THEN
> +         H1(I1,J1) = DD(NI)
> +      END IF
> +   70 CONTINUE
> +      END
> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
> index 884db33..23a8a20 100644
> --- a/gcc/tree-vect-slp.c
> +++ b/gcc/tree-vect-slp.c
> @@ -661,6 +661,7 @@ vect_build_slp_tree_1 (unsigned char *swap,
>    machine_mode optab_op2_mode;
>    machine_mode vec_mode;
>    stmt_vec_info first_load = NULL, prev_first_load = NULL;
> +  bool load_p = false;
>
>    /* For every stmt in NODE find its def stmt/s.  */
>    stmt_vec_info stmt_info;
> @@ -714,7 +715,10 @@ vect_build_slp_tree_1 (unsigned char *swap,
>        if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
>  	{
>  	  rhs_code = CALL_EXPR;
> -	  if ((gimple_call_internal_p (call_stmt)
> +
> +	  if (gimple_call_internal_p (stmt, IFN_MASK_LOAD))
> +	    load_p = true;
> +	  else if ((gimple_call_internal_p (call_stmt)
>  		    && (!vectorizable_internal_fn_p
>  			  (gimple_call_internal_fn (call_stmt))))
>  		   || gimple_call_tail_p (call_stmt)
> @@ -732,7 +736,10 @@ vect_build_slp_tree_1 (unsigned char *swap,
>  	    }
>  	}
>        else
> -	rhs_code = gimple_assign_rhs_code (stmt);
> +	{
> +	  rhs_code = gimple_assign_rhs_code (stmt);
> +	  load_p = TREE_CODE_CLASS (rhs_code) == tcc_reference;
> +	}
>
>        /* Check the operation.  */
>        if (i == 0)
> @@ -899,7 +906,7 @@ vect_build_slp_tree_1 (unsigned char *swap,
>  	} /* Grouped access.  */
>        else
>  	{
> -	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
> +	  if (load_p)
>  	    {
>  	      /* Not grouped load.  */
>  	      if (dump_enabled_p ())
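For reference, a rough C analogue of the reproducer (hypothetical: the
names, types and indexing are mine, not taken from the testcase).  The
point is just that the guarded read of dd[ni] is the kind of access
that if-conversion turns into an IFN_MASK_LOAD when vectorizing for
SVE, and that load is not part of any interleaving group -- exactly the
case vect_build_slp_tree_1 now rejects:

    /* Rough, hypothetical C analogue of pr90681.f.  */
    void
    hmu (float *h1, float *dd, int norbs, int ia, int i1, int ni)
    {
      for (int j1 = ia; j1 <= i1; j1++)
        {
          h1[j1 * norbs + i1] = 0.0f;
          /* Guarded load: the candidate for a masked load.  */
          if (j1 == 1)
            h1[j1 * norbs + i1] = dd[ni];
        }
    }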