Re: [PATCH] vect: Use statement vectype for conditional mask.
On Fri, Nov 17, 2023 at 8:20 PM Robin Dapp wrote: > > > No, you shouldn't place _7 != 0 inside the .COND_ADD but instead > > have an extra pattern stmt producing that so > > > > patt_8 = _7 != 0; > > patt_9 = .COND_ADD (patt_8, ...); > > > > that's probably still not enough, but I always quickly forget how > > bool patterns work ... basically a comparison like patt_8 = _7 != 0 > > vectorizes to a mask (aka vector boolean) while any "data" uses > > of bools are replaced by mask ? 1 : 0; - there's a complication for > > bool data producing loads which is why we need to insert the > > "fake" compares to produce a mask. IIRC. > > I already had call handling to vect_recog_bool_pattern in working > shape when I realized that vect_recog_mask_conversion_pattern already > handles most of what I need. The difference is that it doesn't do > patt_8 = _7 != 0 > but rather > patt_8 = () _7; > > It works equally well and most of the code can be reused. > > The attached was bootstrapped and regtested on x86 and aarch64 > and regtested on riscv. OK. Thanks, Richard. > Regards > Robin > > Subject: [PATCH] vect: Add bool pattern handling for COND_OPs. > > In order to handle masks properly for conditional operations this patch > teaches vect_recog_mask_conversion_pattern to also handle conditional > operations. Now we convert e.g. > > _mask = *_6; > _ifc123 = COND_OP (_mask, ...); > > into > _mask = *_6; > patt200 = () _mask; > patt201 = COND_OP (patt200, ...); > > This way the mask will be properly recognized as boolean mask and the > correct vector mask will be generated. > > gcc/ChangeLog: > > PR middle-end/112406 > > * tree-vect-patterns.cc (build_mask_conversion): > (vect_convert_mask_for_vectype): > > gcc/testsuite/ChangeLog: > > * gfortran.dg/pr112406.f90: New test. > --- > gcc/testsuite/gfortran.dg/pr112406.f90 | 21 + > gcc/tree-vect-patterns.cc | 26 ++ > 2 files changed, 39 insertions(+), 8 deletions(-) > create mode 100644 gcc/testsuite/gfortran.dg/pr112406.f90 > > diff --git a/gcc/testsuite/gfortran.dg/pr112406.f90 > b/gcc/testsuite/gfortran.dg/pr112406.f90 > new file mode 100644 > index 000..27e96df7e26 > --- /dev/null > +++ b/gcc/testsuite/gfortran.dg/pr112406.f90 > @@ -0,0 +1,21 @@ > +! { dg-do compile { target { aarch64-*-* || riscv*-*-* } } } > +! { dg-options "-Ofast -w -fprofile-generate" } > +! { dg-additional-options "-march=rv64gcv -mabi=lp64d" { target riscv*-*-* } > } > +! { dg-additional-options "-march=armv8-a+sve" { target aarch64-*-* } } > + > +module brute_force > + integer, parameter :: r=9 > + integer sudoku1(1, r) > + contains > +subroutine brute > +integer l(r), u(r) > + where(sudoku1(1, :) /= 1) > +l = 1 > + u = 1 > + end where > +do i1 = 1, u(1) > + do > + end do > + end do > +end > +end > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc > index 7debe7f0731..696b70b76a8 100644 > --- a/gcc/tree-vect-patterns.cc > +++ b/gcc/tree-vect-patterns.cc > @@ -5830,7 +5830,8 @@ vect_recog_mask_conversion_pattern (vec_info *vinfo, >tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE; >tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE; > > - /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */ > + /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring > mask > + conversion. */ >if (is_gimple_call (last_stmt) >&& gimple_call_internal_p (last_stmt)) > { > @@ -5842,6 +5843,7 @@ vect_recog_mask_conversion_pattern (vec_info *vinfo, > return NULL; > >bool store_p = internal_store_fn_p (ifn); > + bool load_p = internal_store_fn_p (ifn); >if (store_p) > { > int rhs_index = internal_fn_stored_value_index (ifn); > @@ -5856,15 +5858,21 @@ vect_recog_mask_conversion_pattern (vec_info *vinfo, > vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs)); > } > > + if (!vectype1) > + return NULL; > + >tree mask_arg = gimple_call_arg (last_stmt, mask_argno); >tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo); > - if (!mask_arg_type) > - return NULL; > - vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type); > + if (mask_arg_type) > + { > + vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type); > > - if (!vectype1 || !vectype2 > - || known_eq (TYPE_VECTOR_SUBPARTS (vectype1), > - TYPE_VECTOR_SUBPARTS (vectype2))) > + if (!vectype2 > + || known_eq (TYPE_VECTOR_SUBPARTS (vectype1), > + TYPE_VECTOR_SUBPARTS (vectype2))) > + return NULL; > + } > + else if (store_p || load_p) > return NULL; > >tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo); > @@ -5883,7 +5891,9 @@ vect_recog_mask_conversion_pa
Re: [PATCH] vect: Use statement vectype for conditional mask.
> No, you shouldn't place _7 != 0 inside the .COND_ADD but instead > have an extra pattern stmt producing that so > > patt_8 = _7 != 0; > patt_9 = .COND_ADD (patt_8, ...); > > that's probably still not enough, but I always quickly forget how > bool patterns work ... basically a comparison like patt_8 = _7 != 0 > vectorizes to a mask (aka vector boolean) while any "data" uses > of bools are replaced by mask ? 1 : 0; - there's a complication for > bool data producing loads which is why we need to insert the > "fake" compares to produce a mask. IIRC. I already had call handling to vect_recog_bool_pattern in working shape when I realized that vect_recog_mask_conversion_pattern already handles most of what I need. The difference is that it doesn't do patt_8 = _7 != 0 but rather patt_8 = () _7; It works equally well and most of the code can be reused. The attached was bootstrapped and regtested on x86 and aarch64 and regtested on riscv. Regards Robin Subject: [PATCH] vect: Add bool pattern handling for COND_OPs. In order to handle masks properly for conditional operations this patch teaches vect_recog_mask_conversion_pattern to also handle conditional operations. Now we convert e.g. _mask = *_6; _ifc123 = COND_OP (_mask, ...); into _mask = *_6; patt200 = () _mask; patt201 = COND_OP (patt200, ...); This way the mask will be properly recognized as boolean mask and the correct vector mask will be generated. gcc/ChangeLog: PR middle-end/112406 * tree-vect-patterns.cc (build_mask_conversion): (vect_convert_mask_for_vectype): gcc/testsuite/ChangeLog: * gfortran.dg/pr112406.f90: New test. --- gcc/testsuite/gfortran.dg/pr112406.f90 | 21 + gcc/tree-vect-patterns.cc | 26 ++ 2 files changed, 39 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/pr112406.f90 diff --git a/gcc/testsuite/gfortran.dg/pr112406.f90 b/gcc/testsuite/gfortran.dg/pr112406.f90 new file mode 100644 index 000..27e96df7e26 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr112406.f90 @@ -0,0 +1,21 @@ +! { dg-do compile { target { aarch64-*-* || riscv*-*-* } } } +! { dg-options "-Ofast -w -fprofile-generate" } +! { dg-additional-options "-march=rv64gcv -mabi=lp64d" { target riscv*-*-* } } +! { dg-additional-options "-march=armv8-a+sve" { target aarch64-*-* } } + +module brute_force + integer, parameter :: r=9 + integer sudoku1(1, r) + contains +subroutine brute +integer l(r), u(r) + where(sudoku1(1, :) /= 1) +l = 1 + u = 1 + end where +do i1 = 1, u(1) + do + end do + end do +end +end diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index 7debe7f0731..696b70b76a8 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -5830,7 +5830,8 @@ vect_recog_mask_conversion_pattern (vec_info *vinfo, tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE; tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE; - /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */ + /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring mask + conversion. */ if (is_gimple_call (last_stmt) && gimple_call_internal_p (last_stmt)) { @@ -5842,6 +5843,7 @@ vect_recog_mask_conversion_pattern (vec_info *vinfo, return NULL; bool store_p = internal_store_fn_p (ifn); + bool load_p = internal_store_fn_p (ifn); if (store_p) { int rhs_index = internal_fn_stored_value_index (ifn); @@ -5856,15 +5858,21 @@ vect_recog_mask_conversion_pattern (vec_info *vinfo, vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs)); } + if (!vectype1) + return NULL; + tree mask_arg = gimple_call_arg (last_stmt, mask_argno); tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo); - if (!mask_arg_type) - return NULL; - vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type); + if (mask_arg_type) + { + vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type); - if (!vectype1 || !vectype2 - || known_eq (TYPE_VECTOR_SUBPARTS (vectype1), - TYPE_VECTOR_SUBPARTS (vectype2))) + if (!vectype2 + || known_eq (TYPE_VECTOR_SUBPARTS (vectype1), + TYPE_VECTOR_SUBPARTS (vectype2))) + return NULL; + } + else if (store_p || load_p) return NULL; tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo); @@ -5883,7 +5891,9 @@ vect_recog_mask_conversion_pattern (vec_info *vinfo, lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); gimple_call_set_lhs (pattern_stmt, lhs); } - gimple_call_set_nothrow (pattern_stmt, true); + + if (load_p || store_p) + gimple_call_set_nothrow (pattern_stmt, true); pattern_stmt_info = vinfo->add_stmt
Re: [PATCH] vect: Use statement vectype for conditional mask.
> Yes, your version is also OK. The attached was bootstrapped and regtested on aarch64, x86 and regtested on riscv. Going to commit it later unless somebody objects. Regards Robin Subject: [PATCH] vect: Pass truth type to vect_get_vec_defs. For conditional operations the mask is loop invariant and cannot be stored explicitly. By default, for reductions, we deduce the vectype from the statement or the loop but this does not work for conditional operations. Therefore this patch passes the truth type of the reduction input vectype for the mask operand instead. This will override the other choices and make sure we have the proper mask vectype. gcc/ChangeLog: * tree-vect-loop.cc (vect_transform_reduction): Pass truth vectype for mask operand. --- gcc/testsuite/gcc.target/aarch64/pr112406.c | 37 +++ .../gcc.target/riscv/rvv/autovec/pr112552.c | 16 gcc/tree-vect-loop.cc | 31 +++- 3 files changed, 75 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/pr112406.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112552.c diff --git a/gcc/testsuite/gcc.target/aarch64/pr112406.c b/gcc/testsuite/gcc.target/aarch64/pr112406.c new file mode 100644 index 000..46459c68c4a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr112406.c @@ -0,0 +1,37 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-options "-march=armv8-a+sve -w -Ofast" } */ + +typedef struct { + int red +} MagickPixelPacket; + +GetImageChannelMoments_image, GetImageChannelMoments_image_0, +GetImageChannelMoments___trans_tmp_1, GetImageChannelMoments_M11_0, +GetImageChannelMoments_pixel_3, GetImageChannelMoments_y, +GetImageChannelMoments_p; + +double GetImageChannelMoments_M00_0, GetImageChannelMoments_M00_1, +GetImageChannelMoments_M01_1; + +MagickPixelPacket GetImageChannelMoments_pixel; + +SetMagickPixelPacket(int color, MagickPixelPacket *pixel) { + pixel->red = color; +} + +GetImageChannelMoments() { + for (; GetImageChannelMoments_y; GetImageChannelMoments_y++) { +SetMagickPixelPacket(GetImageChannelMoments_p, + &GetImageChannelMoments_pixel); +GetImageChannelMoments_M00_1 += GetImageChannelMoments_pixel.red; +if (GetImageChannelMoments_image) + GetImageChannelMoments_M00_1++; +GetImageChannelMoments_M01_1 += +GetImageChannelMoments_y * GetImageChannelMoments_pixel_3; +if (GetImageChannelMoments_image_0) + GetImageChannelMoments_M00_0++; +GetImageChannelMoments_M01_1 += +GetImageChannelMoments_y * GetImageChannelMoments_p++; + } + GetImageChannelMoments___trans_tmp_1 = atan(GetImageChannelMoments_M11_0); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112552.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112552.c new file mode 100644 index 000..32d221ccede --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112552.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax -w" } */ + +int a, c, d; +void (*b)(); +void (*e)(); +void g(); + +void h() { + for (; a; --a) { +char *f = h; +e = b || g > 1 ? g : b; +d |= !e; +*f ^= c; + } +} diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index fb8d999ee6b..e67ba6ac0b5 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -8470,15 +8470,28 @@ vect_transform_reduction (loop_vec_info loop_vinfo, /* Get NCOPIES vector definitions for all operands except the reduction definition. */ - vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, ncopies, -single_defuse_cycle && reduc_index == 0 -? NULL_TREE : op.ops[0], &vec_oprnds0, -single_defuse_cycle && reduc_index == 1 -? NULL_TREE : op.ops[1], &vec_oprnds1, -op.num_ops == 4 -|| (op.num_ops == 3 -&& !(single_defuse_cycle && reduc_index == 2)) -? op.ops[2] : NULL_TREE, &vec_oprnds2); + if (!cond_fn_p) +{ + vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, ncopies, +single_defuse_cycle && reduc_index == 0 +? NULL_TREE : op.ops[0], &vec_oprnds0, +single_defuse_cycle && reduc_index == 1 +? NULL_TREE : op.ops[1], &vec_oprnds1, +op.num_ops == 3 +&& !(single_defuse_cycle && reduc_index == 2) +? op.ops[2] : NULL_TREE, &vec_oprnds2); +} + else +{ + /* For a conditional operation pass the truth type as mask +vectype. */ + gcc_assert (single_defuse_cycle && reduc_index == 1); + vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, ncopies, +op.ops[0], &vec_oprnds
Re: [PATCH] vect: Use statement vectype for conditional mask.
On Fri, Nov 17, 2023 at 9:45 AM Robin Dapp wrote: > > > But note you can explicitly specify a vector type as well, there's an > > overload for it, so we can fix the "invariant" case with the following > > (OK if you can test this on relevant targets) > > > > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc > > index 3f59139cb01..936a3de9534 100644 > > --- a/gcc/tree-vect-loop.cc > > +++ b/gcc/tree-vect-loop.cc > > @@ -8450,12 +8450,14 @@ vect_transform_reduction (loop_vec_info loop_vinfo, > > value. */ > >bool cond_fn_p = code.is_internal_fn () > > && conditional_internal_fn_code (internal_fn (code)) != ERROR_MARK; > > + int cond_index = -1; > >if (cond_fn_p) > > { > >gcc_assert (code == IFN_COND_ADD || code == IFN_COND_SUB > > || code == IFN_COND_MUL || code == IFN_COND_AND > > || code == IFN_COND_IOR || code == IFN_COND_XOR); > >gcc_assert (op.num_ops == 4 && (op.ops[1] == op.ops[3])); > > + cond_index = 0; > > } > > > >bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); > > @@ -8486,12 +8488,13 @@ vect_transform_reduction (loop_vec_info loop_vinfo, > >vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, ncopies, > > single_defuse_cycle && reduc_index == 0 > > ? NULL_TREE : op.ops[0], &vec_oprnds0, > > +cond_index == 0 ? truth_type_for (vectype_in) : > > NULL_TREE, > > single_defuse_cycle && reduc_index == 1 > > -? NULL_TREE : op.ops[1], &vec_oprnds1, > > +? NULL_TREE : op.ops[1], &vec_oprnds1, NULL_TREE, > > op.num_ops == 4 > > || (op.num_ops == 3 > > && !(single_defuse_cycle && reduc_index == 2)) > > -? op.ops[2] : NULL_TREE, &vec_oprnds2); > > +? op.ops[2] : NULL_TREE, &vec_oprnds2, NULL_TREE); > > Ah, yes that's what I meant. I had something along those lines: > > + if (!cond_fn_p) > +{ > + vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, ncopies, > +single_defuse_cycle && reduc_index == 0 > +? NULL_TREE : op.ops[0], &vec_oprnds0, > +single_defuse_cycle && reduc_index == 1 > +? NULL_TREE : op.ops[1], &vec_oprnds1, > +op.num_ops == 3 > +&& !(single_defuse_cycle && reduc_index == 2) > +? op.ops[2] : NULL_TREE, &vec_oprnds2); > +} > + else > +{ > + /* For a conditional operation, pass the truth type as vectype for the > +mask. */ > + gcc_assert (single_defuse_cycle && reduc_index == 1); > + vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, ncopies, > +op.ops[0], &vec_oprnds0, > +truth_type_for (vectype_in), > +NULL_TREE, &vec_oprnds1, NULL_TREE, > +op.ops[2], &vec_oprnds2, NULL_TREE); > +} > > Even though this has a bit of duplication now I prefer it slightly > because of the. I'd hope, once fully tested (it's already running > but I'm having connection problems so don't know about the results > yet), this could go in independently of the pattern fix? Yes, your version is also OK. Thanks, Richard. > > Regards > Robin >
Re: [PATCH] vect: Use statement vectype for conditional mask.
> But note you can explicitly specify a vector type as well, there's an > overload for it, so we can fix the "invariant" case with the following > (OK if you can test this on relevant targets) > > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc > index 3f59139cb01..936a3de9534 100644 > --- a/gcc/tree-vect-loop.cc > +++ b/gcc/tree-vect-loop.cc > @@ -8450,12 +8450,14 @@ vect_transform_reduction (loop_vec_info loop_vinfo, > value. */ >bool cond_fn_p = code.is_internal_fn () > && conditional_internal_fn_code (internal_fn (code)) != ERROR_MARK; > + int cond_index = -1; >if (cond_fn_p) > { >gcc_assert (code == IFN_COND_ADD || code == IFN_COND_SUB > || code == IFN_COND_MUL || code == IFN_COND_AND > || code == IFN_COND_IOR || code == IFN_COND_XOR); >gcc_assert (op.num_ops == 4 && (op.ops[1] == op.ops[3])); > + cond_index = 0; > } > >bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); > @@ -8486,12 +8488,13 @@ vect_transform_reduction (loop_vec_info loop_vinfo, >vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, ncopies, > single_defuse_cycle && reduc_index == 0 > ? NULL_TREE : op.ops[0], &vec_oprnds0, > +cond_index == 0 ? truth_type_for (vectype_in) : > NULL_TREE, > single_defuse_cycle && reduc_index == 1 > -? NULL_TREE : op.ops[1], &vec_oprnds1, > +? NULL_TREE : op.ops[1], &vec_oprnds1, NULL_TREE, > op.num_ops == 4 > || (op.num_ops == 3 > && !(single_defuse_cycle && reduc_index == 2)) > -? op.ops[2] : NULL_TREE, &vec_oprnds2); > +? op.ops[2] : NULL_TREE, &vec_oprnds2, NULL_TREE); Ah, yes that's what I meant. I had something along those lines: + if (!cond_fn_p) +{ + vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, ncopies, +single_defuse_cycle && reduc_index == 0 +? NULL_TREE : op.ops[0], &vec_oprnds0, +single_defuse_cycle && reduc_index == 1 +? NULL_TREE : op.ops[1], &vec_oprnds1, +op.num_ops == 3 +&& !(single_defuse_cycle && reduc_index == 2) +? op.ops[2] : NULL_TREE, &vec_oprnds2); +} + else +{ + /* For a conditional operation, pass the truth type as vectype for the +mask. */ + gcc_assert (single_defuse_cycle && reduc_index == 1); + vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, ncopies, +op.ops[0], &vec_oprnds0, +truth_type_for (vectype_in), +NULL_TREE, &vec_oprnds1, NULL_TREE, +op.ops[2], &vec_oprnds2, NULL_TREE); +} Even though this has a bit of duplication now I prefer it slightly because of the. I'd hope, once fully tested (it's already running but I'm having connection problems so don't know about the results yet), this could go in independently of the pattern fix? Regards Robin
Re: [PATCH] vect: Use statement vectype for conditional mask.
On Thu, Nov 16, 2023 at 11:30 PM Robin Dapp wrote: > > > For the fortran testcase we don't even run into this but hit an > > internal def and assert on > > > > gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == > > ncopies); > > > > I think this shows missing handling of .COND_* in the bool pattern > > recognition > > as we get the 'bool' condition as boolean data vector rather than a mask. > > The > > same is true for the testcase with the invariant condition. This causes us > > to > > select the wrong vector type here. The "easiest" might be to look at > > how COND_EXPR is handled in vect_recog_bool_pattern and friends and > > handle .COND_* IFNs the same for the mask operand. > > For the first (imagick) testcase adding a bool pattern does not help > because we always pass NULL as vectype to vect_get_vec_defs. > Doing so we will always use get_vectype_for_scalar_type (i.e. > a "full" bool vector) because vectype of the (conditional) stmt > is the lhs type and not the mask's type. > For cond_exprs in vectorizable_condition we directly pass a > comp_vectype instead (truth_type). Wouldn't that, independently > of the pattern recog, make sense? The issue with the first testcase is that the condition operand of the .COND_ADD is loop invariant. When not using SLP there's no way to store the desired vector type for those as they are not code-generated explicitly but implicitly when we use them via vect_get_vec_defs. But note you can explicitly specify a vector type as well, there's an overload for it, so we can fix the "invariant" case with the following (OK if you can test this on relevant targets) diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 3f59139cb01..936a3de9534 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -8450,12 +8450,14 @@ vect_transform_reduction (loop_vec_info loop_vinfo, value. */ bool cond_fn_p = code.is_internal_fn () && conditional_internal_fn_code (internal_fn (code)) != ERROR_MARK; + int cond_index = -1; if (cond_fn_p) { gcc_assert (code == IFN_COND_ADD || code == IFN_COND_SUB || code == IFN_COND_MUL || code == IFN_COND_AND || code == IFN_COND_IOR || code == IFN_COND_XOR); gcc_assert (op.num_ops == 4 && (op.ops[1] == op.ops[3])); + cond_index = 0; } bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); @@ -8486,12 +8488,13 @@ vect_transform_reduction (loop_vec_info loop_vinfo, vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, ncopies, single_defuse_cycle && reduc_index == 0 ? NULL_TREE : op.ops[0], &vec_oprnds0, +cond_index == 0 ? truth_type_for (vectype_in) : NULL_TREE, single_defuse_cycle && reduc_index == 1 -? NULL_TREE : op.ops[1], &vec_oprnds1, +? NULL_TREE : op.ops[1], &vec_oprnds1, NULL_TREE, op.num_ops == 4 || (op.num_ops == 3 && !(single_defuse_cycle && reduc_index == 2)) -? op.ops[2] : NULL_TREE, &vec_oprnds2); +? op.ops[2] : NULL_TREE, &vec_oprnds2, NULL_TREE); /* For single def-use cycles get one copy of the vectorized reduction definition. */ > > Now for the Fortran testcase I'm still a bit lost. Opposed to > before we now vectorize with a variable VF and hit the problem > in the epilogue with ncopies = 2. > > .COND_ADD (_7, __gcov0.__brute_force_MOD_brute_I_lsm.21_67, 1, > __gcov0.__brute_force_MOD_brute_I_lsm.21_67); > where > _7 = *_6 > which is an internal_def. > > I played around with doing it analogously to the COND_EXPR > handling, so creating a COND_ADD (_7 != 0) which will required > several fixups in other places because we're not prepared to > handle that. In the end it seems to only shift the problem > because we will still need the definition of _7. No, you shouldn't place _7 != 0 inside the .COND_ADD but instead have an extra pattern stmt producing that so patt_8 = _7 != 0; patt_9 = .COND_ADD (patt_8, ...); that's probably still not enough, but I always quickly forget how bool patterns work ... basically a comparison like patt_8 = _7 != 0 vectorizes to a mask (aka vector boolean) while any "data" uses of bools are replaced by mask ? 1 : 0; - there's a complication for bool data producing loads which is why we need to insert the "fake" compares to produce a mask. IIRC. > I guess you're implying that the definition should have already > been handled by pattern recognition so that at the point when > we need it, it has a related pattern stmt with the proper mask > type? As said, the compare needs to be a separate pattern stmt part of the .COND_ADD patterns pattern sequence. Richard. > > Regards > Robin >
Re: [PATCH] vect: Use statement vectype for conditional mask.
> For the fortran testcase we don't even run into this but hit an > internal def and assert on > > gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies); > > I think this shows missing handling of .COND_* in the bool pattern recognition > as we get the 'bool' condition as boolean data vector rather than a mask. The > same is true for the testcase with the invariant condition. This causes us to > select the wrong vector type here. The "easiest" might be to look at > how COND_EXPR is handled in vect_recog_bool_pattern and friends and > handle .COND_* IFNs the same for the mask operand. For the first (imagick) testcase adding a bool pattern does not help because we always pass NULL as vectype to vect_get_vec_defs. Doing so we will always use get_vectype_for_scalar_type (i.e. a "full" bool vector) because vectype of the (conditional) stmt is the lhs type and not the mask's type. For cond_exprs in vectorizable_condition we directly pass a comp_vectype instead (truth_type). Wouldn't that, independently of the pattern recog, make sense? Now for the Fortran testcase I'm still a bit lost. Opposed to before we now vectorize with a variable VF and hit the problem in the epilogue with ncopies = 2. .COND_ADD (_7, __gcov0.__brute_force_MOD_brute_I_lsm.21_67, 1, __gcov0.__brute_force_MOD_brute_I_lsm.21_67); where _7 = *_6 which is an internal_def. I played around with doing it analogously to the COND_EXPR handling, so creating a COND_ADD (_7 != 0) which will required several fixups in other places because we're not prepared to handle that. In the end it seems to only shift the problem because we will still need the definition of _7. I guess you're implying that the definition should have already been handled by pattern recognition so that at the point when we need it, it has a related pattern stmt with the proper mask type? Regards Robin
Re: [PATCH] vect: Use statement vectype for conditional mask.
On Wed, Nov 8, 2023 at 5:18 PM Robin Dapp wrote: > > Hi, > > as Tamar reported in PR112406 we still ICE on aarch64 in SPEC2017 > when creating COND_OPs in ifcvt. > > The problem is that we fail to deduce the mask's type from the statement > vectype and then end up with a non-matching mask in expand. This patch > checks if the current op is equal to the mask operand and, if so, uses > the truth type from the stmt_vectype. Is that a valid approach? > > Bootstrapped and regtested on aarch64, x86 is running. > > Besides, the testcase is Tamar's reduced example, originally from > SPEC. I hope it's ok to include it as is (as imagick is open source > anyway). For the fortran testcase we don't even run into this but hit an internal def and assert on gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies); I think this shows missing handling of .COND_* in the bool pattern recognition as we get the 'bool' condition as boolean data vector rather than a mask. The same is true for the testcase with the invariant condition. This causes us to select the wrong vector type here. The "easiest" might be to look at how COND_EXPR is handled in vect_recog_bool_pattern and friends and handle .COND_* IFNs the same for the mask operand. Richard. > Regards > Robin > > gcc/ChangeLog: > > PR middle-end/112406 > > * tree-vect-stmts.cc (vect_get_vec_defs_for_operand): Handle > masks of conditional ops. > > gcc/testsuite/ChangeLog: > > * gcc.dg/pr112406.c: New test. > --- > gcc/testsuite/gcc.dg/pr112406.c | 37 + > gcc/tree-vect-stmts.cc | 20 +- > 2 files changed, 56 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/gcc.dg/pr112406.c > > diff --git a/gcc/testsuite/gcc.dg/pr112406.c b/gcc/testsuite/gcc.dg/pr112406.c > new file mode 100644 > index 000..46459c68c4a > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/pr112406.c > @@ -0,0 +1,37 @@ > +/* { dg-do compile { target { aarch64*-*-* } } } */ > +/* { dg-options "-march=armv8-a+sve -w -Ofast" } */ > + > +typedef struct { > + int red > +} MagickPixelPacket; > + > +GetImageChannelMoments_image, GetImageChannelMoments_image_0, > +GetImageChannelMoments___trans_tmp_1, GetImageChannelMoments_M11_0, > +GetImageChannelMoments_pixel_3, GetImageChannelMoments_y, > +GetImageChannelMoments_p; > + > +double GetImageChannelMoments_M00_0, GetImageChannelMoments_M00_1, > +GetImageChannelMoments_M01_1; > + > +MagickPixelPacket GetImageChannelMoments_pixel; > + > +SetMagickPixelPacket(int color, MagickPixelPacket *pixel) { > + pixel->red = color; > +} > + > +GetImageChannelMoments() { > + for (; GetImageChannelMoments_y; GetImageChannelMoments_y++) { > +SetMagickPixelPacket(GetImageChannelMoments_p, > + &GetImageChannelMoments_pixel); > +GetImageChannelMoments_M00_1 += GetImageChannelMoments_pixel.red; > +if (GetImageChannelMoments_image) > + GetImageChannelMoments_M00_1++; > +GetImageChannelMoments_M01_1 += > +GetImageChannelMoments_y * GetImageChannelMoments_pixel_3; > +if (GetImageChannelMoments_image_0) > + GetImageChannelMoments_M00_0++; > +GetImageChannelMoments_M01_1 += > +GetImageChannelMoments_y * GetImageChannelMoments_p++; > + } > + GetImageChannelMoments___trans_tmp_1 = atan(GetImageChannelMoments_M11_0); > +} > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc > index 65883e04ad7..6793b01bf44 100644 > --- a/gcc/tree-vect-stmts.cc > +++ b/gcc/tree-vect-stmts.cc > @@ -1238,10 +1238,28 @@ vect_get_vec_defs_for_operand (vec_info *vinfo, > stmt_vec_info stmt_vinfo, >tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo); >tree vector_type; > > + /* For a COND_OP the mask operand's type must not be deduced from the > +scalar type but from the statement's vectype. */ > + bool use_stmt_vectype = false; > + gcall *call; > + if ((call = dyn_cast (STMT_VINFO_STMT (stmt_vinfo))) > + && gimple_call_internal_p (call)) > + { > + internal_fn ifn = gimple_call_internal_fn (call); > + int mask_idx = -1; > + if (ifn != IFN_LAST > + && (mask_idx = internal_fn_mask_index (ifn)) != -1) > + { > + tree maskop = gimple_call_arg (call, mask_idx); > + if (op == maskop) > + use_stmt_vectype = true; > + } > + } > + >if (vectype) > vector_type = vectype; >else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op)) > - && VECTOR_BOOLEAN_TYPE_P (stmt_vectype)) > + && (use_stmt_vectype || VECTOR_BOOLEAN_TYPE_P (stmt_vectype))) > vector_type = truth_type_for (stmt_vectype); >else > vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE > (op)); > -- > 2.41.0
[PATCH] vect: Use statement vectype for conditional mask.
Hi, as Tamar reported in PR112406 we still ICE on aarch64 in SPEC2017 when creating COND_OPs in ifcvt. The problem is that we fail to deduce the mask's type from the statement vectype and then end up with a non-matching mask in expand. This patch checks if the current op is equal to the mask operand and, if so, uses the truth type from the stmt_vectype. Is that a valid approach? Bootstrapped and regtested on aarch64, x86 is running. Besides, the testcase is Tamar's reduced example, originally from SPEC. I hope it's ok to include it as is (as imagick is open source anyway). Regards Robin gcc/ChangeLog: PR middle-end/112406 * tree-vect-stmts.cc (vect_get_vec_defs_for_operand): Handle masks of conditional ops. gcc/testsuite/ChangeLog: * gcc.dg/pr112406.c: New test. --- gcc/testsuite/gcc.dg/pr112406.c | 37 + gcc/tree-vect-stmts.cc | 20 +- 2 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/pr112406.c diff --git a/gcc/testsuite/gcc.dg/pr112406.c b/gcc/testsuite/gcc.dg/pr112406.c new file mode 100644 index 000..46459c68c4a --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr112406.c @@ -0,0 +1,37 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-options "-march=armv8-a+sve -w -Ofast" } */ + +typedef struct { + int red +} MagickPixelPacket; + +GetImageChannelMoments_image, GetImageChannelMoments_image_0, +GetImageChannelMoments___trans_tmp_1, GetImageChannelMoments_M11_0, +GetImageChannelMoments_pixel_3, GetImageChannelMoments_y, +GetImageChannelMoments_p; + +double GetImageChannelMoments_M00_0, GetImageChannelMoments_M00_1, +GetImageChannelMoments_M01_1; + +MagickPixelPacket GetImageChannelMoments_pixel; + +SetMagickPixelPacket(int color, MagickPixelPacket *pixel) { + pixel->red = color; +} + +GetImageChannelMoments() { + for (; GetImageChannelMoments_y; GetImageChannelMoments_y++) { +SetMagickPixelPacket(GetImageChannelMoments_p, + &GetImageChannelMoments_pixel); +GetImageChannelMoments_M00_1 += GetImageChannelMoments_pixel.red; +if (GetImageChannelMoments_image) + GetImageChannelMoments_M00_1++; +GetImageChannelMoments_M01_1 += +GetImageChannelMoments_y * GetImageChannelMoments_pixel_3; +if (GetImageChannelMoments_image_0) + GetImageChannelMoments_M00_0++; +GetImageChannelMoments_M01_1 += +GetImageChannelMoments_y * GetImageChannelMoments_p++; + } + GetImageChannelMoments___trans_tmp_1 = atan(GetImageChannelMoments_M11_0); +} diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 65883e04ad7..6793b01bf44 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -1238,10 +1238,28 @@ vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo, tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo); tree vector_type; + /* For a COND_OP the mask operand's type must not be deduced from the +scalar type but from the statement's vectype. */ + bool use_stmt_vectype = false; + gcall *call; + if ((call = dyn_cast (STMT_VINFO_STMT (stmt_vinfo))) + && gimple_call_internal_p (call)) + { + internal_fn ifn = gimple_call_internal_fn (call); + int mask_idx = -1; + if (ifn != IFN_LAST + && (mask_idx = internal_fn_mask_index (ifn)) != -1) + { + tree maskop = gimple_call_arg (call, mask_idx); + if (op == maskop) + use_stmt_vectype = true; + } + } + if (vectype) vector_type = vectype; else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op)) - && VECTOR_BOOLEAN_TYPE_P (stmt_vectype)) + && (use_stmt_vectype || VECTOR_BOOLEAN_TYPE_P (stmt_vectype))) vector_type = truth_type_for (stmt_vectype); else vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op)); -- 2.41.0