When recording the mask for loop masking of OpenMP SIMD clone calls
we currently do not always provide the correct vector type and number
of copies. The following corrects this.
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.
	PR tree-optimization/122736
	* tree-vect-stmts.cc (vectorizable_simd_clone_call): Compute
	num_mask_args for all mask modes. Pass the mask vector
	type to vect_record_loop_mask and adjust ncopies according
	to the number of mask arguments.

	* gcc.target/i386/vect-pr122736.c: New testcase.
---
gcc/testsuite/gcc.target/i386/vect-pr122736.c | 22 +++++++++++++++
gcc/tree-vect-stmts.cc | 27 +++++++++++++------
2 files changed, 41 insertions(+), 8 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/vect-pr122736.c
diff --git a/gcc/testsuite/gcc.target/i386/vect-pr122736.c b/gcc/testsuite/gcc.target/i386/vect-pr122736.c
new file mode 100644
index 00000000000..2719a52d106
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-pr122736.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fopenmp-simd -mavx512vl" } */
+
+#pragma omp declare simd
+double __attribute__((noinline))
+baz (double x)
+{
+ return x;
+}
+
+#pragma omp declare simd
+double
+foo (double d)
+{
+ return baz (d);
+}
+
+double __attribute__((noipa))
+fn (double x)
+{
+ return foo (x);
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index bc15ac9d085..0c23a9f23e2 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -4333,10 +4333,11 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
return false;
unsigned int num_mask_args = 0;
- if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
- for (i = 0; i < nargs; i++)
- if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
- num_mask_args++;
+ for (i = 0; i < bestn->simdclone->nargs; i++)
+ if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
+ num_mask_args++;
+ if (!SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
+ gcc_assert (num_mask_args <= 1);
for (i = 0; i < nargs; i++)
{
@@ -4483,10 +4484,20 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
case SIMD_CLONE_ARG_TYPE_MASK:
if (loop_vinfo
&& LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
- vect_record_loop_mask (loop_vinfo,
- &LOOP_VINFO_MASKS (loop_vinfo),
- ncopies, vectype, op);
-
+ {
+ tree arg_vectype;
+ if (SCALAR_INT_MODE_P
+ (TYPE_MODE (bestn->simdclone->args[i].vector_type)))
+ arg_vectype = build_truth_vector_type_for_mode
+ (exact_div (bestn->simdclone->simdlen, num_mask_args),
+ TYPE_MODE (bestn->simdclone->args[i].vector_type));
+ else
+ arg_vectype = bestn->simdclone->args[i].vector_type;
+ vect_record_loop_mask (loop_vinfo,
+ &LOOP_VINFO_MASKS (loop_vinfo),
+ ncopies * num_mask_args, arg_vectype,
+ op);
+ }
break;
}
}
--
2.51.0