The testcase shows that we end up (len-)masking a call even when
that is not semantically necessary. The following aligns the condition
for applying len-masking with the condition for applying loop-masking,
and adjusts downstream conditions to look at the chosen ifn instead of
replicating a possibly complex decision.
Bootstrap and regtest running on x86_64-unknown-linux-gnu.
PR tree-optimization/123755
* tree-vect-stmts.cc (vectorizable_call): Adjust len-masking
condition. Simplify code generation.
* gcc.dg/vect/vect-pr123755-2.c: New testcase.
---
gcc/testsuite/gcc.dg/vect/vect-pr123755-2.c | 10 ++++++++
gcc/tree-vect-stmts.cc | 27 +++++++++------------
2 files changed, 21 insertions(+), 16 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-pr123755-2.c
diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr123755-2.c b/gcc/testsuite/gcc.dg/vect/vect-pr123755-2.c
new file mode 100644
index 00000000000..7befef09edc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-pr123755-2.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-ffast-math" } */
+
+double foo (double *a, double *b, double *c)
+{
+ double result = 0.0;
+ for (int i = 0; i < 1024; ++i)
+ result += i & 1 ? __builtin_fma (a[i], b[i], c[i]) : 0.0;
+ return result;
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index ba2e4633258..e7d6b4c123a 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -3700,7 +3700,6 @@ vectorizable_call (vec_info *vinfo,
internal_fn cond_fn = (internal_fn_mask_index (ifn) != -1
? ifn : get_conditional_internal_fn (ifn));
internal_fn cond_len_fn = get_len_internal_fn (cond_fn);
- int len_opno = internal_fn_len_index (cond_len_fn);
vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
unsigned int nvectors = vect_get_num_copies (vinfo, slp_node);
@@ -3768,26 +3767,22 @@ vectorizable_call (vec_info *vinfo,
bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
unsigned int vect_nargs = nargs;
- if (len_loop_p)
+ if (len_loop_p && (reduc_idx >= 0 || could_trap || mask_opno >= 0))
{
- if (len_opno >= 0)
- {
- ifn = cond_len_fn;
- /* COND_* -> COND_LEN_* takes 2 extra arguments:LEN,BIAS. */
- vect_nargs += 2;
- /* But unless there's a mask argument already we need that
- as well, and an else value. */
- if (mask_opno == -1)
- vect_nargs += 2;
- }
- else if (reduc_idx >= 0)
- gcc_unreachable ();
+ ifn = cond_len_fn;
+ /* COND_* -> COND_LEN_* takes 2 extra arguments:LEN,BIAS. */
+ vect_nargs += 2;
+ /* But unless there's a mask argument already we need that
+ as well, and an else value. */
+ if (mask_opno == -1)
+ vect_nargs += 2;
}
else if (masked_loop_p && mask_opno == -1 && (reduc_idx >= 0 || could_trap))
{
ifn = cond_fn;
vect_nargs += 2;
}
+ int len_opno = internal_fn_len_index (ifn);
if (clz_ctz_arg1)
++vect_nargs;
@@ -3828,7 +3823,7 @@ vectorizable_call (vec_info *vinfo,
int varg = 0;
/* Add the mask if necessary. */
if ((masked_loop_p || len_loop_p) && mask_opno == -1
- && (reduc_idx >= 0 || could_trap))
+ && internal_fn_mask_index (ifn) != -1)
{
gcc_assert (internal_fn_mask_index (ifn) == varg);
if (masked_loop_p)
@@ -3854,7 +3849,7 @@ vectorizable_call (vec_info *vinfo,
}
/* Add the else value if necessary. */
if ((masked_loop_p || len_loop_p) && mask_opno == -1
- && (reduc_idx >= 0 || could_trap))
+ && internal_fn_else_index (ifn) != -1)
{
gcc_assert (internal_fn_else_index (ifn) == varg);
if (reduc_idx >= 0)
--
2.51.0