> On Tue, 10 Feb 2026, Juergen Christ wrote:
>
> > When load/store with length is used and only QImode versions are
> > available, vectorizable_live_operation produces wrong results for
> > VEC_EXTRACT. Provide a flag to vect_get_loop_len to specify if
> > bias-adjusted length should be used or not.
> >
> > PR tree-optimization/122297
> >
> > gcc/ChangeLog:
> >
> > * tree-vect-loop.cc (vectorize_fold_left_reduction): Adjust.
> > (vectorizable_induction): Adjust.
> > (vectorizable_live_operation_1): Adjust.
> > (vect_get_loop_len): Provide parameter to select bias-adjusted
> > length.
> > (vect_gen_loop_len_mask): Adjust.
> > (vect_update_ivs_after_vectorizer_for_early_breaks): Adjust.
> > * tree-vect-stmts.cc (vect_get_strided_load_store_ops): Adjust.
> > (vectorizable_call): Adjust.
> > (vectorizable_operation): Adjust.
> > (vectorizable_store): Adjust.
> > (vectorizable_load): Adjust.
> > (vectorizable_condition): Adjust.
> > * tree-vectorizer.h (vect_get_loop_len): Add parameter.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.dg/vect/nodump-extractlast-1.c: Fix typo.
> > * gcc.dg/vect/nodump-extractlast-2.c: New test.
> >
> > Bootstrapped and regtested on s390, x86, and ppc64le. Ok for trunk?
> >
> > Signed-off-by: Juergen Christ <[email protected]>
> > ---
> > .../gcc.dg/vect/nodump-extractlast-1.c | 2 +-
> > .../gcc.dg/vect/nodump-extractlast-2.c | 23 +++++++
> > gcc/tree-vect-loop.cc | 62 +++++++++++++------
> > gcc/tree-vect-stmts.cc | 24 +++----
> > gcc/tree-vectorizer.h | 2 +-
> > 5 files changed, 82 insertions(+), 31 deletions(-)
> > create mode 100644 gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c
> >
> > diff --git a/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c
> > b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c
> > index 980ac3e42188..83d8a38f13e3 100644
> > --- a/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c
> > +++ b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c
> > @@ -1,4 +1,4 @@
> > -/* Check for a bung in the treatment of LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS
> > when
> > +/* Check for a bug in the treatment of LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS
> > when
> > using VEC_EXTRACT. */
> > /* { dg-require-effective-target vect_int } */
> >
> > diff --git a/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c
> > b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c
> > new file mode 100644
> > index 000000000000..9697687c1084
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c
> > @@ -0,0 +1,23 @@
> > +/* Check for a bug in the treatment of LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS
> > when
> > + using VEC_EXTRACT. This variant uses .LEN_LOAD which might use QImode
> > + vectors during load, but SImode vectors for the extraction. */
> > +int __attribute__ ((noinline, noclone))
> > +test_int (int *x, int n, int value)
> > +{
> > + int last;
> > + for (int j = 0; j < n; ++j)
> > + {
> > + last = x[j];
> > + x[j] = last * value;
> > + }
> > + return last;
> > +}
> > +
> > +int
> > +main ()
> > +{
> > + int arr[] = {1,2,3,4,5,1};
> > + if (test_int (arr, sizeof (arr) / sizeof (arr[0]), 42) != 1)
> > + __builtin_abort();
> > + return 0;
> > +}
> > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > index 8e60a433596f..8e40f0ceb93b 100644
> > --- a/gcc/tree-vect-loop.cc
> > +++ b/gcc/tree-vect-loop.cc
> > @@ -6503,8 +6503,11 @@ vectorize_fold_left_reduction (loop_vec_info
> > loop_vinfo,
> > mask = vec_opmask[i];
> > if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
> > {
> > + /* ??? Why do we use LOAD_STORE_BIAS here when we do not do a load or
> > + a store? Shouldn't we instead just ensure that LEN represents the
> > + number of elements in the vector? */
>
> Yes. I think the comment is unwarranted.
Oops, sorry. That comment should NOT be part of the patch. I will remove it.
>
> > len = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num, vectype_in,
> > - i, 1);
> > + i, 1, false);
> > signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
> > bias = build_int_cst (intQI_type_node, biasval);
> > if (!is_cond_op)
> > @@ -9885,7 +9888,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> > vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> > tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
> > - vectype, 0, 0);
> > + vectype, 0, 0, false);
> > if (SCALAR_FLOAT_TYPE_P (stept))
> > expr = gimple_build (&stmts, FLOAT_EXPR, stept, len);
> > else
> > @@ -10032,7 +10035,7 @@ vectorizable_live_operation_1 (loop_vec_info
> > loop_vinfo, basic_block exit_bb,
> > {
> > /* Emit:
> >
> > - SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - (BIAS + 1)>
> > + SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - 1>
> >
> > where VEC_LHS is the vectorized live-out result, LEN is the length of
> > the vector, BIAS is the load-store bias. The bias should not be used
> > @@ -10043,21 +10046,14 @@ vectorizable_live_operation_1 (loop_vec_info
> > loop_vinfo, basic_block exit_bb,
> > gimple_stmt_iterator gsi = gsi_last (tem);
> > tree len = vect_get_loop_len (loop_vinfo, &gsi,
> > &LOOP_VINFO_LENS (loop_vinfo),
> > - 1, vectype, 0, 1);
> > + 1, vectype, 0, 1, false);
> > gimple_seq_add_seq (&stmts, tem);
> >
> > - /* BIAS + 1. */
> > - signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS
> > (loop_vinfo);
> > - tree bias_plus_one
> > - = int_const_binop (PLUS_EXPR,
> > - build_int_cst (TREE_TYPE (len), biasval),
> > - build_one_cst (TREE_TYPE (len)));
> > -
> > - /* LAST_INDEX = LEN - (BIAS + 1). */
> > + /* LAST_INDEX = LEN - 1. */
> > tree last_index = gimple_build (&stmts, MINUS_EXPR, TREE_TYPE (len),
> > - len, bias_plus_one);
> > + len, build_one_cst (TREE_TYPE (len)));
> >
> > - /* SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - (BIAS + 1)>. */
> > + /* SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - 1>. */
>
> So VEC_EXTRACT isn't affected by bias? I'll note that on all the
> .COND_LEN_* internal functions we have length and bias, so whether
> it is applied or not is in the end subject to target consideration.
> That possibly means VEC_EXTRACT would need to take a bias argument?
>
> I don't quite remember why we have both bias parameters but also
> appear to pass len adjusted by bias already. Huh.
>
> The patch looks OK, but it would be nice to have some overall
> documentation on this length + bias thing somewhere. IMO this
> belongs to md.texi, but there's a PR about the patterns being
> an unordered mess without structure. Possibly amending the
> first paragraph is an option, short of subdividing the
> standard pattern name listing into logical parts.
Do you want the documentation to be part of this patch? Please tell me
how to proceed with this patch (we need a fix for this problem because
glibc is also miscompiled when built with -O3, and this patch fixes
it).
Juergen
>
> Thanks,
> Richard.
>
> > tree scalar_res
> > = gimple_build (&stmts, CFN_VEC_EXTRACT, TREE_TYPE (vectype),
> > vec_lhs_phi, last_index);
> > @@ -10678,7 +10674,7 @@ vect_record_loop_len (loop_vec_info loop_vinfo,
> > vec_loop_lens *lens,
> > tree
> > vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
> > vec_loop_lens *lens, unsigned int nvectors, tree vectype,
> > - unsigned int index, unsigned int factor)
> > + unsigned int index, unsigned int factor, bool adjusted)
> > {
> > rgroup_controls *rgl = &(*lens)[nvectors - 1];
> > bool use_bias_adjusted_len =
> > @@ -10711,7 +10707,7 @@ vect_get_loop_len (loop_vec_info loop_vinfo,
> > gimple_stmt_iterator *gsi,
> > }
> > }
> >
> > - if (use_bias_adjusted_len)
> > + if (use_bias_adjusted_len && adjusted)
> > return rgl->bias_adjusted_ctrl;
> >
> > tree loop_len = rgl->controls[index];
> > @@ -10734,6 +10730,36 @@ vect_get_loop_len (loop_vec_info loop_vinfo,
> > gimple_stmt_iterator *gsi,
> > gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
> > }
> > }
> > + else if (factor && rgl->factor != factor)
> > + {
> > + /* The number of scalars per iteration, scalar occupied bytes and
> > + the number of vectors are both compile-time constants. */
> > + unsigned int nscalars_per_iter
> > + = exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype),
> > + LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant ();
> > + unsigned int rglvecsize = rgl->factor * rgl->max_nscalars_per_iter;
> > + unsigned int vecsize = nscalars_per_iter * factor;
> > + if (rglvecsize > vecsize)
> > + {
> > + unsigned int fac = rglvecsize / vecsize;
> > + tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
> > + gimple_seq seq = NULL;
> > + loop_len = gimple_build (&seq, EXACT_DIV_EXPR, iv_type, loop_len,
> > + build_int_cst (iv_type, fac));
> > + if (seq)
> > + gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
> > + }
> > + else if (rglvecsize < vecsize)
> > + {
> > + unsigned int fac = vecsize / rglvecsize;
> > + tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
> > + gimple_seq seq = NULL;
> > + loop_len = gimple_build (&seq, MULT_EXPR, iv_type, loop_len,
> > + build_int_cst (iv_type, fac));
> > + if (seq)
> > + gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
> > + }
> > + }
> > return loop_len;
> > }
> >
> > @@ -10751,7 +10777,7 @@ vect_gen_loop_len_mask (loop_vec_info loop_vinfo,
> > gimple_stmt_iterator *gsi,
> > tree all_one_mask = build_all_ones_cst (vectype);
> > tree all_zero_mask = build_zero_cst (vectype);
> > tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors, vectype,
> > index,
> > - factor);
> > + factor, true);
> > tree bias = build_int_cst (intQI_type_node,
> > LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo));
> > tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL,
> > "vec_len_mask");
> > @@ -11022,7 +11048,7 @@ vect_update_ivs_after_vectorizer_for_early_breaks
> > (loop_vec_info loop_vinfo)
> > {
> > vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> > tree_vf = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
> > - NULL_TREE, 0, 0);
> > + NULL_TREE, 0, 0, true);
> > }
> >
> > tree iter_var;
> > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> > index ed2486451ca0..a97f7ec07c4f 100644
> > --- a/gcc/tree-vect-stmts.cc
> > +++ b/gcc/tree-vect-stmts.cc
> > @@ -3182,7 +3182,7 @@ vect_get_strided_load_store_ops (stmt_vec_info
> > stmt_info, slp_tree node,
> > .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
> > vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
> > tree loop_len
> > - = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
> > + = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0,
> > true);
> > tree tmp
> > = fold_build2 (MULT_EXPR, sizetype,
> > fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
> > @@ -3253,7 +3253,7 @@ vect_get_loop_variant_data_ptr_increment (
> > addr = addr + .SELECT_VL (ARG..) * step;
> > */
> > tree loop_len
> > - = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
> > + = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0,
> > true);
> > tree len_type = TREE_TYPE (loop_len);
> > /* Since the outcome of .SELECT_VL is element size, we should adjust
> > it into bytesize so that it can be used in address pointer variable
> > @@ -3842,7 +3842,7 @@ vectorizable_call (vec_info *vinfo,
> > {
> > unsigned int vec_num = vec_oprnds0.length ();
> > tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
> > - vec_num, vectype_out, i, 1);
> > + vec_num, vectype_out, i, 1,
> > true);
> > signed char biasval
> > = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
> > tree bias = build_int_cst (intQI_type_node, biasval);
> > @@ -7041,7 +7041,7 @@ vectorizable_operation (vec_info *vinfo,
> > if (len_loop_p)
> > {
> > tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
> > - vec_num, vectype, i, 1);
> > + vec_num, vectype, i, 1, true);
> > signed char biasval
> > = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
> > tree bias = build_int_cst (intQI_type_node, biasval);
> > @@ -8757,7 +8757,7 @@ vectorizable_store (vec_info *vinfo,
> > {
> > if (loop_lens)
> > final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
> > - ncopies, vectype, j, 1);
> > + ncopies, vectype, j, 1, true);
> > else
> > final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
> > signed char biasval
> > @@ -8950,7 +8950,7 @@ vectorizable_store (vec_info *vinfo,
> > if (loop_lens)
> > final_len = vect_get_loop_len (loop_vinfo, gsi,
> > loop_lens, num_stmts,
> > - vectype, j, 1);
> > + vectype, j, 1, true);
> > else
> > final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
> >
> > @@ -9334,7 +9334,7 @@ vectorizable_store (vec_info *vinfo,
> > unsigned factor
> > = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
> > final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
> > - vec_num, vectype, i, factor);
> > + vec_num, vectype, i, factor, true);
> > }
> > else if (final_mask)
> > {
> > @@ -10697,7 +10697,7 @@ vectorizable_load (vec_info *vinfo,
> > {
> > if (loop_lens)
> > final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
> > - ncopies, vectype, j, 1);
> > + ncopies, vectype, j, 1, true);
> > else
> > final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
> > signed char biasval
> > @@ -10900,7 +10900,7 @@ vectorizable_load (vec_info *vinfo,
> > {
> > if (loop_lens)
> > final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
> > - vec_num, vectype, i, 1);
> > + vec_num, vectype, i, 1,
> > true);
> > else
> > final_len = build_int_cst (sizetype,
> > TYPE_VECTOR_SUBPARTS (vectype));
> > @@ -11352,7 +11352,7 @@ vectorizable_load (vec_info *vinfo,
> > unsigned factor
> > = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
> > final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
> > - vec_num, vectype, i, factor);
> > + vec_num, vectype, i, factor,
> > true);
> > }
> > else if (final_mask)
> > {
> > @@ -12429,8 +12429,10 @@ vectorizable_condition (vec_info *vinfo,
> > {
> > if (lens)
> > {
> > + /* ??? Do we really want the adjusted LEN here? Isn't this
> > + based on number of elements? */
> > len = vect_get_loop_len (loop_vinfo, gsi, lens,
> > - vec_num, vectype, i, 1);
> > + vec_num, vectype, i, 1, true);
> > signed char biasval
> > = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
> > bias = build_int_cst (intQI_type_node, biasval);
> > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> > index 7a38d4969cf2..2116507b69ce 100644
> > --- a/gcc/tree-vectorizer.h
> > +++ b/gcc/tree-vectorizer.h
> > @@ -2673,7 +2673,7 @@ extern void vect_record_loop_len (loop_vec_info,
> > vec_loop_lens *, unsigned int,
> > tree, unsigned int);
> > extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *,
> > vec_loop_lens *, unsigned int, tree,
> > - unsigned int, unsigned int);
> > + unsigned int, unsigned int, bool);
> > extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *,
> > gimple_stmt_iterator *, vec_loop_lens *,
> > unsigned int, tree, tree, unsigned int,
> >
>
> --
> Richard Biener <[email protected]>
> SUSE Software Solutions Germany GmbH,
> Frankenstrasse 146, 90461 Nuernberg, Germany;
> GF: Jochen Jaser, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)