> On Tue, 10 Feb 2026, Tamar Christina wrote:
>
> > > -----Original Message-----
> > > From: Richard Biener <[email protected]>
> > > Sent: 10 February 2026 09:49
> > > To: Juergen Christ <[email protected]>
> > > Cc: [email protected]; Tamar Christina <[email protected]>;
> > > [email protected]; [email protected]
> > > Subject: Re: [PATCH] tree-optimization/122297 - fix load/store bias
> > > handling
> > >
> > > On Tue, 10 Feb 2026, Juergen Christ wrote:
> > >
> > > > When load/store with length is used and only QImode versions are
> > > > available, vectorizable_live_operation produces wrong results for
> > > > VEC_EXTRACT. Provide a flag to vect_get_loop_len to specify if
> > > > bias-adjusted length should be used or not.
> > > >
> > > > PR tree-optimization/122297
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > > * tree-vect-loop.cc (vectorize_fold_left_reduction): Adjust.
> > > > (vectorizable_induction): Adjust.
> > > > (vectorizable_live_operation_1): Adjust.
> > > > (vect_get_loop_len): Provide parameter to select bias-adjusted
> > > > length.
> > > > (vect_gen_loop_len_mask): Adjust.
> > > > (vect_update_ivs_after_vectorizer_for_early_breaks): Adjust.
> > > > * tree-vect-stmts.cc (vect_get_strided_load_store_ops): Adjust.
> > > > (vectorizable_call): Adjust.
> > > > (vectorizable_operation): Adjust.
> > > > (vectorizable_store): Adjust.
> > > > (vectorizable_load): Adjust.
> > > > (vectorizable_condition): Adjust.
> > > > * tree-vectorizer.h (vect_get_loop_len): Add parameter.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > > * gcc.dg/vect/nodump-extractlast-1.c: Fix typo.
> > > > * gcc.dg/vect/nodump-extractlast-2.c: New test.
> > > >
> > > > Bootstrapped and regtested on s390, x86, and ppc64le. Ok for trunk?
> > > >
> > > > Signed-off-by: Juergen Christ <[email protected]>
> > > > ---
> > > > .../gcc.dg/vect/nodump-extractlast-1.c | 2 +-
> > > > .../gcc.dg/vect/nodump-extractlast-2.c | 23 +++++++
> > > > gcc/tree-vect-loop.cc | 62 +++++++++++++------
> > > > gcc/tree-vect-stmts.cc | 24 +++----
> > > > gcc/tree-vectorizer.h | 2 +-
> > > > 5 files changed, 82 insertions(+), 31 deletions(-)
> > > > create mode 100644 gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c
> > > >
> > > > diff --git a/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c
> > > b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c
> > > > index 980ac3e42188..83d8a38f13e3 100644
> > > > --- a/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c
> > > > +++ b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c
> > > > @@ -1,4 +1,4 @@
> > > > -/* Check for a bung in the treatment of
> > > LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS when
> > > > +/* Check for a bug in the treatment of
> > > LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS when
> > > > using VEC_EXTRACT. */
> > > > /* { dg-require-effective-target vect_int } */
> > > >
> > > > diff --git a/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c
> > > b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c
> > > > new file mode 100644
> > > > index 000000000000..9697687c1084
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c
> > > > @@ -0,0 +1,23 @@
> > > > +/* Check for a bug in the treatment of
> > > LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS when
> > > > + using VEC_EXTRACT. This variant uses .LEN_LOAD which might use
> > > QImode
> > > > + vectors during load, but SImode vectors for the extraction. */
> > > > +int __attribute__ ((noinline, noclone))
> > > > +test_int (int *x, int n, int value)
> > > > +{
> > > > + int last;
> > > > + for (int j = 0; j < n; ++j)
> > > > + {
> > > > + last = x[j];
> > > > + x[j] = last * value;
> > > > + }
> > > > + return last;
> > > > +}
> > > > +
> > > > +int
> > > > +main ()
> > > > +{
> > > > + int arr[] = {1,2,3,4,5,1};
> > > > + if (test_int (arr, sizeof (arr) / sizeof (arr[0]), 42) != 1)
> > > > + __builtin_abort();
> > > > + return 0;
> > > > +}
> > > > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > > > index 8e60a433596f..8e40f0ceb93b 100644
> > > > --- a/gcc/tree-vect-loop.cc
> > > > +++ b/gcc/tree-vect-loop.cc
> > > > @@ -6503,8 +6503,11 @@ vectorize_fold_left_reduction (loop_vec_info
> > > loop_vinfo,
> > > > mask = vec_opmask[i];
> > > > if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
> > > > {
> > > > + /* ??? Why do we use LOAD_STORE_BIAS here when we do not do a
> > > load or
> > > > + a store? Shouldn't we instead just ensure that LEN
> > > > represents the
> > > > + number of elements in the vector? */
> > >
> > > Yes. I think the comment is unwarranted.
> > >
> > > > len = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num,
> > > > vectype_in,
> > > > - i, 1);
> > > > + i, 1, false);
> > > > signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS
> > > (loop_vinfo);
> > > > bias = build_int_cst (intQI_type_node, biasval);
> > > > if (!is_cond_op)
> > > > @@ -9885,7 +9888,7 @@ vectorizable_induction (loop_vec_info
> > > loop_vinfo,
> > > > _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> > > > vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> > > > tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
> > > > - vectype, 0, 0);
> > > > + vectype, 0, 0, false);
> > > > if (SCALAR_FLOAT_TYPE_P (stept))
> > > > expr = gimple_build (&stmts, FLOAT_EXPR, stept, len);
> > > > else
> > > > @@ -10032,7 +10035,7 @@ vectorizable_live_operation_1 (loop_vec_info
> > > loop_vinfo, basic_block exit_bb,
> > > > {
> > > > /* Emit:
> > > >
> > > > - SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - (BIAS + 1)>
> > > > + SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - 1>
> > > >
> > > > where VEC_LHS is the vectorized live-out result, LEN is the
> > > > length of
> > > > the vector, BIAS is the load-store bias. The bias should not
> > > > be used
> > > > @@ -10043,21 +10046,14 @@ vectorizable_live_operation_1
> > > (loop_vec_info loop_vinfo, basic_block exit_bb,
> > > > gimple_stmt_iterator gsi = gsi_last (tem);
> > > > tree len = vect_get_loop_len (loop_vinfo, &gsi,
> > > > &LOOP_VINFO_LENS (loop_vinfo),
> > > > - 1, vectype, 0, 1);
> > > > + 1, vectype, 0, 1, false);
> > > > gimple_seq_add_seq (&stmts, tem);
> > > >
> > > > - /* BIAS + 1. */
> > > > - signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS
> > > (loop_vinfo);
> > > > - tree bias_plus_one
> > > > - = int_const_binop (PLUS_EXPR,
> > > > - build_int_cst (TREE_TYPE (len), biasval),
> > > > - build_one_cst (TREE_TYPE (len)));
> > > > -
> > > > - /* LAST_INDEX = LEN - (BIAS + 1). */
> > > > + /* LAST_INDEX = LEN - 1. */
> > > > tree last_index = gimple_build (&stmts, MINUS_EXPR, TREE_TYPE
> > > > (len),
> > > > - len, bias_plus_one);
> > > > + len, build_one_cst (TREE_TYPE
> > > > (len)));
> > > >
> > > > - /* SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - (BIAS + 1)>. */
> > > > + /* SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - 1>. */
> > >
> > > So VEC_EXTRACT isn't affected by bias? I'll note that on all the
> > > .COND_LEN_* internal functions we have length and bias, so whether
> > > it is applied or not is in the end subject to target consideration.
> > > That possibly means VEC_EXTRACT would need to take a bias argument?
> > >
> > > I don't quite remember why we have both bias parameters but also
> > > appear to pass len adjusted by bias already. Huh.
> > >
> > > The patch looks OK, but it would be nice to have some overall
> > > documentation on this length + bias thing somewhere. IMO this
> > > belongs to md.texi, but there's a PR about the patterns being
> > > an unordered mess without structure. Possibly amending the
> > > first paragraph is an option, short of subdividing the
> > > standard pattern name listing into logical parts.
> >
> > I've been trying to understand this myself, so let's see if my current
> > understanding is in the right direction. My understanding is that on some
> > targets len == 0 isn't a valid option, and so for those targets a bias is
> > applied such that the minimum len is biased so the operation is valid.
> >
> > At least that's my naïve understanding. How far off am I, Robin?
>
> What adds to my confusion is that we pass down both 'len' and 'bias'
> but (IIRC for optimization reasons) apply the bias already to 'len'?
>
> So I thought vect_get_loop_len would always return biased len and
> to arrive at the actual "len" we have to un-apply the bias?
>
All of that was obviously introduced before my time, but from an s390
perspective, it looks to me like it is supposed to solve this problem:
On s390, we specify the last byte index to load. Hence, len == 0
means we load 1 byte (byte index 0 in the vector). It is not possible
on s390 to have a vector load with length that loads nothing. To
solve this, we set bias == -1 for load/store with length operations.
But it is only needed for load/store with length operations. All
other operations (especially VEC_EXTRACT) do not have any bias. Also,
other operations where we already have a vector loaded do not need any
bias. The naming in the code (e.g.,
LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS) also suggests that the bias is only
meant to be used during load/store of partial vectors. All other
operations should not be affected by this.
Now, bias handling is only one part of this patch. The other part fixes
an issue that, somehow, also only appeared on s390. Here, we get wrong
code since we register a length for a load with length. On s390, this
creates a length in bytes. This length is then later used during a
VEC_EXTRACT where it is interpreted as a length in SImode elements. So
the previous code path could not correctly adjust the length. It is not
sufficient to simply undo
the bias handling. The length also has to be scaled from QImode to
SImode. This is why I added the additional parameter to tell
vect_get_loop_len if the operation is a load/store operation and,
thus, should apply the bias (including the vector mode used for
load/store), or if it should not apply the bias and scale to the
desired vector mode.
Juergen
> Richard.
>
> > Thanks,
> > Tamar
> >
> > >
> > > Thanks,
> > > Richard.
> > >
> > > > tree scalar_res
> > > > = gimple_build (&stmts, CFN_VEC_EXTRACT, TREE_TYPE (vectype),
> > > > vec_lhs_phi, last_index);
> > > > @@ -10678,7 +10674,7 @@ vect_record_loop_len (loop_vec_info
> > > loop_vinfo, vec_loop_lens *lens,
> > > > tree
> > > > vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
> > > > vec_loop_lens *lens, unsigned int nvectors, tree
> > > > vectype,
> > > > - unsigned int index, unsigned int factor)
> > > > + unsigned int index, unsigned int factor, bool
> > > > adjusted)
> > > > {
> > > > rgroup_controls *rgl = &(*lens)[nvectors - 1];
> > > > bool use_bias_adjusted_len =
> > > > @@ -10711,7 +10707,7 @@ vect_get_loop_len (loop_vec_info loop_vinfo,
> > > gimple_stmt_iterator *gsi,
> > > > }
> > > > }
> > > >
> > > > - if (use_bias_adjusted_len)
> > > > + if (use_bias_adjusted_len && adjusted)
> > > > return rgl->bias_adjusted_ctrl;
> > > >
> > > > tree loop_len = rgl->controls[index];
> > > > @@ -10734,6 +10730,36 @@ vect_get_loop_len (loop_vec_info
> > > loop_vinfo, gimple_stmt_iterator *gsi,
> > > > gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
> > > > }
> > > > }
> > > > + else if (factor && rgl->factor != factor)
> > > > + {
> > > > + /* The number of scalars per iteration, scalar occupied bytes and
> > > > + the number of vectors are both compile-time constants. */
> > > > + unsigned int nscalars_per_iter
> > > > + = exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype),
> > > > + LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant
> > > > ();
> > > > + unsigned int rglvecsize = rgl->factor *
> > > > rgl->max_nscalars_per_iter;
> > > > + unsigned int vecsize = nscalars_per_iter * factor;
> > > > + if (rglvecsize > vecsize)
> > > > + {
> > > > + unsigned int fac = rglvecsize / vecsize;
> > > > + tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
> > > > + gimple_seq seq = NULL;
> > > > + loop_len = gimple_build (&seq, EXACT_DIV_EXPR, iv_type,
> > > > loop_len,
> > > > + build_int_cst (iv_type, fac));
> > > > + if (seq)
> > > > + gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
> > > > + }
> > > > + else if (rglvecsize < vecsize)
> > > > + {
> > > > + unsigned int fac = vecsize / rglvecsize;
> > > > + tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
> > > > + gimple_seq seq = NULL;
> > > > + loop_len = gimple_build (&seq, MULT_EXPR, iv_type, loop_len,
> > > > + build_int_cst (iv_type, fac));
> > > > + if (seq)
> > > > + gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
> > > > + }
> > > > + }
> > > > return loop_len;
> > > > }
> > > >
> > > > @@ -10751,7 +10777,7 @@ vect_gen_loop_len_mask (loop_vec_info
> > > loop_vinfo, gimple_stmt_iterator *gsi,
> > > > tree all_one_mask = build_all_ones_cst (vectype);
> > > > tree all_zero_mask = build_zero_cst (vectype);
> > > > tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors,
> > > > vectype, index,
> > > > - factor);
> > > > + factor, true);
> > > > tree bias = build_int_cst (intQI_type_node,
> > > > LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS
> > > (loop_vinfo));
> > > > tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL,
> > > "vec_len_mask");
> > > > @@ -11022,7 +11048,7 @@
> > > vect_update_ivs_after_vectorizer_for_early_breaks (loop_vec_info
> > > loop_vinfo)
> > > > {
> > > > vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> > > > tree_vf = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
> > > > - NULL_TREE, 0, 0);
> > > > + NULL_TREE, 0, 0, true);
> > > > }
> > > >
> > > > tree iter_var;
> > > > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> > > > index ed2486451ca0..a97f7ec07c4f 100644
> > > > --- a/gcc/tree-vect-stmts.cc
> > > > +++ b/gcc/tree-vect-stmts.cc
> > > > @@ -3182,7 +3182,7 @@ vect_get_strided_load_store_ops (stmt_vec_info
> > > stmt_info, slp_tree node,
> > > > .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
> > > > vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
> > > > tree loop_len
> > > > - = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0,
> > > > 0);
> > > > + = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0,
> > > > 0, true);
> > > > tree tmp
> > > > = fold_build2 (MULT_EXPR, sizetype,
> > > > fold_convert (sizetype, unshare_expr (DR_STEP
> > > > (dr))),
> > > > @@ -3253,7 +3253,7 @@ vect_get_loop_variant_data_ptr_increment (
> > > > addr = addr + .SELECT_VL (ARG..) * step;
> > > > */
> > > > tree loop_len
> > > > - = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0,
> > > > 0);
> > > > + = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0,
> > > > 0, true);
> > > > tree len_type = TREE_TYPE (loop_len);
> > > > /* Since the outcome of .SELECT_VL is element size, we should adjust
> > > > it into bytesize so that it can be used in address pointer
> > > > variable
> > > > @@ -3842,7 +3842,7 @@ vectorizable_call (vec_info *vinfo,
> > > > {
> > > > unsigned int vec_num = vec_oprnds0.length ();
> > > > tree len = vect_get_loop_len (loop_vinfo, gsi,
> > > > lens,
> > > > - vec_num,
> > > > vectype_out, i,
> > > 1);
> > > > + vec_num,
> > > > vectype_out, i, 1,
> > > true);
> > > > signed char biasval
> > > > = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS
> > > (loop_vinfo);
> > > > tree bias = build_int_cst (intQI_type_node,
> > > > biasval);
> > > > @@ -7041,7 +7041,7 @@ vectorizable_operation (vec_info *vinfo,
> > > > if (len_loop_p)
> > > > {
> > > > tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
> > > > - vec_num, vectype, i, 1);
> > > > + vec_num, vectype, i, 1,
> > > > true);
> > > > signed char biasval
> > > > = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
> > > > tree bias = build_int_cst (intQI_type_node, biasval);
> > > > @@ -8757,7 +8757,7 @@ vectorizable_store (vec_info *vinfo,
> > > > {
> > > > if (loop_lens)
> > > > final_len = vect_get_loop_len (loop_vinfo, gsi,
> > > > loop_lens,
> > > > - ncopies, vectype, j, 1);
> > > > + ncopies, vectype, j, 1,
> > > > true);
> > > > else
> > > > final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
> > > > signed char biasval
> > > > @@ -8950,7 +8950,7 @@ vectorizable_store (vec_info *vinfo,
> > > > if (loop_lens)
> > > > final_len = vect_get_loop_len (loop_vinfo, gsi,
> > > > loop_lens, num_stmts,
> > > > - vectype, j, 1);
> > > > + vectype, j, 1, true);
> > > > else
> > > > final_len = size_int (TYPE_VECTOR_SUBPARTS
> > > > (vectype));
> > > >
> > > > @@ -9334,7 +9334,7 @@ vectorizable_store (vec_info *vinfo,
> > > > unsigned factor
> > > > = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
> > > > final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
> > > > - vec_num, vectype, i, factor);
> > > > + vec_num, vectype, i, factor,
> > > > true);
> > > > }
> > > > else if (final_mask)
> > > > {
> > > > @@ -10697,7 +10697,7 @@ vectorizable_load (vec_info *vinfo,
> > > > {
> > > > if (loop_lens)
> > > > final_len = vect_get_loop_len (loop_vinfo, gsi,
> > > > loop_lens,
> > > > - ncopies, vectype, j, 1);
> > > > + ncopies, vectype, j, 1,
> > > > true);
> > > > else
> > > > final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
> > > > signed char biasval
> > > > @@ -10900,7 +10900,7 @@ vectorizable_load (vec_info *vinfo,
> > > > {
> > > > if (loop_lens)
> > > > final_len = vect_get_loop_len (loop_vinfo, gsi,
> > > > loop_lens,
> > > > - vec_num, vectype, i,
> > > > 1);
> > > > + vec_num, vectype, i,
> > > > 1,
> > > true);
> > > > else
> > > > final_len = build_int_cst (sizetype,
> > > > TYPE_VECTOR_SUBPARTS
> > > (vectype));
> > > > @@ -11352,7 +11352,7 @@ vectorizable_load (vec_info *vinfo,
> > > > unsigned factor
> > > > = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE
> > > (vmode);
> > > > final_len = vect_get_loop_len (loop_vinfo, gsi,
> > > > loop_lens,
> > > > - vec_num, vectype, i,
> > > > factor);
> > > > + vec_num, vectype, i,
> > > > factor, true);
> > > > }
> > > > else if (final_mask)
> > > > {
> > > > @@ -12429,8 +12429,10 @@ vectorizable_condition (vec_info *vinfo,
> > > > {
> > > > if (lens)
> > > > {
> > > > + /* ??? Do we really want the adjusted LEN here?
> > > > Isn't this
> > > > + based on number of elements? */
> > > > len = vect_get_loop_len (loop_vinfo, gsi, lens,
> > > > - vec_num, vectype, i, 1);
> > > > + vec_num, vectype, i, 1,
> > > > true);
> > > > signed char biasval
> > > > = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
> > > > bias = build_int_cst (intQI_type_node, biasval);
> > > > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> > > > index 7a38d4969cf2..2116507b69ce 100644
> > > > --- a/gcc/tree-vectorizer.h
> > > > +++ b/gcc/tree-vectorizer.h
> > > > @@ -2673,7 +2673,7 @@ extern void vect_record_loop_len
> > > (loop_vec_info, vec_loop_lens *, unsigned int,
> > > > tree, unsigned int);
> > > > extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *,
> > > > vec_loop_lens *, unsigned int, tree,
> > > > - unsigned int, unsigned int);
> > > > + unsigned int, unsigned int, bool);
> > > > extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator
> > > *,
> > > > gimple_stmt_iterator *,
> > > > vec_loop_lens *,
> > > > unsigned int, tree, tree, unsigned
> > > > int,
> > > >
> > >
> > > --
> > > Richard Biener <[email protected]>
> > > SUSE Software Solutions Germany GmbH,
> > > Frankenstrasse 146, 90461 Nuernberg, Germany;
> > > GF: Jochen Jaser, Andrew McDonald, Werner Knoblich; (HRB 36809, AG
> > > Nuernberg)
> >
>
> --
> Richard Biener <[email protected]>
> SUSE Software Solutions Germany GmbH,
> Frankenstrasse 146, 90461 Nuernberg, Germany;
> GF: Jochen Jaser, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)