> On Tue, 10 Feb 2026, Juergen Christ wrote:
> 
> > When load/store with length is used and only QImode versions are
> > available, vectorizable_live_operation produces wrong results for
> > VEC_EXTRACT.  Provide a flag to vect_get_loop_len to specify if
> > bias-adjusted length should be used or not.
> > 
> >     PR tree-optimization/122297      
> > 
> > gcc/ChangeLog:
> > 
> >     * tree-vect-loop.cc (vectorize_fold_left_reduction): Adjust.
> >     (vectorizable_induction): Adjust.
> >     (vectorizable_live_operation_1): Adjust.
> >     (vect_get_loop_len): Provide parameter to select bias-adjusted
> >     length.
> >     (vect_gen_loop_len_mask): Adjust.
> >     (vect_update_ivs_after_vectorizer_for_early_breaks): Adjust.
> >     * tree-vect-stmts.cc (vect_get_strided_load_store_ops): Adjust.
> >     (vectorizable_call): Adjust.
> >     (vectorizable_operation): Adjust.
> >     (vectorizable_store): Adjust.
> >     (vectorizable_load): Adjust.
> >     (vectorizable_condition): Adjust.
> >     * tree-vectorizer.h (vect_get_loop_len): Add parameter.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> >     * gcc.dg/vect/nodump-extractlast-1.c: Fix typo.
> >     * gcc.dg/vect/nodump-extractlast-2.c: New test.
> > 
> > Bootstrapped and regtested on s390, x86, and ppc64le.  Ok for trunk?
> > 
> > Signed-off-by: Juergen Christ <[email protected]>
> > ---
> >  .../gcc.dg/vect/nodump-extractlast-1.c        |  2 +-
> >  .../gcc.dg/vect/nodump-extractlast-2.c        | 23 +++++++
> >  gcc/tree-vect-loop.cc                         | 62 +++++++++++++------
> >  gcc/tree-vect-stmts.cc                        | 24 +++----
> >  gcc/tree-vectorizer.h                         |  2 +-
> >  5 files changed, 82 insertions(+), 31 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c
> > 
> > diff --git a/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c 
> > b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c
> > index 980ac3e42188..83d8a38f13e3 100644
> > --- a/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c
> > +++ b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c
> > @@ -1,4 +1,4 @@
> > -/* Check for a bung in the treatment of LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS 
> > when
> > +/* Check for a bug in the treatment of LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS 
> > when
> >     using VEC_EXTRACT.  */
> >  /* { dg-require-effective-target vect_int } */
> >  
> > diff --git a/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c 
> > b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c
> > new file mode 100644
> > index 000000000000..9697687c1084
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c
> > @@ -0,0 +1,23 @@
> > +/* Check for a bug in the treatment of LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS 
> > when
> > +   using VEC_EXTRACT.  This variant uses .LEN_LOAD which might use QImode
> > +   vectors during load, but SImode vectors for the extraction.  */
> > +int __attribute__ ((noinline, noclone))
> > +test_int (int *x, int n, int value)
> > +{
> > +  int last;
> > +  for (int j = 0; j < n; ++j)
> > +    {
> > +      last = x[j];
> > +      x[j] = last * value;
> > +    }
> > +  return last;
> > +}
> > +
> > +int
> > +main ()
> > +{
> > +  int arr[] = {1,2,3,4,5,1};
> > +  if (test_int (arr, sizeof (arr) / sizeof (arr[0]), 42) != 1)
> > +    __builtin_abort();
> > +  return 0;
> > +}
> > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > index 8e60a433596f..8e40f0ceb93b 100644
> > --- a/gcc/tree-vect-loop.cc
> > +++ b/gcc/tree-vect-loop.cc
> > @@ -6503,8 +6503,11 @@ vectorize_fold_left_reduction (loop_vec_info 
> > loop_vinfo,
> >     mask = vec_opmask[i];
> >        if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
> >     {
> > +     /* ??? Why do we use LOAD_STORE_BIAS here when we do not do a load or
> > +        a store?  Shouldn't we instead just ensure that LEN represents the
> > +        number of elements in the vector?  */
> 
> Yes.  I think the comment is unwarranted.

Oops.  Sorry, that comment should NOT be part of the patch.  I will remove it.

> 
> >       len = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num, vectype_in,
> > -                              i, 1);
> > +                              i, 1, false);
> >       signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
> >       bias = build_int_cst (intQI_type_node, biasval);
> >       if (!is_cond_op)
> > @@ -9885,7 +9888,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> >                _21 = vect_vec_iv_.6_22 + vect_cst__22;  */
> >           vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> >           tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
> > -                                       vectype, 0, 0);
> > +                                       vectype, 0, 0, false);
> >           if (SCALAR_FLOAT_TYPE_P (stept))
> >             expr = gimple_build (&stmts, FLOAT_EXPR, stept, len);
> >           else
> > @@ -10032,7 +10035,7 @@ vectorizable_live_operation_1 (loop_vec_info 
> > loop_vinfo, basic_block exit_bb,
> >      {
> >        /* Emit:
> >  
> > -    SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - (BIAS + 1)>
> > +    SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - 1>
> >  
> >      where VEC_LHS is the vectorized live-out result, LEN is the length of
> >      the vector, BIAS is the load-store bias.  The bias should not be used
> > @@ -10043,21 +10046,14 @@ vectorizable_live_operation_1 (loop_vec_info 
> > loop_vinfo, basic_block exit_bb,
> >        gimple_stmt_iterator gsi = gsi_last (tem);
> >        tree len = vect_get_loop_len (loop_vinfo, &gsi,
> >                                 &LOOP_VINFO_LENS (loop_vinfo),
> > -                               1, vectype, 0, 1);
> > +                               1, vectype, 0, 1, false);
> >        gimple_seq_add_seq (&stmts, tem);
> >  
> > -      /* BIAS + 1.  */
> > -      signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS 
> > (loop_vinfo);
> > -      tree bias_plus_one
> > -   = int_const_binop (PLUS_EXPR,
> > -                      build_int_cst (TREE_TYPE (len), biasval),
> > -                      build_one_cst (TREE_TYPE (len)));
> > -
> > -      /* LAST_INDEX = LEN - (BIAS + 1).  */
> > +      /* LAST_INDEX = LEN - 1.  */
> >        tree last_index = gimple_build (&stmts, MINUS_EXPR, TREE_TYPE (len),
> > -                                len, bias_plus_one);
> > +                                len, build_one_cst (TREE_TYPE (len)));
> >  
> > -      /* SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - (BIAS + 1)>.  */
> > +      /* SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - 1>.  */
> 
> So VEC_EXTRACT isn't affected by bias?  I'll note that on all the
> .COND_LEN_* internal functions we have length and bias, so whether
> it is applied or not is in the end subject to target consideration.
> That possibly means VEC_EXTRACT would need to take a bias argument?
> 
> I don't quite remember why we have both bias parameters but also
> appear to pass len adjusted by bias already.  Huh.
> 
> The patch looks OK, but it would be nice to have some overall 
> documentation on this length + bias thing somewhere.  IMO this
> belongs to md.texi, but there's a PR about the patterns being
> an unordered mess without structure.  Possibly amending the
> first paragraph is an option, short of subdividing the
> standard pattern name listing into logical parts.

Do you want this documentation to be part of this patch?  Please tell
me how to proceed with this patch (we need a fix for this problem,
since glibc is also miscompiled when built with -O3 and this patch
fixes it).

Juergen

> 
> Thanks,
> Richard.
> 
> >        tree scalar_res
> >     = gimple_build (&stmts, CFN_VEC_EXTRACT, TREE_TYPE (vectype),
> >                     vec_lhs_phi, last_index);
> > @@ -10678,7 +10674,7 @@ vect_record_loop_len (loop_vec_info loop_vinfo, 
> > vec_loop_lens *lens,
> >  tree
> >  vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
> >                vec_loop_lens *lens, unsigned int nvectors, tree vectype,
> > -              unsigned int index, unsigned int factor)
> > +              unsigned int index, unsigned int factor, bool adjusted)
> >  {
> >    rgroup_controls *rgl = &(*lens)[nvectors - 1];
> >    bool use_bias_adjusted_len =
> > @@ -10711,7 +10707,7 @@ vect_get_loop_len (loop_vec_info loop_vinfo, 
> > gimple_stmt_iterator *gsi,
> >     }
> >      }
> >  
> > -  if (use_bias_adjusted_len)
> > +  if (use_bias_adjusted_len && adjusted)
> >      return rgl->bias_adjusted_ctrl;
> >  
> >    tree loop_len = rgl->controls[index];
> > @@ -10734,6 +10730,36 @@ vect_get_loop_len (loop_vec_info loop_vinfo, 
> > gimple_stmt_iterator *gsi,
> >         gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
> >     }
> >      }
> > +  else if (factor && rgl->factor != factor)
> > +    {
> > +      /* The number of scalars per iteration, scalar occupied bytes and
> > +    the number of vectors are both compile-time constants.  */
> > +      unsigned int nscalars_per_iter
> > +   = exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype),
> > +                LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant ();
> > +      unsigned int rglvecsize = rgl->factor * rgl->max_nscalars_per_iter;
> > +      unsigned int vecsize = nscalars_per_iter * factor;
> > +      if (rglvecsize > vecsize)
> > +   {
> > +     unsigned int fac = rglvecsize / vecsize;
> > +     tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
> > +     gimple_seq seq = NULL;
> > +     loop_len = gimple_build (&seq, EXACT_DIV_EXPR, iv_type, loop_len,
> > +                              build_int_cst (iv_type, fac));
> > +     if (seq)
> > +       gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
> > +   }
> > +      else if (rglvecsize < vecsize)
> > +   {
> > +     unsigned int fac = vecsize / rglvecsize;
> > +     tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
> > +     gimple_seq seq = NULL;
> > +     loop_len = gimple_build (&seq, MULT_EXPR, iv_type, loop_len,
> > +                              build_int_cst (iv_type, fac));
> > +     if (seq)
> > +       gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
> > +   }
> > +    }
> >    return loop_len;
> >  }
> >  
> > @@ -10751,7 +10777,7 @@ vect_gen_loop_len_mask (loop_vec_info loop_vinfo, 
> > gimple_stmt_iterator *gsi,
> >    tree all_one_mask = build_all_ones_cst (vectype);
> >    tree all_zero_mask = build_zero_cst (vectype);
> >    tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors, vectype, 
> > index,
> > -                           factor);
> > +                           factor, true);
> >    tree bias = build_int_cst (intQI_type_node,
> >                          LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo));
> >    tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL, 
> > "vec_len_mask");
> > @@ -11022,7 +11048,7 @@ vect_update_ivs_after_vectorizer_for_early_breaks 
> > (loop_vec_info loop_vinfo)
> >      {
> >        vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> >        tree_vf = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
> > -                              NULL_TREE, 0, 0);
> > +                              NULL_TREE, 0, 0, true);
> >      }
> >  
> >    tree iter_var;
> > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> > index ed2486451ca0..a97f7ec07c4f 100644
> > --- a/gcc/tree-vect-stmts.cc
> > +++ b/gcc/tree-vect-stmts.cc
> > @@ -3182,7 +3182,7 @@ vect_get_strided_load_store_ops (stmt_vec_info 
> > stmt_info, slp_tree node,
> >      .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
> >      vectp_a.9_26 = vectp_a.9_7 + ivtmp_8;  */
> >        tree loop_len
> > -   = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
> > +   = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0, 
> > true);
> >        tree tmp
> >     = fold_build2 (MULT_EXPR, sizetype,
> >                    fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
> > @@ -3253,7 +3253,7 @@ vect_get_loop_variant_data_ptr_increment (
> >       addr = addr + .SELECT_VL (ARG..) * step;
> >    */
> >    tree loop_len
> > -    = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
> > +    = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0, 
> > true);
> >    tree len_type = TREE_TYPE (loop_len);
> >    /* Since the outcome of .SELECT_VL is element size, we should adjust
> >       it into bytesize so that it can be used in address pointer variable
> > @@ -3842,7 +3842,7 @@ vectorizable_call (vec_info *vinfo,
> >                 {
> >                   unsigned int vec_num = vec_oprnds0.length ();
> >                   tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
> > -                                               vec_num, vectype_out, i, 1);
> > +                                               vec_num, vectype_out, i, 1, 
> > true);
> >                   signed char biasval
> >                     = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
> >                   tree bias = build_int_cst (intQI_type_node, biasval);
> > @@ -7041,7 +7041,7 @@ vectorizable_operation (vec_info *vinfo,
> >       if (len_loop_p)
> >         {
> >           tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
> > -                                       vec_num, vectype, i, 1);
> > +                                       vec_num, vectype, i, 1, true);
> >           signed char biasval
> >             = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
> >           tree bias = build_int_cst (intQI_type_node, biasval);
> > @@ -8757,7 +8757,7 @@ vectorizable_store (vec_info *vinfo,
> >         {
> >           if (loop_lens)
> >             final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
> > -                                          ncopies, vectype, j, 1);
> > +                                          ncopies, vectype, j, 1, true);
> >           else
> >             final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
> >           signed char biasval
> > @@ -8950,7 +8950,7 @@ vectorizable_store (vec_info *vinfo,
> >               if (loop_lens)
> >                 final_len = vect_get_loop_len (loop_vinfo, gsi,
> >                                                loop_lens, num_stmts,
> > -                                              vectype, j, 1);
> > +                                              vectype, j, 1, true);
> >               else
> >                 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
> >  
> > @@ -9334,7 +9334,7 @@ vectorizable_store (vec_info *vinfo,
> >       unsigned factor
> >         = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
> >       final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
> > -                                    vec_num, vectype, i, factor);
> > +                                    vec_num, vectype, i, factor, true);
> >     }
> >        else if (final_mask)
> >     {
> > @@ -10697,7 +10697,7 @@ vectorizable_load (vec_info *vinfo,
> >         {
> >           if (loop_lens)
> >             final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
> > -                                          ncopies, vectype, j, 1);
> > +                                          ncopies, vectype, j, 1, true);
> >           else
> >             final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
> >           signed char biasval
> > @@ -10900,7 +10900,7 @@ vectorizable_load (vec_info *vinfo,
> >             {
> >               if (loop_lens)
> >                 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
> > -                                              vec_num, vectype, i, 1);
> > +                                              vec_num, vectype, i, 1, 
> > true);
> >               else
> >                 final_len = build_int_cst (sizetype,
> >                                            TYPE_VECTOR_SUBPARTS (vectype));
> > @@ -11352,7 +11352,7 @@ vectorizable_load (vec_info *vinfo,
> >             unsigned factor
> >               = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
> >             final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
> > -                                          vec_num, vectype, i, factor);
> > +                                          vec_num, vectype, i, factor, 
> > true);
> >           }
> >         else if (final_mask)
> >           {
> > @@ -12429,8 +12429,10 @@ vectorizable_condition (vec_info *vinfo,
> >         {
> >           if (lens)
> >             {
> > +             /* ??? Do we really want the adjusted LEN here?  Isn't this
> > +                based on number of elements?  */
> >               len = vect_get_loop_len (loop_vinfo, gsi, lens,
> > -                                      vec_num, vectype, i, 1);
> > +                                      vec_num, vectype, i, 1, true);
> >               signed char biasval
> >                 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
> >               bias = build_int_cst (intQI_type_node, biasval);
> > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> > index 7a38d4969cf2..2116507b69ce 100644
> > --- a/gcc/tree-vectorizer.h
> > +++ b/gcc/tree-vectorizer.h
> > @@ -2673,7 +2673,7 @@ extern void vect_record_loop_len (loop_vec_info, 
> > vec_loop_lens *, unsigned int,
> >                               tree, unsigned int);
> >  extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *,
> >                            vec_loop_lens *, unsigned int, tree,
> > -                          unsigned int, unsigned int);
> > +                          unsigned int, unsigned int, bool);
> >  extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *,
> >                                 gimple_stmt_iterator *, vec_loop_lens *,
> >                                 unsigned int, tree, tree, unsigned int,
> > 
> 
> -- 
> Richard Biener <[email protected]>
> SUSE Software Solutions Germany GmbH,
> Frankenstrasse 146, 90461 Nuernberg, Germany;
> GF: Jochen Jaser, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to