On Fri, 26 Jun 2026, Richard Sandiford wrote:

> Richard Biener <[email protected]> writes:
> > The following adds a function like tree_to_vec_perm_builder but
> > for building vec_perm_indices directly from all operands of a
> > VEC_PERM_EXPR.  This avoids errors such as shown in PR123175
> > and allows us to extract information from the permuted inputs
> > to implement vec_shl/vec_shr recognition in can_vec_perm_const_p,
> > making that fully cover what vector lowering will assess.
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu and aarch64-linux.
> >
> > I went for a toplevel function rather than a method to provide
> > better mitigation against PR123175 and to elide the builder which
> > in most cases isn't necessary to expose.
> >
> > OK for trunk?
> 
> LGTM FWIW.
> 
> Very minor, but on the formatting of the inline function definitions:
> I thought the convention was not to indent the "{", to match what we do
> for non-inline functions.  I realise the codebase isn't consistent though.

I fixed that and pushed.

Richard.

> Thanks,
> Richard
> 
> 
> >
> > Thanks,
> > Richard.
> >
> >     * vec-perm-indices.h (vec_perm_indices::new_vector): New overload.
> >     (vec_perm_indices::input_bitwise_zero_p): New method.
> >     (vec_perm_indices::m_input0_bitwise_zero_p,
> >     vec_perm_indices::m_input1_bitwise_zero_p): New members.
> >     (vec_perm_indices::vec_perm_indices): Adjust.
> >     (tree_to_vec_perm_indices): Declare.
> >     * vec-perm-indices.cc (vec_perm_indices::new_vector): New overload.
> >     (vec_perm_indices::new_expanded_vector): Adjust.
> >     (tree_to_vec_perm_indices): New function.
> >     * optabs-query.cc (can_vec_perm_const_p): Handle permute
> >     patterns mapping to vec_shl/vec_shr.
> >     * tree-vect-generic.cc (lower_vec_perm): Move vec_shl/vec_shr
> >     detection to can_vec_perm_const_p and simplify.
> > ---
> >  gcc/optabs-query.cc      | 53 ++++++++++++++++++++++++++++++----
> >  gcc/tree-vect-generic.cc | 62 ++--------------------------------------
> >  gcc/vec-perm-indices.cc  | 25 ++++++++++++++++
> >  gcc/vec-perm-indices.h   | 21 ++++++++++++--
> >  4 files changed, 94 insertions(+), 67 deletions(-)
> >
> > diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
> > index 58842e40ed6..da0d37cf5df 100644
> > --- a/gcc/optabs-query.cc
> > +++ b/gcc/optabs-query.cc
> > @@ -415,12 +415,7 @@ can_vec_perm_var_p (machine_mode mode)
> >  /* Return true if the target directly supports VEC_PERM_EXPRs on vectors
> >     of mode OP_MODE and result vector of mode MODE using the selector SEL.
> >     ALLOW_VARIABLE_P is true if it is acceptable to force the selector into 
> > a
> > -   register and use a variable permute (if the target supports that).
> > -
> > -   Note that additional permutations representing whole-vector shifts may
> > -   also be handled via the vec_shr or vec_shl optab, but only where the
> > -   second input vector is entirely constant zeroes; this case is not dealt
> > -   with here.  */
> > +   register and use a variable permute (if the target supports that).  */
> >  
> >  bool
> >  can_vec_perm_const_p (machine_mode mode, machine_mode op_mode,
> > @@ -465,6 +460,52 @@ can_vec_perm_const_p (machine_mode mode, machine_mode 
> > op_mode,
> >      into integer operations.  */
> >      }
> >  
> > +  unsigned elements;
> > +  if (mode == op_mode
> > +      && GET_MODE_NUNITS (mode).is_constant (&elements))
> > +    {
> > +      if (sel.input_bitwise_zero_p (0)
> > +     && can_implement_p (vec_shl_optab, mode))
> > +   {
> > +     unsigned int first = 0, i;
> > +     for (i = 0; i < elements; ++i)
> > +       if (known_eq (poly_uint64 (sel[i]), elements))
> > +         {
> > +           if (i == 0 || first)
> > +             break;
> > +           first = i;
> > +         }
> > +       else if (first
> > +                ? maybe_ne (poly_uint64 (sel[i]),
> > +                            elements + i - first)
> > +                : maybe_ge (poly_uint64 (sel[i]), elements))
> > +         break;
> > +     if (first && i == elements)
> > +       return true;
> > +   }
> > +      if (sel.input_bitwise_zero_p (1)
> > +     && maybe_ne (sel[0], 0)
> > +     && known_lt (sel[0], elements)
> > +     && can_implement_p (vec_shr_optab, mode))
> > +   {
> > +     if (sel.series_p (0, 1, sel[0], 1))
> > +       return true;
> > +     unsigned i;
> > +     for (i = 1; i < elements; ++i)
> > +       {
> > +         poly_uint64 actual = sel[i];
> > +         poly_uint64 expected = i + sel[0];
> > +         /* Indices into the second vector are all equivalent.  */
> > +         if (maybe_lt (actual, elements)
> > +             ? maybe_ne (actual, expected)
> > +             : maybe_lt (expected, elements))
> > +           break;
> > +       }
> > +     if (i == elements)
> > +       return true;
> > +   }
> > +    }
> > +
> >    return false;
> >  }
> >  
> > diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
> > index fddb44bfe86..a8c31974973 100644
> > --- a/gcc/tree-vect-generic.cc
> > +++ b/gcc/tree-vect-generic.cc
> > @@ -1640,76 +1640,20 @@ lower_vec_perm (gimple_stmt_iterator *gsi)
> >     mask = gimple_assign_rhs1 (def_stmt);
> >      }
> >  
> > -  vec_perm_builder sel_int;
> >  
> > +  vec_perm_indices indices;
> >    if (TREE_CODE (mask) == VECTOR_CST
> > -      && tree_to_vec_perm_builder (&sel_int, mask))
> > +      && tree_to_vec_perm_indices (&indices, vec0, vec1, mask))
> >      {
> > -      vec_perm_indices indices (sel_int, 2, in_elements);
> >        machine_mode vmode = TYPE_MODE (vect_type);
> >        tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
> >        machine_mode lhs_mode = TYPE_MODE (lhs_type);
> > -      if (can_vec_perm_const_p (lhs_mode, vmode, indices))
> > +      if (can_vec_perm_const_p (lhs_mode, vmode, indices, true))
> >     {
> >       gimple_assign_set_rhs3 (stmt, mask);
> >       update_stmt (stmt);
> >       return;
> >     }
> > -      /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
> > -    vector as VEC1 and a right element shift MASK.  */
> > -      if (can_implement_p (vec_shr_optab, TYPE_MODE (vect_type))
> > -     && TREE_CODE (vec1) == VECTOR_CST
> > -     && initializer_zerop (vec1)
> > -     && maybe_ne (indices[0], 0)
> > -     && known_lt (poly_uint64 (indices[0]), elements))
> > -   {
> > -     bool ok_p = indices.series_p (0, 1, indices[0], 1);
> > -     if (!ok_p)
> > -       {
> > -         for (i = 1; i < elements; ++i)
> > -           {
> > -             poly_uint64 actual = indices[i];
> > -             poly_uint64 expected = i + indices[0];
> > -             /* Indices into the second vector are all equivalent.  */
> > -             if (maybe_lt (actual, elements)
> > -                 ? maybe_ne (actual, expected)
> > -                 : maybe_lt (expected, elements))
> > -               break;
> > -           }
> > -         ok_p = i == elements;
> > -       }
> > -     if (ok_p)
> > -       {
> > -         gimple_assign_set_rhs3 (stmt, mask);
> > -         update_stmt (stmt);
> > -         return;
> > -       }
> > -   }
> > -      /* And similarly vec_shl pattern.  */
> > -      if (can_implement_p (vec_shl_optab, TYPE_MODE (vect_type))
> > -     && TREE_CODE (vec0) == VECTOR_CST
> > -     && initializer_zerop (vec0))
> > -   {
> > -     unsigned int first = 0;
> > -     for (i = 0; i < elements; ++i)
> > -       if (known_eq (poly_uint64 (indices[i]), elements))
> > -         {
> > -           if (i == 0 || first)
> > -             break;
> > -           first = i;
> > -         }
> > -       else if (first
> > -                ? maybe_ne (poly_uint64 (indices[i]),
> > -                                         elements + i - first)
> > -                : maybe_ge (poly_uint64 (indices[i]), elements))
> > -         break;
> > -     if (first && i == elements)
> > -       {
> > -         gimple_assign_set_rhs3 (stmt, mask);
> > -         update_stmt (stmt);
> > -         return;
> > -       }
> > -   }
> >      }
> >    else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
> >      return;
> > diff --git a/gcc/vec-perm-indices.cc b/gcc/vec-perm-indices.cc
> > index 5b54a84caee..6bb2f39ff91 100644
> > --- a/gcc/vec-perm-indices.cc
> > +++ b/gcc/vec-perm-indices.cc
> > @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
> >  #include "selftest.h"
> >  #include "rtx-vector-builder.h"
> >  
> > +
> >  /* Switch to a new permutation vector that selects between NINPUTS vector
> >     inputs that have NELTS_PER_INPUT elements each.  Take the elements of 
> > the
> >     new permutation vector from ELEMENTS, clamping each one to be in range. 
> >  */
> > @@ -38,10 +39,14 @@ along with GCC; see the file COPYING3.  If not see
> >  void
> >  vec_perm_indices::new_vector (const vec_perm_builder &elements,
> >                           unsigned int ninputs,
> > +                         bool input0_bitwise_zero_p,
> > +                         bool input1_bitwise_zero_p,
> >                           poly_uint64 nelts_per_input)
> >  {
> >    m_ninputs = ninputs;
> >    m_nelts_per_input = nelts_per_input;
> > +  m_input0_bitwise_zero_p = input0_bitwise_zero_p;
> > +  m_input1_bitwise_zero_p = input1_bitwise_zero_p;
> >    /* If the vector has a constant number of elements, expand the
> >       encoding and clamp each element.  E.g. { 0, 2, 4, ... } might
> >       wrap halfway if there is only one vector input, and we want
> > @@ -87,6 +92,8 @@ vec_perm_indices::new_expanded_vector (const 
> > vec_perm_indices &orig,
> >                                    unsigned int factor)
> >  {
> >    m_ninputs = orig.m_ninputs;
> > +  m_input0_bitwise_zero_p = orig.m_input0_bitwise_zero_p;
> > +  m_input1_bitwise_zero_p = orig.m_input1_bitwise_zero_p;
> >    m_nelts_per_input = orig.m_nelts_per_input * factor;
> >    m_encoding.new_vector (orig.m_encoding.full_nelts () * factor,
> >                      orig.m_encoding.npatterns () * factor,
> > @@ -301,6 +308,24 @@ tree_to_vec_perm_builder (vec_perm_builder *builder, 
> > tree cst)
> >    return true;
> >  }
> >  
> > +/* Try to read the contents of VECTOR_CST PERM_CST as a constant 
> > permutation
> > +   vector permuting OP0 and OP1.  Return true and populate INDICES on 
> > success,
> > +   otherwise return false without modifying INDICES.  */
> > +
> > +bool
> > +tree_to_vec_perm_indices (vec_perm_indices *indices, tree op0, tree op1,
> > +                     tree perm_cst)
> > +{
> > +  vec_perm_builder builder;
> > +  if (!tree_to_vec_perm_builder (&builder, perm_cst))
> > +    return false;
> > +  indices->new_vector (builder, op0 == op1 ? 1 : 2,
> > +                  initializer_zerop (op0), initializer_zerop (op1),
> > +                  TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0)));
> > +  return true;
> > +}
> > +
> > +
> >  /* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES. 
> >  */
> >  
> >  tree
> > diff --git a/gcc/vec-perm-indices.h b/gcc/vec-perm-indices.h
> > index 5e68f7f62ec..b7bf5aa3f05 100644
> > --- a/gcc/vec-perm-indices.h
> > +++ b/gcc/vec-perm-indices.h
> > @@ -55,7 +55,12 @@ public:
> >    vec_perm_indices ();
> >    vec_perm_indices (const vec_perm_builder &, unsigned int, poly_uint64);
> >  
> > -  void new_vector (const vec_perm_builder &, unsigned int, poly_uint64);
> > +  void new_vector (const vec_perm_builder &b, unsigned int ni, poly_uint64 
> > ne)
> > +    {
> > +      new_vector (b, ni, false, false, ne);
> > +    }
> > +  void new_vector (const vec_perm_builder &, unsigned int, bool, bool,
> > +              poly_uint64);
> >    void new_expanded_vector (const vec_perm_indices &, unsigned int);
> >    bool new_shrunk_vector (const vec_perm_indices &, unsigned int);
> >    void rotate_inputs (int delta);
> > @@ -70,6 +75,13 @@ public:
> >    /* Return the number of input vectors being permuted.  */
> >    unsigned int ninputs () const { return m_ninputs; }
> >  
> > +  /* Return whether the input N is known bitwise zero.  */
> > +  bool input_bitwise_zero_p (unsigned n) const
> > +    {
> > +      return (n == 0 ? m_input0_bitwise_zero_p
> > +         : (n == 1 ? m_input1_bitwise_zero_p : false));
> > +    }
> > +
> >    /* Return the number of elements in each input vector.  */
> >    poly_uint64 nelts_per_input () const { return m_nelts_per_input; }
> >  
> > @@ -87,16 +99,21 @@ private:
> >  
> >    vec_perm_builder m_encoding;
> >    unsigned int m_ninputs;
> > +  bool m_input0_bitwise_zero_p;
> > +  bool m_input1_bitwise_zero_p;
> >    poly_uint64 m_nelts_per_input;
> >  };
> >  
> >  bool tree_to_vec_perm_builder (vec_perm_builder *, tree);
> > +bool tree_to_vec_perm_indices (vec_perm_indices *, tree, tree, tree);
> >  tree vec_perm_indices_to_tree (tree, const vec_perm_indices &);
> >  rtx vec_perm_indices_to_rtx (machine_mode, const vec_perm_indices &);
> >  
> >  inline
> >  vec_perm_indices::vec_perm_indices ()
> >    : m_ninputs (0),
> > +    m_input0_bitwise_zero_p (false),
> > +    m_input1_bitwise_zero_p (false),
> >      m_nelts_per_input (0)
> >  {
> >  }
> > @@ -110,7 +127,7 @@ vec_perm_indices::vec_perm_indices (const 
> > vec_perm_builder &elements,
> >                                 unsigned int ninputs,
> >                                 poly_uint64 nelts_per_input)
> >  {
> > -  new_vector (elements, ninputs, nelts_per_input);
> > +  new_vector (elements, ninputs, false, false, nelts_per_input);
> >  }
> >  
> >  /* Return the canonical value for permutation vector element ELT,
> 

-- 
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Jochen Jaser, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to