Gentle Ping!

Please review.

Thanks & Regards
Jeevitha


On 27/10/25 12:06 pm, jeevitha wrote:
> Ping!
> 
> Please review.
> 
> Thanks & Regards
> Jeevitha
> 
> On 18/09/25 3:25 pm, jeevitha wrote:
>> Hi All,
>>
>> The following patch has been bootstrapped and regtested on powerpc64le-linux.
>>
>> PowerPC vector shift left instructions (vslb, vslh, vslw, vsld) use modulo
>> semantics for the shift amount: only the low log2(element_bit_width) bits
>> of each shift element are used.  A shift by (element_bit_width - 1) can
>> therefore be optimized by replacing the splatted shift amount with a vector
>> of all ones (0xFF..FF).  On Power8 this lets the constant be materialized
>> with a single vspltis[wd] instead of a load, reducing instruction overhead.
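>>
>> For example, vslw uses only the low 5 bits of each shift element, so
>> splatting 31 and splatting all ones request the same shift (an
>> illustrative sketch, not part of the patch; shl31 is a made-up name):
>>
>>   #include <altivec.h>
>>
>>   vector unsigned int
>>   shl31 (vector unsigned int v)
>>   {
>>     /* 31 & 0x1F == 0xFFFFFFFF & 0x1F, so this is the same shift as
>>        vec_sl (v, vec_splats (31U)), and the all-ones splat is a single
>>        vspltisw -1 on Power8.  */
>>     return vec_sl (v, vec_splats (0xFFFFFFFFU));
>>   }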
>>
>> This patch adds rs6000_optimize_vector_bitwidth_shift to detect splat
>> constants of (element_bit_width - 1) and replace them with a vector of
>> all ones (-1), thereby avoiding unnecessary memory loads.
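>>
>> Roughly, the transformation rewrites (illustrative only; the exact GIMPLE
>> spelling may differ):
>>
>>   _1 = __builtin_altivec_vslw (v_2(D), { 31, 31, 31, 31 });
>>
>> into
>>
>>   _1 = v_2(D) << { 4294967295, 4294967295, 4294967295, 4294967295 };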
>>
>> 2025-09-18  Jeevitha Palanisamy  <[email protected]>
>>
>> gcc/
>>      PR target/119912
>>      * config/rs6000/rs6000-builtin.cc
>>      (rs6000_optimize_vector_bitwidth_shift): New function to optimize
>>      vector shifts by (element_bit_width - 1).
>>      (rs6000_gimple_fold_builtin): Call it.
>>
>> gcc/testsuite/
>>      PR target/119912
>>      * gcc.target/powerpc/pr119912.c: New test.
>>
>> diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
>> index bc1580f051b..517c99bfcfb 100644
>> --- a/gcc/config/rs6000/rs6000-builtin.cc
>> +++ b/gcc/config/rs6000/rs6000-builtin.cc
>> @@ -1264,6 +1264,73 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
>>    return true;
>>  }
>>  
>> +/* Try to optimize a shift by splat (element_bit_width - 1).
>> +   Returns true if handled, false otherwise.  */
>> +static bool
>> +rs6000_optimize_vector_bitwidth_shift (gimple_stmt_iterator *gsi, tree arg0,
>> +                                       tree arg1, tree lhs, location_t loc,
>> +                                       enum tree_code subcode)
>> +{
>> +  int element_bit_width = 128 / VECTOR_CST_NELTS (arg1);
>> +  tree arg1_type = TREE_TYPE (arg1);
>> +  tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
>> +  tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
>> +  tree check_arg = arg1;
>> +
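>> +  /* Power9 can materialize these splats with xxspltib, and for 8- and
>> +     16-bit elements the shift amount already fits the 5-bit vspltis[bh]
>> +     immediate, so only wider elements on earlier CPUs benefit.  */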
>> +  if (TARGET_P9_VECTOR || TYPE_PRECISION (unsigned_element_type) <= 16)
>> +    return false;
>> +
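>> +  /* Look through SSA names and VIEW_CONVERT_EXPRs to find the
>> +     underlying shift-amount constant, if any.  */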
>> +  while (TREE_CODE (check_arg) == SSA_NAME
>> +         || TREE_CODE (check_arg) == VIEW_CONVERT_EXPR)
>> +    {
>> +      if (TREE_CODE (check_arg) == SSA_NAME)
>> +        {
>> +          gimple *def_stmt = SSA_NAME_DEF_STMT (check_arg);
>> +          if (!def_stmt || !is_gimple_assign (def_stmt))
>> +            break;
>> +          check_arg = gimple_assign_rhs1 (def_stmt);
>> +        }
>> +      else
>> +        check_arg = TREE_OPERAND (check_arg, 0);
>> +    }
>> +
>> +  /* Optimize if splat of (element_bit_width - 1).  */
>> +  if (TREE_CODE (check_arg) == VECTOR_CST)
>> +    {
>> +      tree first_elt = vector_cst_elt (check_arg, 0);
>> +      bool is_splat = true;
>> +
>> +      if (wi::to_widest (first_elt) != element_bit_width - 1)
>> +        return false;
>> +
>> +      for (size_t i = 1; i < VECTOR_CST_NELTS (check_arg); i++)
>> +        if (!operand_equal_p (vector_cst_elt (check_arg, i), first_elt, 0))
>> +          {
>> +            is_splat = false;
>> +            break;
>> +          }
>> +
>> +      if (is_splat)
>> +        {
>> +          int n_elts = VECTOR_CST_NELTS (arg1);
>> +          tree_vector_builder elts (unsigned_arg1_type, n_elts, 1);
>> +          for (int i = 0; i < n_elts; i++)
>> +            elts.safe_push (build_int_cst (unsigned_element_type, -1));
>> +          tree new_arg1 = elts.build ();
>> +          gimple *g = gimple_build_assign (lhs, subcode, arg0, new_arg1);
>> +          gimple_set_location (g, loc);
>> +          gsi_replace (gsi, g, true);
>> +          return true;
>> +        }
>> +    }
>> +  return false;
>> +}
>> +
>>  /* Fold a machine-dependent built-in in GIMPLE.  (For folding into
>>     a constant, use rs6000_fold_builtin.)  */
>>  bool
>> @@ -1720,6 +1787,11 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>>      tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
>>      loc = gimple_location (stmt);
>>      lhs = gimple_call_lhs (stmt);
>> +
>> +    if (rs6000_optimize_vector_bitwidth_shift (gsi, arg0, arg1, lhs, loc,
>> +                                               LSHIFT_EXPR))
>> +      return true;
>> +
>>      /* Force arg1 into the range valid matching the arg0 type.  */
>>      /* Build a vector consisting of the max valid bit-size values.  */
>>      int n_elts = VECTOR_CST_NELTS (arg1);
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr119912.c b/gcc/testsuite/gcc.target/powerpc/pr119912.c
>> new file mode 100644
>> index 00000000000..d1802bba801
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr119912.c
>> @@ -0,0 +1,18 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-mdejagnu-cpu=power8 -mvsx -O2" } */
>> +
>> +#include <altivec.h>
>> +
>> +vector unsigned int shlw(vector unsigned int in)
>> +{
>> +    return vec_sl(in, (vector unsigned int)vec_splats((unsigned char)31));
>> +}
>> +
>> +vector unsigned long long shld(vector unsigned long long in)
>> +{
>> +    return vec_sl(in, (vector unsigned long long)vec_splats(63));
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {\mvspltis[bhwd] [0-9]+,-1\M} 2 } } */
>> +/* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 2 } } */
>> +/* { dg-final { scan-assembler-not {\mlvx\M} } } */
>>
> 
