Hi All,

The following patch has been bootstrapped and regtested on powerpc64le-linux.

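PowerPC vector shift left instructions (vslb, vslh, vslw, vsld) use modulo
semantics for the shift amount: only the low log2(element_bit_width) bits of
each count are used. A shift by (element_bit_width - 1) can therefore be
optimized by replacing the shift amount splat with a vector of 0xFF..FF, which
yields the same effective count. On Power8, this reduces instruction overhead
because the all-ones vector can be built with a single vspltis* -1 instruction.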

This patch adds rs6000_optimize_vector_bitwidth_shift to detect splat constants
of (element_bit_width - 1) and replace them with a vector of all -1s, thereby
avoiding unnecessary memory loads.

2025-09-18  Jeevitha Palanisamy  <[email protected]>

gcc/
        PR target/119912
        * config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Call
        rs6000_optimize_vector_bitwidth_shift.
        (rs6000_optimize_vector_bitwidth_shift): New function to optimize
        vector immediate shifts.

gcc/testsuite/
        PR target/119912
        * gcc.target/powerpc/pr119912.c: New test.

diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index bc1580f051b..517c99bfcfb 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -1264,6 +1264,68 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
   return true;
 }
 
+/* Try to fold a shift by a splat of (element_bit_width - 1) into a shift by
+   an all-ones vector.  Return true if the statement at GSI was replaced.  */
+static bool
+rs6000_optimize_vector_bitwidth_shift (gimple_stmt_iterator *gsi,
+                                      tree arg0, tree arg1, tree lhs,
+                                      location_t loc, enum tree_code subcode)
+{
+  int n_elts = VECTOR_CST_NELTS (arg1);
+  int element_bit_width = 128 / n_elts;
+  tree arg1_type = TREE_TYPE (arg1);
+  tree unsigned_arg1_type = unsigned_type_for (arg1_type);
+  tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
+  tree check_arg = arg1;
+
+  if (TARGET_P9_VECTOR || TYPE_PRECISION (unsigned_element_type) <= 16)
+    return false;
+
+  /* Look through SSA copies and VIEW_CONVERT_EXPRs only; any other defining
+     statement computes a different value and must stop the walk.  */
+  while (TREE_CODE (check_arg) == SSA_NAME
+        || TREE_CODE (check_arg) == VIEW_CONVERT_EXPR)
+    {
+      if (TREE_CODE (check_arg) == VIEW_CONVERT_EXPR)
+       check_arg = TREE_OPERAND (check_arg, 0);
+      else
+       {
+         gimple *def_stmt = SSA_NAME_DEF_STMT (check_arg);
+         if (!def_stmt || !gimple_assign_single_p (def_stmt))
+           break;
+         check_arg = gimple_assign_rhs1 (def_stmt);
+       }
+    }
+
+  if (TREE_CODE (check_arg) != VECTOR_CST)
+    return false;
+
+  /* A constant with wider elements is not a splat in ARG1's element size,
+     and a non-integer element cannot be read with wi::to_widest.  */
+  tree first_elt = vector_cst_elt (check_arg, 0);
+  if (TREE_CODE (first_elt) != INTEGER_CST
+      || TYPE_PRECISION (TREE_TYPE (first_elt)) > (unsigned) element_bit_width
+      || wi::to_widest (first_elt) != element_bit_width - 1)
+    return false;
+
+  for (size_t i = 1; i < VECTOR_CST_NELTS (check_arg); i++)
+    if (!operand_equal_p (vector_cst_elt (check_arg, i), first_elt, 0))
+      return false;
+
+  /* Only the low bits of each count are used, so all-ones is equivalent to
+     element_bit_width - 1 and is cheaper to materialize.
+     NOTE(review): a GIMPLE shift count >= the element precision is normally
+     out of range; confirm later passes cannot fold this statement.  */
+  tree_vector_builder elts (unsigned_arg1_type, n_elts, 1);
+  for (int i = 0; i < n_elts; i++)
+    elts.safe_push (build_int_cst (unsigned_element_type, -1));
+  tree new_arg1 = elts.build ();
+  gimple *g = gimple_build_assign (lhs, subcode, arg0, new_arg1);
+  gimple_set_location (g, loc);
+  gsi_replace (gsi, g, true);
+  return true;
+}
+
 /* Fold a machine-dependent built-in in GIMPLE.  (For folding into
    a constant, use rs6000_fold_builtin.)  */
 bool
@@ -1720,6 +1782,11 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
        tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
        loc = gimple_location (stmt);
        lhs = gimple_call_lhs (stmt);
+
+       if (rs6000_optimize_vector_bitwidth_shift (gsi, arg0, arg1, lhs, loc, 
LSHIFT_EXPR))
+         {
+           return true;
+         }
        /* Force arg1 into the range valid matching the arg0 type.  */
        /* Build a vector consisting of the max valid bit-size values.  */
        int n_elts = VECTOR_CST_NELTS (arg1);
diff --git a/gcc/testsuite/gcc.target/powerpc/pr119912.c b/gcc/testsuite/gcc.target/powerpc/pr119912.c
new file mode 100644
index 00000000000..d1802bba801
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr119912.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=power8 -mvsx -O2" } */
+
+#include <altivec.h>
+
+vector unsigned int shlw(vector unsigned int in)
+{   /* Count is a byte splat of 31 reinterpreted as words.  */
+    return vec_sl(in, (vector unsigned int)vec_splats((unsigned char)31));
+}
+
+vector unsigned long long shld(vector unsigned long long in)
+{   /* Count is a splat of the int constant 63 cast to doublewords.  */
+    return vec_sl(in, (vector unsigned long long)vec_splats(63));
+}
+
+/* { dg-final { scan-assembler-times {\mvspltis[bhwd] [0-9]+,-1\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mlvx\M} 0 } } */

Reply via email to