Hi all, the following patch has been bootstrapped and regression-tested on powerpc64le-linux.
The PowerPC vector shift-left instructions (vslb, vslh, vslw, vsld) use modulo semantics for the shift amount: only the low-order log2(element_bit_width) bits of each shift-count element are used. A shift by (element_bit_width - 1) can therefore be optimized by replacing the shift-amount splat with a vector of 0xFF..FF, because an all-ones element has the same low-order bits as (element_bit_width - 1). On Power8, this reduces instruction overhead by using vspltis[wd]. This patch adds rs6000_optimize_vector_bitwidth_shift to detect splat constants of (element_bit_width - 1) and replace them with a vector of all -1s, thereby avoiding unnecessary memory loads. 2025-09-18 Jeevitha Palanisamy <[email protected]> gcc/ PR target/119912 * config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Call to new function. (rs6000_optimize_vector_bitwidth_shift): New function to optimize vector immediate shifts. gcc/testsuite/ PR target/119912 * gcc.target/powerpc/pr119912.c: New test. diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc index bc1580f051b..517c99bfcfb 100644 --- a/gcc/config/rs6000/rs6000-builtin.cc +++ b/gcc/config/rs6000/rs6000-builtin.cc @@ -1264,6 +1264,68 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi, return true; } +/* Try to optimize shift by splat(element_bit_width - 1). + Returns true if handled, false otherwise. 
*/ +static bool +rs6000_optimize_vector_bitwidth_shift (gimple_stmt_iterator *gsi, + tree arg0, tree arg1, + tree lhs, location_t loc, enum tree_code subcode) +{ + int element_bit_width = 128 / VECTOR_CST_NELTS (arg1); + tree arg1_type = TREE_TYPE (arg1); + tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1)); + tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type)); + tree check_arg = arg1; + + if (TARGET_P9_VECTOR || TYPE_PRECISION (unsigned_element_type) <= 16) + return false; + + while (TREE_CODE (check_arg) == SSA_NAME + || TREE_CODE (check_arg) == VIEW_CONVERT_EXPR) + { + if (TREE_CODE (check_arg) == SSA_NAME) + { + gimple *def_stmt = SSA_NAME_DEF_STMT (check_arg); + if (!def_stmt || !gimple_assign_lhs (def_stmt)) + break; + check_arg = gimple_assign_rhs1 (def_stmt); + } + else + check_arg = TREE_OPERAND (check_arg, 0); + } + + /* Optimize if splat of (element_bit_width - 1). */ + if (TREE_CODE (check_arg) == VECTOR_CST) + { + tree first_elt = vector_cst_elt (check_arg, 0); + bool is_splat = true; + + if (wi::to_widest (first_elt) != element_bit_width - 1) + return false; + + for (size_t i = 1; i < VECTOR_CST_NELTS (check_arg); i++) + if (!operand_equal_p (vector_cst_elt (check_arg, i), first_elt, 0)) + { + is_splat = false; + break; + } + + if (is_splat) + { + int n_elts = VECTOR_CST_NELTS (arg1); + tree_vector_builder elts (unsigned_arg1_type, n_elts, 1); + for (int i = 0; i < n_elts; i++) + elts.safe_push (build_int_cst (unsigned_element_type, -1)); + tree new_arg1 = elts.build (); + gimple *g = gimple_build_assign (lhs, subcode, arg0, new_arg1); + gimple_set_location (g, loc); + gsi_replace (gsi, g, true); + return true; + } + } + return false; +} + /* Fold a machine-dependent built-in in GIMPLE. (For folding into a constant, use rs6000_fold_builtin.) 
*/ bool @@ -1720,6 +1782,11 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type)); loc = gimple_location (stmt); lhs = gimple_call_lhs (stmt); + + if (rs6000_optimize_vector_bitwidth_shift (gsi, arg0, arg1, lhs, loc, LSHIFT_EXPR)) + { + return true; + } /* Force arg1 into the range valid matching the arg0 type. */ /* Build a vector consisting of the max valid bit-size values. */ int n_elts = VECTOR_CST_NELTS (arg1); diff --git a/gcc/testsuite/gcc.target/powerpc/pr119912.c b/gcc/testsuite/gcc.target/powerpc/pr119912.c new file mode 100644 index 00000000000..d1802bba801 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr119912.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-mdejagnu-cpu=power8 -mvsx -O2" } */ + +#include <altivec.h> + +vector unsigned int shlw(vector unsigned int in) +{ + return vec_sl(in, (vector unsigned int)vec_splats((unsigned char)31)); +} + +vector unsigned long long shld(vector unsigned long long in) +{ + return vec_sl(in, (vector unsigned long long)vec_splats(63)); +} + +/* { dg-final { scan-assembler-times {\mvspltis[bhwd] [0-9]+,-1\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mlvx\M} 0 } } */
