[PATCH v2] aarch64: Use SBFIZ for widening signed pow2 multiplies

Abhishek Kaushik Tue, 30 Jun 2026 06:36:00 -0700

For a widening conversion of the result of a signed multiply by a
positive power of two, rewrite the sequence to widen the multiplicand
before multiplying.  This exposes the form that AArch64 can emit as
SBFIZ and avoids a separate sign extension.  The rewrite is valid
because overflow in the original signed multiply is undefined, so every
defined execution computes the same value either way.  It is only
applied when the result type is no wider than a word.


Bootstrapped and regression tested on aarch64-linux-gnu.

gcc/
        * config/aarch64/aarch64.cc: Include gimple-fold.h.
        (aarch64_try_widen_mult_by_pow2): New function.
        (aarch64_instruction_selection): Call it for conversion assignments.

gcc/testsuite/
        * gcc.target/aarch64/sbfiz-widen-mult-1.c: New test.
---
Changes since v1:
- Check that TYPE precision is not bigger than BITS_PER_WORD
- Use gimple_convert instead of gimple_build_assign
- Use check-function-bodies in tests

 gcc/config/aarch64/aarch64.cc                 | 86 +++++++++++++++++++
 .../gcc.target/aarch64/sbfiz-widen-mult-1.c   | 49 +++++++++++
 2 files changed, 135 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sbfiz-widen-mult-1.c

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 124e6dc37cc..4b5ecc00411 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -61,6 +61,7 @@
 #include "dwarf2.h"
 #include "dwarf2out.h"
 #include "gimple-iterator.h"
+#include "gimple-fold.h"
 #include "tree-vectorizer.h"
 #include "aarch64-cost-tables.h"
 #include "dumpfile.h"
@@ -2229,6 +2230,87 @@ aarch64_preferred_else_value (unsigned, tree, unsigned int nops, tree *ops)
   return nops == 3 ? ops[2] : ops[0];
 }
 
+/* CONVERT is a conversion assignment and GSI is the iterator through which
+   it is replaced.  If CONVERT widens the single-use result of a multiply by
+   a positive power-of-two constant in a type with undefined overflow, do the
+   multiply in the wider type instead and return true; otherwise leave the
+   statements untouched and return false.
+
+   This helps AArch64 instruction selection expose a form that can be emitted
+   as SBFIZ, avoiding a separate sign-extension.  For example, rewrite:
+
+     _2 = _1 * 2;
+     _3 = (long int) _2;
+
+   into:
+
+     _6 = (long int) _1;
+     _3 = _6 * 2;
+
+   This is valid because overflow in the original narrow signed multiply is
+   undefined.  For all defined executions, widening the multiplicand before the
+   multiply produces the same value as multiplying in the narrow type and then
+   converting the result.  Results wider than BITS_PER_WORD are not handled.
+
+   The original narrow multiply is removed immediately.  There is no DCE pass
+   after AArch64 instruction selection, so leaving it behind would keep dead
+   multiplications in the final optimized GIMPLE dump.  */
+static bool
+aarch64_try_widen_mult_by_pow2 (const gassign *convert,
+                               gimple_stmt_iterator *gsi)
+{
+  tree type = TREE_TYPE (gimple_assign_lhs (convert));
+  tree inner = gimple_assign_rhs1 (convert);
+  tree inner_type = TREE_TYPE (inner);
+  /* Bail out unless this widens a single-use, overflow-undefined SSA name.  */
+  if (!INTEGRAL_TYPE_P (type)
+      || !INTEGRAL_TYPE_P (inner_type)
+      || !TYPE_OVERFLOW_UNDEFINED (inner_type)
+      || TYPE_PRECISION (type) <= TYPE_PRECISION (inner_type)
+      || TYPE_PRECISION (type) > BITS_PER_WORD
+      || TREE_CODE (inner) != SSA_NAME
+      || !has_single_use (inner))
+    return false;
+
+  gimple *stmt = SSA_NAME_DEF_STMT (inner);
+  if (!is_gimple_assign (stmt)
+      || gimple_assign_rhs_code (stmt) != MULT_EXPR)
+    return false;
+
+  tree multiplicand = gimple_assign_rhs1 (stmt);
+  tree pow2const = gimple_assign_rhs2 (stmt);
+  if (!integer_pow2p (pow2const)
+      || tree_int_cst_sgn (pow2const) <= 0)
+    return false;
+
+  gimple_stmt_iterator stmt_gsi = gsi_for_stmt (stmt);
+  /* Emit the widening conversion of the multiplicand just before CONVERT.  */
+  gimple_seq stmts = NULL;
+  tree widened_multiplicand = gimple_convert (&stmts,
+                                             gimple_location (convert),
+                                             type, multiplicand);
+
+  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+
+  tree widened_pow2const = fold_convert (type, pow2const);
+  /* Define CONVERT's lhs with the wide multiply so its uses are unchanged.  */
+  tree mul_lhs = gimple_assign_lhs (convert);
+  gassign *mul_stmt
+    = gimple_build_assign (mul_lhs, MULT_EXPR,
+                          widened_multiplicand,
+                          widened_pow2const);
+
+  gsi_replace (gsi, mul_stmt, true);
+
+  /* INNER was used only by CONVERT, which we just replaced.  The defining
+     multiply is therefore dead, so remove it.  */
+  gcc_checking_assert (has_zero_uses (inner));
+  gsi_remove (&stmt_gsi, true);
+  release_defs (stmt);
+
+  return true;
+}
+
 /* Implement TARGET_INSTRUCTION_SELECTION.  The target hook is used to
    change generic sequences to a form AArch64 has an easier time expanding
    instructions for.  It's not supposed to be used for generic rewriting that
@@ -2243,6 +2325,10 @@ aarch64_instruction_selection (function * /* fun */, gimple_stmt_iterator *gsi)
   if (!assign)
     return false;
 
+  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))
+      && aarch64_try_widen_mult_by_pow2 (assign, gsi))
+    return true;
+
   /* Convert
        p == q ? s1 : s2;
      to
diff --git a/gcc/testsuite/gcc.target/aarch64/sbfiz-widen-mult-1.c b/gcc/testsuite/gcc.target/aarch64/sbfiz-widen-mult-1.c
new file mode 100644
index 00000000000..801016672e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sbfiz-widen-mult-1.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* Signed x * 2 widened to at most 64 bits should be a single SBFIZ.  */
+#include <stdint.h>
+
+/*
+** extend_8to32_by2:
+**     sbfiz   w0, w0, 1, 8
+**     ret
+*/
+int32_t extend_8to32_by2  (int8_t x)  { return x * 2; }
+
+/*
+** extend_16to32_by2:
+**     sbfiz   w0, w0, 1, 16
+**     ret
+*/
+int32_t extend_16to32_by2 (int16_t x) { return x * 2; }
+
+/*
+** extend_8to64_by2:
+**     sbfiz   x0, x0, 1, 8
+**     ret
+*/
+int64_t extend_8to64_by2  (int8_t x)  { return x * 2; }
+
+/*
+** extend_16to64_by2:
+**     sbfiz   x0, x0, 1, 16
+**     ret
+*/
+int64_t extend_16to64_by2 (int16_t x) { return x * 2; }
+
+/*
+** extend_32to64_by2:
+**     sbfiz   x0, x0, 1, 32
+**     ret
+*/
+int64_t extend_32to64_by2 (int32_t x) { return 2 * x; }
+/* A 128-bit result is wider than a word, so no SBFIZ is expected here.  */
+/*
+** extend_64to128_by2:
+**     mov     x1, x0
+**     lsl     x0, x0, 1
+**     sbfx    x1, x1, 62, 1
+**     ret
+*/
+__int128_t extend_64to128_by2  (long long x)  { return x * 2; }
-- 
2.43.0

[PATCH v2] aarch64: Use SBFIZ for widening signed pow2 multiplies

Reply via email to