From: kelefth <[email protected]>
This patch enables the avoid-store-forwarding (ASF) pass by default at
-O2 or higher on AArch64.
The assembly patterns in `bitfield-bitint-abi-align16.c` and
`bitfield-bitint-abi-align8.c` have been updated to account for
the ASF transformations.
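For context, the kind of sequence the pass rewrites looks like the
following (a minimal sketch in C, taken from the new test added below;
the exact registers and offsets in the generated code will vary):

    typedef union {
      char arr_8[8];
      long long_value;
    } DataUnion;

    long ssll_1 (DataUnion *data, char x)
    {
      /* A narrow store followed by a wider, overlapping load: many
         cores cannot forward the store-buffer entry to the load and
         stall.  */
      data->arr_8[0] = x;
      return data->long_value;
    }

With ASF enabled, the load is emitted before the narrow store and the
stored byte is then inserted into the loaded value, so the resulting
sequence is along the lines of ldr/strb/bfi rather than a strb followed
by a stalling ldr (this is the pattern the new test scans for).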
gcc/ChangeLog:
* common/config/aarch64/aarch64-common.cc
(aarch_option_optimization_table): Enable -favoid-store-forwarding
at -O2 and higher.
* doc/invoke.texi: Document that -favoid-store-forwarding is enabled
by default at -O2 and higher on AArch64.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/bitfield-bitint-abi-align16.c:
Adjust the expected assembly to account for the ASF transformations.
* gcc.target/aarch64/bitfield-bitint-abi-align8.c: Likewise.
* gcc.target/aarch64/avoid-store-forwarding-6.c: New test.
Co-Authored-By: Christoph Müllner <[email protected]>
Signed-off-by: Konstantinos Eleftheriou <[email protected]>
---
Changes in v4:
- Enable asf on AArch64 only.
- Remove `avoid_store_forwarding_reorder_cost_p` function.
- Remove `asf-default-cost-value` parameter.
Changes in v3:
- Add `avoid_store_forwarding_reorder_cost_p` function.
- Add `asf-default-cost-value` parameter.
- Use `avoid_store_forwarding_reorder_cost_p` for each store in ASF.
- Update x86_64 testcases.
- Update assembly patterns in `bitfield-bitint-abi-align16.c` and
`bitfield-bitint-abi-align8.c`.
- Fix bug where the result of `store_bit_field` is not used in the
following instructions.
Changes in v2:
- Update assembly patterns in `bitfield-bitint-abi-align16.c` and
`bitfield-bitint-abi-align8.c`.
Changes in v1:
- Enable asf by default at O2 or higher.
gcc/common/config/aarch64/aarch64-common.cc | 2 ++
gcc/doc/invoke.texi | 2 +-
.../aarch64/avoid-store-forwarding-6.c | 29 +++++++++++++++++++
.../aarch64/bitfield-bitint-abi-align16.c | 25 +++++++++-------
.../aarch64/bitfield-bitint-abi-align8.c | 25 +++++++++-------
5 files changed, 60 insertions(+), 23 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/avoid-store-forwarding-6.c
diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
index 1488697c6ce4..c3cd3eca9b84 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -60,6 +60,8 @@ static const struct default_options aarch_option_optimization_table[] =
/* Enable redundant extension instructions removal at -O2 and higher. */
{ OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_mearly_ra_, NULL, AARCH64_EARLY_RA_ALL },
+ /* Enable store forwarding avoidance at -O2 and higher. */
+ { OPT_LEVELS_2_PLUS, OPT_favoid_store_forwarding, NULL, 1 },
#if (TARGET_DEFAULT_ASYNC_UNWIND_TABLES == 1)
{ OPT_LEVELS_ALL, OPT_fasynchronous_unwind_tables, NULL, 1 },
{ OPT_LEVELS_ALL, OPT_funwind_tables, NULL, 1},
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 32b9c48f155c..6fc0210e5ee7 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -13313,7 +13313,7 @@
Many CPUs will stall for many cycles when a load partially depends on previous
smaller stores. This pass tries to detect such cases and avoid the penalty by
changing the order of the load and store and then fixing up the loaded value.
-Disabled by default.
+Enabled by default at @option{-O2} and higher on AArch64.
@opindex ffp-contract
@item -ffp-contract=@var{style}
diff --git a/gcc/testsuite/gcc.target/aarch64/avoid-store-forwarding-6.c b/gcc/testsuite/gcc.target/aarch64/avoid-store-forwarding-6.c
new file mode 100644
index 000000000000..320fa5e101e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/avoid-store-forwarding-6.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Same as avoid-store-forwarding-1.c, but without passing
+   -favoid-store-forwarding explicitly.  */
+
+typedef union {
+ char arr_8[8];
+ long long_value;
+} DataUnion;
+
+long ssll_1 (DataUnion *data, char x)
+{
+ data->arr_8[0] = x;
+ return data->long_value;
+}
+
+long ssll_2 (DataUnion *data, char x)
+{
+ data->arr_8[1] = x;
+ return data->long_value;
+}
+
+long ssll_3 (DataUnion *data, char x)
+{
+ data->arr_8[7] = x;
+ return data->long_value;
+}
+
+/* { dg-final { scan-assembler-times {ldr\tx[0-9]+, \[x[0-9]+\]\n\tstrb\tw[0-9]+, \[x[0-9]+(, \d+)?\]\n\tbfi\tx[0-9]+, x[0-9]+, \d+, \d+} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c
index c29a230a7713..34f3d7f9653c 100644
--- a/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c
+++ b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c
@@ -87,14 +87,15 @@
** sub sp, sp, #16
** mov (x[0-9]+), x0
** mov w0, w1
+** mov x1, 0
** sbfx x([0-9]+), \1, 0, 63
** mov (w[0-9]+), 0
** bfi \3, w\2, 0, 1
** and x3, x\2, 9223372036854775807
-** mov x2, 0
-** str xzr, \[sp\]
+** str x1, \[sp\]
** strb \3, \[sp\]
-** ldr x1, \[sp\]
+** bfi x1, x2, 0, 8
+** mov x2, 0
** add sp, sp, 16
** b fp
*/
@@ -183,19 +184,20 @@
** sxtw (x[0-9]+), w1
** mov x0, \2
** and x7, \2, 9223372036854775807
+** mov x2, 0
** mov (w[0-9]+), 0
** bfi \3, w\1, 0, 1
** strb wzr, \[sp, 16\]
** mov x6, x7
** mov x5, x7
** mov x4, x7
+** mov x1, x7
+** str x2, \[sp, 48\]
+** strb \3, \[sp, 48\]
+** bfi x2, x3, 0, 8
+** stp x7, x2, \[sp\]
** mov x3, x7
** mov x2, x7
-** str xzr, \[sp, 48\]
-** strb \3, \[sp, 48\]
-** ldr (x[0-9]+), \[sp, 48\]
-** stp x7, \4, \[sp\]
-** mov x1, x7
** bl fp_stack
** sbfx x0, x0, 0, 63
**...
@@ -341,12 +343,13 @@
**...
** mov x([0-9]+), x0
** mov w0, w1
+** mov x1, 0
** mov (w[0-9]+), 0
** bfi \2, w\1, 0, 1
-** mov x2, 0
-** str xzr, \[sp\]
+** str x1, \[sp\]
** strb \2, \[sp\]
-** ldr x1, \[sp\]
+** bfi x1, x2, 0, 8
+** mov x2, 0
**...
** b fp_stdarg
*/
diff --git a/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align8.c b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align8.c
index 13ffbf416cab..d9cefbabb80c 100644
--- a/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align8.c
+++ b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align8.c
@@ -87,14 +87,15 @@
** sub sp, sp, #16
** mov (x[0-9]+), x0
** mov w0, w1
+** mov x1, 0
** sbfx x([0-9]+), \1, 0, 63
** mov (w[0-9]+), 0
** bfi \3, w\2, 0, 1
** and x3, x\2, 9223372036854775807
-** mov x2, 0
-** str xzr, \[sp\]
+** str x1, \[sp\]
** strb \3, \[sp\]
-** ldr x1, \[sp\]
+** bfi x1, x2, 0, 8
+** mov x2, 0
** add sp, sp, 16
** b fp
*/
@@ -183,19 +184,20 @@
** sxtw (x[0-9]+), w1
** mov x0, \2
** and x7, \2, 9223372036854775807
+** mov x2, 0
** mov (w[0-9]+), 0
** bfi \3, w\1, 0, 1
** strb wzr, \[sp, 16\]
** mov x6, x7
** mov x5, x7
** mov x4, x7
+** mov x1, x7
+** str x2, \[sp, 48\]
+** strb \3, \[sp, 48\]
+** bfi x2, x3, 0, 8
+** stp x7, x2, \[sp\]
** mov x3, x7
** mov x2, x7
-** str xzr, \[sp, 48\]
-** strb \3, \[sp, 48\]
-** ldr (x[0-9]+), \[sp, 48\]
-** stp x7, \4, \[sp\]
-** mov x1, x7
** bl fp_stack
** sbfx x0, x0, 0, 63
**...
@@ -343,12 +345,13 @@
**...
** mov x([0-9]+), x0
** mov w0, w1
+** mov x1, 0
** mov (w[0-9]+), 0
** bfi \2, w\1, 0, 1
-** mov x2, 0
-** str xzr, \[sp\]
+** str x1, \[sp\]
** strb \2, \[sp\]
-** ldr x1, \[sp\]
+** bfi x1, x2, 0, 8
+** mov x2, 0
**...
** b fp_stdarg
*/
--
2.50.1