On 23/02/2026 10:24, Torbjörn SVENSSON wrote:
> With this patch, both variations are accepted.
> In trunk, only the first one is used unless -fno-fuse-ops-with-volatile-access
> is given, so I sent a separate patch for trunk in
> https://gcc.gnu.org/pipermail/gcc-patches/2026-February/709021.html
> 
> Also worth noting that the tests still fail if -mtune=cortex-m7 is used.
> On releases/gcc-15, Cortex-M7 produces this assembler instead:
> stacktest1:
>         sub     sp, sp, #8
>         strh    r0, [sp, #6]    @ __bf16
>         add     r3, sp, #6
>         ldrh    r0, [sp, #6]    @ __bf16
>         add     sp, sp, #8
>         bx      lr
> 
> On trunk, this does not happen.
> 
> 
> Ok for releases/gcc-15?
> 
> --
> 
> Some targets generate sub-optimal assembly where an intermediate
> register is used for the stack offset, while others avoid the extra
> register and produce slightly smaller code.  Adjust expected assembler
> to match both.
> 
> Example assembler for thumb/arch=armv6s-m/tune=cortex-m0/float-abi=soft
> stacktest1:
>         sub     sp, sp, #8
>         strh    r0, [sp, #6]    @ __bf16
>         ldrh    r0, [sp, #6]    @ __bf16
>         add     sp, sp, #8
>         bx      lr

If GCC can now generate this, then

> 
> Example assembler for thumb/cpu=cortex-m0/float-abi=soft
> stacktest1:
>         sub     sp, sp, #8
>         add     r3, sp, #6
>         strh    r0, [r3]        @ __bf16
>         ldrh    r0, [sp, #6]    @ __bf16
>         add     sp, sp, #8
>         bx      lr
> 

Generating this now seems like a bug and we need to at least find out why.

> gcc/testsuite/ChangeLog:
> 
>       * gcc.target/arm/bfloat16_scalar_1_2.c: Adjust assembler to
>       match compiler.
>       * gcc.target/arm/bfloat16_scalar_2_2.c: Likewise.
>       * gcc.target/arm/bfloat16_scalar_3_2.c: Likewise.
>       * gcc.target/arm/bfloat16_simd_1_2.c: Likewise.
>       * gcc.target/arm/bfloat16_simd_2_2.c: Likewise.
>       * gcc.target/arm/bfloat16_simd_3_2.c: Likewise.

So on that basis, I don't think this is the right way to fix this, at least
not without clarity as to the reason for the code difference.

R.
> ---
>  gcc/testsuite/gcc.target/arm/bfloat16_scalar_1_2.c | 10 ++++++++--
>  gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_2.c | 10 ++++++++--
>  gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_2.c | 10 ++++++++--
>  gcc/testsuite/gcc.target/arm/bfloat16_simd_1_2.c   | 10 ++++++++--
>  gcc/testsuite/gcc.target/arm/bfloat16_simd_2_2.c   | 10 ++++++++--
>  gcc/testsuite/gcc.target/arm/bfloat16_simd_3_2.c   | 10 ++++++++--
>  6 files changed, 48 insertions(+), 12 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_1_2.c b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_1_2.c
> index 079814ef337..fb3f11c39f3 100644
> --- a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_1_2.c
> +++ b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_1_2.c
> @@ -9,8 +9,14 @@
>  /*
>  **stacktest1:
>  **   ...
> -**   strh    r[0-9]+, \[r[0-9]+\]    @ __bf16
> -**   ldrh    r[0-9]+, \[sp, #[0-9]+\]        @ __bf16
> +**(
> +**   strh    r[0-9]+, (\[sp, #[0-9]+\])      @ __bf16
> +**   ldrh    r[0-9]+, \1     @ __bf16
> +**|
> +**   add     (r[0-9]+), (sp, #[0-9]+)
> +**   strh    r[0-9]+, \[\2]  @ __bf16
> +**   ldrh    r[0-9]+, \[\3\] @ __bf16
> +**)
>  **   ...
>  **   bx      lr
>  */
> diff --git a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_2.c b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_2.c
> index fc252b94edc..b08aeeed56c 100644
> --- a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_2.c
> +++ b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_2.c
> @@ -12,8 +12,14 @@
>  /*
>  **stacktest1:
>  **   ...
> -**   strh    r[0-9]+, \[r[0-9]+\]    @ __bf16
> -**   ldrh    r[0-9]+, \[sp, #[0-9]+\]        @ __bf16
> +**(
> +**   strh    r[0-9]+, (\[sp, #[0-9]+\])      @ __bf16
> +**   ldrh    r[0-9]+, \1     @ __bf16
> +**|
> +**   add     (r[0-9]+), (sp, #[0-9]+)
> +**   strh    r[0-9]+, \[\2]  @ __bf16
> +**   ldrh    r[0-9]+, \[\3\] @ __bf16
> +**)
>  **   ...
>  **   bx      lr
>  */
> diff --git a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_2.c b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_2.c
> index 079814ef337..fb3f11c39f3 100644
> --- a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_2.c
> +++ b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_2.c
> @@ -9,8 +9,14 @@
>  /*
>  **stacktest1:
>  **   ...
> -**   strh    r[0-9]+, \[r[0-9]+\]    @ __bf16
> -**   ldrh    r[0-9]+, \[sp, #[0-9]+\]        @ __bf16
> +**(
> +**   strh    r[0-9]+, (\[sp, #[0-9]+\])      @ __bf16
> +**   ldrh    r[0-9]+, \1     @ __bf16
> +**|
> +**   add     (r[0-9]+), (sp, #[0-9]+)
> +**   strh    r[0-9]+, \[\2]  @ __bf16
> +**   ldrh    r[0-9]+, \[\3\] @ __bf16
> +**)
>  **   ...
>  **   bx      lr
>  */
> diff --git a/gcc/testsuite/gcc.target/arm/bfloat16_simd_1_2.c b/gcc/testsuite/gcc.target/arm/bfloat16_simd_1_2.c
> index 299bd60086b..58f75a73c05 100644
> --- a/gcc/testsuite/gcc.target/arm/bfloat16_simd_1_2.c
> +++ b/gcc/testsuite/gcc.target/arm/bfloat16_simd_1_2.c
> @@ -11,8 +11,14 @@
>  /*
>  **stacktest1:
>  **   ...
> -**   strh    r[0-9]+, \[r[0-9]+\]    @ __bf16
> -**   ldrh    r[0-9]+, \[sp, #[0-9]+\]        @ __bf16
> +**(
> +**   strh    r[0-9]+, (\[sp, #[0-9]+\])      @ __bf16
> +**   ldrh    r[0-9]+, \1     @ __bf16
> +**|
> +**   add     (r[0-9]+), (sp, #[0-9]+)
> +**   strh    r[0-9]+, \[\2]  @ __bf16
> +**   ldrh    r[0-9]+, \[\3\] @ __bf16
> +**)
>  **   ...
>  **   bx      lr
>  */
> diff --git a/gcc/testsuite/gcc.target/arm/bfloat16_simd_2_2.c b/gcc/testsuite/gcc.target/arm/bfloat16_simd_2_2.c
> index 9b1ff278041..ca7473acfac 100644
> --- a/gcc/testsuite/gcc.target/arm/bfloat16_simd_2_2.c
> +++ b/gcc/testsuite/gcc.target/arm/bfloat16_simd_2_2.c
> @@ -14,8 +14,14 @@
>  /*
>  **stacktest1:
>  **   ...
> -**   strh    r[0-9]+, \[r[0-9]+\]    @ __bf16
> -**   ldrh    r[0-9]+, \[sp, #[0-9]+\]        @ __bf16
> +**(
> +**   strh    r[0-9]+, (\[sp, #[0-9]+\])      @ __bf16
> +**   ldrh    r[0-9]+, \1     @ __bf16
> +**|
> +**   add     (r[0-9]+), (sp, #[0-9]+)
> +**   strh    r[0-9]+, \[\2]  @ __bf16
> +**   ldrh    r[0-9]+, \[\3\] @ __bf16
> +**)
>  **   ...
>  **   bx      lr
>  */
> diff --git a/gcc/testsuite/gcc.target/arm/bfloat16_simd_3_2.c b/gcc/testsuite/gcc.target/arm/bfloat16_simd_3_2.c
> index ec9f4a0d690..63f54e32c55 100644
> --- a/gcc/testsuite/gcc.target/arm/bfloat16_simd_3_2.c
> +++ b/gcc/testsuite/gcc.target/arm/bfloat16_simd_3_2.c
> @@ -11,8 +11,14 @@
>  /*
>  **stacktest1:
>  **   ...
> -**   strh    r[0-9]+, \[r[0-9]+\]    @ __bf16
> -**   ldrh    r[0-9]+, \[sp, #[0-9]+\]        @ __bf16
> +**(
> +**   strh    r[0-9]+, (\[sp, #[0-9]+\])      @ __bf16
> +**   ldrh    r[0-9]+, \1     @ __bf16
> +**|
> +**   add     (r[0-9]+), (sp, #[0-9]+)
> +**   strh    r[0-9]+, \[\2]  @ __bf16
> +**   ldrh    r[0-9]+, \[\3\] @ __bf16
> +**)
>  **   ...
>  **   bx      lr
>  */

Reply via email to