On Mon, Apr 14, 2025 at 2:39 AM Uros Bizjak <ubiz...@gmail.com> wrote:
>
> On Mon, Apr 14, 2025 at 8:54 AM Hongtao Liu <crazy...@gmail.com> wrote:
> >
> > On Mon, Apr 14, 2025 at 7:36 AM H.J. Lu <hjl.to...@gmail.com> wrote:
> > >
> > > Don't use red-zone when there are no caller-saved registers and APX is
> > > enabled since 128-byte red-zone is too small for 31 GPRs.
> > >
> > > gcc/
> > >
> > >         PR target/119784
> > >         * config/i386/i386.cc (ix86_using_red_zone): Don't use red-zone
> > >         with APX and no caller-saved registers.
> > >
> > > gcc/testsuite/
> > >
> > >         PR target/119784
> > >         * gcc.target/i386/pr119784a.c: New test.
> > >         * gcc.target/i386/pr119784b.c: Likewise.
> > >
> > > Signed-off-by: H.J. Lu <hjl.to...@gmail.com>
> > > ---
> > >  gcc/config/i386/i386.cc                   |  6 ++
> > >  gcc/testsuite/gcc.target/i386/pr119784a.c | 96 +++++++++++++++++++++++
> > >  gcc/testsuite/gcc.target/i386/pr119784b.c | 87 ++++++++++++++++++++
> > >  3 files changed, 189 insertions(+)
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr119784a.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr119784b.c
> > >
> > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > > index 4f8380c4a58..91af239d484 100644
> > > --- a/gcc/config/i386/i386.cc
> > > +++ b/gcc/config/i386/i386.cc
> > > @@ -458,6 +458,9 @@ int ix86_arch_specified;
> > >     indirect thunk pushes the return address onto stack, destroying
> > >     red-zone.
> > >
> > > +   NB: Don't use red-zone for functions with no_caller_saved_registers
> > > +   when APX is enabled since 128-byte red-zone is too small for 31 GPRs.
> > > +
> > >     TODO: If we can reserve the first 2 WORDs, for PUSH and, another
> > >     for CALL, in red-zone, we can allow local indirect jumps with
> > >     indirect thunk.  */
> > > @@ -467,6 +470,9 @@ ix86_using_red_zone (void)
> > >  {
> > >    return (TARGET_RED_ZONE
> > >           && !TARGET_64BIT_MS_ABI
> > > +         && (!TARGET_APX_F
> > Could we use !TARGET_APX_EGPR instead?  It's clearer.

Fixed.  Here is the patch I am checking in.  OK to backport to the
release branches?

Thanks.

> > Others LGTM, but please wait for a while in case Uros has a different 
> > opinion.
>
> LGTM too.
>
> Thanks,
> Uros.
>
> > > +             || (cfun->machine->call_saved_registers
> > > +                 != TYPE_NO_CALLER_SAVED_REGISTERS))
> > >           && (!cfun->machine->has_local_indirect_jump
> > >               || cfun->machine->indirect_branch_type == 
> > > indirect_branch_keep));
> > >  }
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr119784a.c 
> > > b/gcc/testsuite/gcc.target/i386/pr119784a.c
> > > new file mode 100644
> > > index 00000000000..8a119d4cc1f
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr119784a.c
> > > @@ -0,0 +1,96 @@
> > > +/* { dg-do compile { target { *-*-linux* && lp64 } } } */
> > > +/* { dg-options "-O2 -fno-pic -mtune=generic -mgeneral-regs-only -mapxf 
> > > -mtune-ctrl=prologue_using_move,epilogue_using_move" } */
> > > +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
> > > +/* { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } 
> > > {^\t?\.}  } } */
> > > +
> > > +/* start must save and restore all caller saved registers.  */
> > > +
> > > +/*
> > > +**start:
> > > +**.LFB[0-9]+:
> > > +**     .cfi_startproc
> > > +**     subq    \$248, %rsp
> > > +**...
> > > +**     movq    %rax, \(%rsp\)
> > > +**     movq    %rdx, 8\(%rsp\)
> > > +**     movq    %rcx, 16\(%rsp\)
> > > +**     movq    %rbx, 24\(%rsp\)
> > > +**     movq    %rsi, 32\(%rsp\)
> > > +**     movq    %rdi, 40\(%rsp\)
> > > +**...
> > > +**     movq    %rbp, 48\(%rsp\)
> > > +**     movq    %r8, 56\(%rsp\)
> > > +**     movq    %r9, 64\(%rsp\)
> > > +**     movq    %r10, 72\(%rsp\)
> > > +**     movq    %r11, 80\(%rsp\)
> > > +**     movq    %r12, 88\(%rsp\)
> > > +**     movq    %r13, 96\(%rsp\)
> > > +**     movq    %r14, 104\(%rsp\)
> > > +**     movq    %r15, 112\(%rsp\)
> > > +**     movq    %r16, 120\(%rsp\)
> > > +**     movq    %r17, 128\(%rsp\)
> > > +**     movq    %r18, 136\(%rsp\)
> > > +**     movq    %r19, 144\(%rsp\)
> > > +**     movq    %r20, 152\(%rsp\)
> > > +**     movq    %r21, 160\(%rsp\)
> > > +**     movq    %r22, 168\(%rsp\)
> > > +**     movq    %r23, 176\(%rsp\)
> > > +**     movq    %r24, 184\(%rsp\)
> > > +**     movq    %r25, 192\(%rsp\)
> > > +**     movq    %r26, 200\(%rsp\)
> > > +**     movq    %r27, 208\(%rsp\)
> > > +**     movq    %r28, 216\(%rsp\)
> > > +**     movq    %r29, 224\(%rsp\)
> > > +**     movq    %r30, 232\(%rsp\)
> > > +**     movq    %r31, 240\(%rsp\)
> > > +**...
> > > +**     call    \*code\(%rip\)
> > > +**     movq    \(%rsp\), %rax
> > > +**     movq    8\(%rsp\), %rdx
> > > +**     movq    16\(%rsp\), %rcx
> > > +**     movq    24\(%rsp\), %rbx
> > > +**     movq    32\(%rsp\), %rsi
> > > +**     movq    40\(%rsp\), %rdi
> > > +**     movq    48\(%rsp\), %rbp
> > > +**     movq    56\(%rsp\), %r8
> > > +**     movq    64\(%rsp\), %r9
> > > +**     movq    72\(%rsp\), %r10
> > > +**     movq    80\(%rsp\), %r11
> > > +**     movq    88\(%rsp\), %r12
> > > +**     movq    96\(%rsp\), %r13
> > > +**     movq    104\(%rsp\), %r14
> > > +**     movq    112\(%rsp\), %r15
> > > +**     movq    120\(%rsp\), %r16
> > > +**     movq    128\(%rsp\), %r17
> > > +**     movq    136\(%rsp\), %r18
> > > +**     movq    144\(%rsp\), %r19
> > > +**     movq    152\(%rsp\), %r20
> > > +**     movq    160\(%rsp\), %r21
> > > +**     movq    168\(%rsp\), %r22
> > > +**     movq    176\(%rsp\), %r23
> > > +**     movq    184\(%rsp\), %r24
> > > +**     movq    192\(%rsp\), %r25
> > > +**     movq    200\(%rsp\), %r26
> > > +**     movq    208\(%rsp\), %r27
> > > +**     movq    216\(%rsp\), %r28
> > > +**     movq    224\(%rsp\), %r29
> > > +**     movq    232\(%rsp\), %r30
> > > +**     movq    240\(%rsp\), %r31
> > > +**     addq    \$248, %rsp
> > > +**...
> > > +**     ret
> > > +**     .cfi_endproc
> > > +**...
> > > +*/
> > > +
> > > +#define DONT_SAVE_REGS __attribute__((no_callee_saved_registers))
> > > +#define SAVE_REGS __attribute__((no_caller_saved_registers))
> > > +
> > > +typedef DONT_SAVE_REGS void (*op_t)(void);
> > > +
> > > +extern op_t code[];
> > > +
> > > +SAVE_REGS void start()
> > > +{
> > > +  code[0]();
> > > +}
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr119784b.c 
> > > b/gcc/testsuite/gcc.target/i386/pr119784b.c
> > > new file mode 100644
> > > index 00000000000..c6761976ed6
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr119784b.c
> > > @@ -0,0 +1,87 @@
> > > +/* { dg-do compile { target { *-*-linux* && x32 } } } */
> > > +/* { dg-options "-O2 -fno-pic -mtune=generic -mgeneral-regs-only -mapxf 
> > > -mtune-ctrl=prologue_using_move,epilogue_using_move" } */
> > > +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
> > > +/* { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } 
> > > {^\t?\.}  } } */
> > > +
> > > +/* start must save and restore all caller saved registers.  */
> > > +
> > > +/*
> > > +**start:
> > > +**.LFB[0-9]+:
> > > +**     .cfi_startproc
> > > +**     subl    \$248, %esp
> > > +**...
> > > +**     movq    %rax, \(%rsp\)
> > > +**     movq    %rdx, 8\(%rsp\)
> > > +**     movq    %rcx, 16\(%rsp\)
> > > +**     movq    %rbx, 24\(%rsp\)
> > > +**     movq    %rsi, 32\(%rsp\)
> > > +**     movq    %rdi, 40\(%rsp\)
> > > +**...
> > > +**     movq    %rbp, 48\(%rsp\)
> > > +**     movq    %r8, 56\(%rsp\)
> > > +**     movq    %r9, 64\(%rsp\)
> > > +**     movq    %r10, 72\(%rsp\)
> > > +**     movq    %r11, 80\(%rsp\)
> > > +**     movq    %r12, 88\(%rsp\)
> > > +**     movq    %r13, 96\(%rsp\)
> > > +**     movq    %r14, 104\(%rsp\)
> > > +**     movq    %r15, 112\(%rsp\)
> > > +**     movq    %r16, 120\(%rsp\)
> > > +**     movq    %r17, 128\(%rsp\)
> > > +**     movq    %r18, 136\(%rsp\)
> > > +**     movq    %r19, 144\(%rsp\)
> > > +**     movq    %r20, 152\(%rsp\)
> > > +**     movq    %r21, 160\(%rsp\)
> > > +**     movq    %r22, 168\(%rsp\)
> > > +**     movq    %r23, 176\(%rsp\)
> > > +**     movq    %r24, 184\(%rsp\)
> > > +**     movq    %r25, 192\(%rsp\)
> > > +**     movq    %r26, 200\(%rsp\)
> > > +**     movq    %r27, 208\(%rsp\)
> > > +**     movq    %r28, 216\(%rsp\)
> > > +**     movq    %r29, 224\(%rsp\)
> > > +**     movq    %r30, 232\(%rsp\)
> > > +**     movq    %r31, 240\(%rsp\)
> > > +**...
> > > +**     movl    code\(%rip\), %ebp
> > > +**     call    \*%rbp
> > > +**     movq    \(%rsp\), %rax
> > > +**     movq    8\(%rsp\), %rdx
> > > +**     movq    16\(%rsp\), %rcx
> > > +**     movq    24\(%rsp\), %rbx
> > > +**     movq    32\(%rsp\), %rsi
> > > +**     movq    40\(%rsp\), %rdi
> > > +**     movq    48\(%rsp\), %rbp
> > > +**     movq    56\(%rsp\), %r8
> > > +**     movq    64\(%rsp\), %r9
> > > +**     movq    72\(%rsp\), %r10
> > > +**     movq    80\(%rsp\), %r11
> > > +**     movq    88\(%rsp\), %r12
> > > +**     movq    96\(%rsp\), %r13
> > > +**     movq    104\(%rsp\), %r14
> > > +**     movq    112\(%rsp\), %r15
> > > +**     movq    120\(%rsp\), %r16
> > > +**     movq    128\(%rsp\), %r17
> > > +**     movq    136\(%rsp\), %r18
> > > +**     movq    144\(%rsp\), %r19
> > > +**     movq    152\(%rsp\), %r20
> > > +**     movq    160\(%rsp\), %r21
> > > +**     movq    168\(%rsp\), %r22
> > > +**     movq    176\(%rsp\), %r23
> > > +**     movq    184\(%rsp\), %r24
> > > +**     movq    192\(%rsp\), %r25
> > > +**     movq    200\(%rsp\), %r26
> > > +**     movq    208\(%rsp\), %r27
> > > +**     movq    216\(%rsp\), %r28
> > > +**     movq    224\(%rsp\), %r29
> > > +**     movq    232\(%rsp\), %r30
> > > +**     movq    240\(%rsp\), %r31
> > > +**     addl    \$248, %esp
> > > +**...
> > > +**     ret
> > > +**     .cfi_endproc
> > > +**...
> > > +*/
> > > +
> > > +#include "pr119784a.c"
> > > --
> > > 2.49.0
> > >
> >
> >
> > --
> > BR,
> > Hongtao



-- 
H.J.
From dffa12463f5b551e9267634718c0d2af00119ee8 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.to...@gmail.com>
Date: Sun, 13 Apr 2025 12:20:42 -0700
Subject: [PATCH] APX: Don't use red-zone with 32 GPRs and no caller-saved
 registers

Don't use red-zone in functions with no caller-saved registers when 32
GPRs are available, since the 128-byte red-zone is too small to hold the
31 GPRs that must be saved (31 * 8 = 248 bytes).

gcc/

	PR target/119784
	* config/i386/i386.cc (ix86_using_red_zone): Don't use red-zone
	with 32 GPRs and no caller-saved registers.

gcc/testsuite/

	PR target/119784
	* gcc.target/i386/pr119784a.c: New test.
	* gcc.target/i386/pr119784b.c: Likewise.

Signed-off-by: H.J. Lu <hjl.to...@gmail.com>
---
 gcc/config/i386/i386.cc                   |  6 ++
 gcc/testsuite/gcc.target/i386/pr119784a.c | 96 +++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr119784b.c | 87 ++++++++++++++++++++
 3 files changed, 189 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr119784a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr119784b.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 4f8380c4a58..b172f716c68 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -458,6 +458,9 @@ int ix86_arch_specified;
    indirect thunk pushes the return address onto stack, destroying
    red-zone.
 
+   NB: Don't use red-zone for functions with no_caller_saved_registers
+   and 32 GPRs since 128-byte red-zone is too small for 31 GPRs.
+
    TODO: If we can reserve the first 2 WORDs, for PUSH and, another
    for CALL, in red-zone, we can allow local indirect jumps with
    indirect thunk.  */
@@ -467,6 +470,9 @@ ix86_using_red_zone (void)
 {
   return (TARGET_RED_ZONE
 	  && !TARGET_64BIT_MS_ABI
+	  && (!TARGET_APX_EGPR
+	      || (cfun->machine->call_saved_registers
+		  != TYPE_NO_CALLER_SAVED_REGISTERS))
 	  && (!cfun->machine->has_local_indirect_jump
 	      || cfun->machine->indirect_branch_type == indirect_branch_keep));
 }
diff --git a/gcc/testsuite/gcc.target/i386/pr119784a.c b/gcc/testsuite/gcc.target/i386/pr119784a.c
new file mode 100644
index 00000000000..8a119d4cc1f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr119784a.c
@@ -0,0 +1,96 @@
+/* { dg-do compile { target { *-*-linux* && lp64 } } } */
+/* { dg-options "-O2 -fno-pic -mtune=generic -mgeneral-regs-only -mapxf -mtune-ctrl=prologue_using_move,epilogue_using_move" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {^\t?\.}  } } */
+
+/* start must save and restore all caller saved registers.  */
+
+/*
+**start:
+**.LFB[0-9]+:
+**	.cfi_startproc
+**	subq	\$248, %rsp
+**...
+**	movq	%rax, \(%rsp\)
+**	movq	%rdx, 8\(%rsp\)
+**	movq	%rcx, 16\(%rsp\)
+**	movq	%rbx, 24\(%rsp\)
+**	movq	%rsi, 32\(%rsp\)
+**	movq	%rdi, 40\(%rsp\)
+**...
+**	movq	%rbp, 48\(%rsp\)
+**	movq	%r8, 56\(%rsp\)
+**	movq	%r9, 64\(%rsp\)
+**	movq	%r10, 72\(%rsp\)
+**	movq	%r11, 80\(%rsp\)
+**	movq	%r12, 88\(%rsp\)
+**	movq	%r13, 96\(%rsp\)
+**	movq	%r14, 104\(%rsp\)
+**	movq	%r15, 112\(%rsp\)
+**	movq	%r16, 120\(%rsp\)
+**	movq	%r17, 128\(%rsp\)
+**	movq	%r18, 136\(%rsp\)
+**	movq	%r19, 144\(%rsp\)
+**	movq	%r20, 152\(%rsp\)
+**	movq	%r21, 160\(%rsp\)
+**	movq	%r22, 168\(%rsp\)
+**	movq	%r23, 176\(%rsp\)
+**	movq	%r24, 184\(%rsp\)
+**	movq	%r25, 192\(%rsp\)
+**	movq	%r26, 200\(%rsp\)
+**	movq	%r27, 208\(%rsp\)
+**	movq	%r28, 216\(%rsp\)
+**	movq	%r29, 224\(%rsp\)
+**	movq	%r30, 232\(%rsp\)
+**	movq	%r31, 240\(%rsp\)
+**...
+**	call	\*code\(%rip\)
+**	movq	\(%rsp\), %rax
+**	movq	8\(%rsp\), %rdx
+**	movq	16\(%rsp\), %rcx
+**	movq	24\(%rsp\), %rbx
+**	movq	32\(%rsp\), %rsi
+**	movq	40\(%rsp\), %rdi
+**	movq	48\(%rsp\), %rbp
+**	movq	56\(%rsp\), %r8
+**	movq	64\(%rsp\), %r9
+**	movq	72\(%rsp\), %r10
+**	movq	80\(%rsp\), %r11
+**	movq	88\(%rsp\), %r12
+**	movq	96\(%rsp\), %r13
+**	movq	104\(%rsp\), %r14
+**	movq	112\(%rsp\), %r15
+**	movq	120\(%rsp\), %r16
+**	movq	128\(%rsp\), %r17
+**	movq	136\(%rsp\), %r18
+**	movq	144\(%rsp\), %r19
+**	movq	152\(%rsp\), %r20
+**	movq	160\(%rsp\), %r21
+**	movq	168\(%rsp\), %r22
+**	movq	176\(%rsp\), %r23
+**	movq	184\(%rsp\), %r24
+**	movq	192\(%rsp\), %r25
+**	movq	200\(%rsp\), %r26
+**	movq	208\(%rsp\), %r27
+**	movq	216\(%rsp\), %r28
+**	movq	224\(%rsp\), %r29
+**	movq	232\(%rsp\), %r30
+**	movq	240\(%rsp\), %r31
+**	addq	\$248, %rsp
+**...
+**	ret
+**	.cfi_endproc
+**...
+*/
+
+#define DONT_SAVE_REGS __attribute__((no_callee_saved_registers))
+#define SAVE_REGS __attribute__((no_caller_saved_registers))
+
+typedef DONT_SAVE_REGS void (*op_t)(void); 
+
+extern op_t code[];
+
+SAVE_REGS void start()
+{
+  code[0]();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr119784b.c b/gcc/testsuite/gcc.target/i386/pr119784b.c
new file mode 100644
index 00000000000..c6761976ed6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr119784b.c
@@ -0,0 +1,87 @@
+/* { dg-do compile { target { *-*-linux* && x32 } } } */
+/* { dg-options "-O2 -fno-pic -mtune=generic -mgeneral-regs-only -mapxf -mtune-ctrl=prologue_using_move,epilogue_using_move" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {^\t?\.}  } } */
+
+/* start must save and restore all caller saved registers.  */
+
+/*
+**start:
+**.LFB[0-9]+:
+**	.cfi_startproc
+**	subl	\$248, %esp
+**...
+**	movq	%rax, \(%rsp\)
+**	movq	%rdx, 8\(%rsp\)
+**	movq	%rcx, 16\(%rsp\)
+**	movq	%rbx, 24\(%rsp\)
+**	movq	%rsi, 32\(%rsp\)
+**	movq	%rdi, 40\(%rsp\)
+**...
+**	movq	%rbp, 48\(%rsp\)
+**	movq	%r8, 56\(%rsp\)
+**	movq	%r9, 64\(%rsp\)
+**	movq	%r10, 72\(%rsp\)
+**	movq	%r11, 80\(%rsp\)
+**	movq	%r12, 88\(%rsp\)
+**	movq	%r13, 96\(%rsp\)
+**	movq	%r14, 104\(%rsp\)
+**	movq	%r15, 112\(%rsp\)
+**	movq	%r16, 120\(%rsp\)
+**	movq	%r17, 128\(%rsp\)
+**	movq	%r18, 136\(%rsp\)
+**	movq	%r19, 144\(%rsp\)
+**	movq	%r20, 152\(%rsp\)
+**	movq	%r21, 160\(%rsp\)
+**	movq	%r22, 168\(%rsp\)
+**	movq	%r23, 176\(%rsp\)
+**	movq	%r24, 184\(%rsp\)
+**	movq	%r25, 192\(%rsp\)
+**	movq	%r26, 200\(%rsp\)
+**	movq	%r27, 208\(%rsp\)
+**	movq	%r28, 216\(%rsp\)
+**	movq	%r29, 224\(%rsp\)
+**	movq	%r30, 232\(%rsp\)
+**	movq	%r31, 240\(%rsp\)
+**...
+**	movl	code\(%rip\), %ebp
+**	call	\*%rbp
+**	movq	\(%rsp\), %rax
+**	movq	8\(%rsp\), %rdx
+**	movq	16\(%rsp\), %rcx
+**	movq	24\(%rsp\), %rbx
+**	movq	32\(%rsp\), %rsi
+**	movq	40\(%rsp\), %rdi
+**	movq	48\(%rsp\), %rbp
+**	movq	56\(%rsp\), %r8
+**	movq	64\(%rsp\), %r9
+**	movq	72\(%rsp\), %r10
+**	movq	80\(%rsp\), %r11
+**	movq	88\(%rsp\), %r12
+**	movq	96\(%rsp\), %r13
+**	movq	104\(%rsp\), %r14
+**	movq	112\(%rsp\), %r15
+**	movq	120\(%rsp\), %r16
+**	movq	128\(%rsp\), %r17
+**	movq	136\(%rsp\), %r18
+**	movq	144\(%rsp\), %r19
+**	movq	152\(%rsp\), %r20
+**	movq	160\(%rsp\), %r21
+**	movq	168\(%rsp\), %r22
+**	movq	176\(%rsp\), %r23
+**	movq	184\(%rsp\), %r24
+**	movq	192\(%rsp\), %r25
+**	movq	200\(%rsp\), %r26
+**	movq	208\(%rsp\), %r27
+**	movq	216\(%rsp\), %r28
+**	movq	224\(%rsp\), %r29
+**	movq	232\(%rsp\), %r30
+**	movq	240\(%rsp\), %r31
+**	addl	\$248, %esp
+**...
+**	ret
+**	.cfi_endproc
+**...
+*/
+
+#include "pr119784a.c"
-- 
2.49.0

Reply via email to