On Mon, Apr 14, 2025 at 2:39 AM Uros Bizjak <ubiz...@gmail.com> wrote: > > On Mon, Apr 14, 2025 at 8:54 AM Hongtao Liu <crazy...@gmail.com> wrote: > > > > On Mon, Apr 14, 2025 at 7:36 AM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > > > Don't use red-zone when there are no caller-saved registers and APX is > > > enabled since 128-byte red-zone is too small for 31 GPRs. > > > > > > gcc/ > > > > > > PR target/119784 > > > * config/i386/i386.cc (ix86_using_red_zone): Don't use red-zone > > > with APX and no caller-saved registers. > > > > > > gcc/testsuite/ > > > > > > PR target/119784 > > > * gcc.target/i386/pr119784a.c: New test. > > > * gcc.target/i386/pr119784b.c: Likewise. > > > > > > Signed-off-by: H.J. Lu <hjl.to...@gmail.com> > > > --- > > > gcc/config/i386/i386.cc | 6 ++ > > > gcc/testsuite/gcc.target/i386/pr119784a.c | 96 +++++++++++++++++++++++ > > > gcc/testsuite/gcc.target/i386/pr119784b.c | 87 ++++++++++++++++++++ > > > 3 files changed, 189 insertions(+) > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr119784a.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr119784b.c > > > > > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > > > index 4f8380c4a58..91af239d484 100644 > > > --- a/gcc/config/i386/i386.cc > > > +++ b/gcc/config/i386/i386.cc > > > @@ -458,6 +458,9 @@ int ix86_arch_specified; > > > indirect thunk pushes the return address onto stack, destroying > > > red-zone. > > > > > > + NB: Don't use red-zone for functions with no_caller_saved_registers > > > + when APX is enabled since 128-byte red-zone is too small for 31 GPRs. > > > + > > > TODO: If we can reserve the first 2 WORDs, for PUSH and, another > > > for CALL, in red-zone, we can allow local indirect jumps with > > > indirect thunk. */ > > > @@ -467,6 +470,9 @@ ix86_using_red_zone (void) > > > { > > > return (TARGET_RED_ZONE > > > && !TARGET_64BIT_MS_ABI > > > + && (!TARGET_APX_F > > Could we use !TARGET_APX_EGPR instead? It's clearer.
Fixed. Here is the patch I am checking in. OK for backport to release branches? Thanks. > > Others LGTM, but please wait for a while in case Uros has a different > > opinion. > > LGTM too. > > Thanks, > Uros. > > > > + || (cfun->machine->call_saved_registers > > > + != TYPE_NO_CALLER_SAVED_REGISTERS)) > > > && (!cfun->machine->has_local_indirect_jump > > > || cfun->machine->indirect_branch_type == > > > indirect_branch_keep)); > > > } > > > diff --git a/gcc/testsuite/gcc.target/i386/pr119784a.c > > > b/gcc/testsuite/gcc.target/i386/pr119784a.c > > > new file mode 100644 > > > index 00000000000..8a119d4cc1f > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.target/i386/pr119784a.c > > > @@ -0,0 +1,96 @@ > > > +/* { dg-do compile { target { *-*-linux* && lp64 } } } */ > > > +/* { dg-options "-O2 -fno-pic -mtune=generic -mgeneral-regs-only -mapxf > > > -mtune-ctrl=prologue_using_move,epilogue_using_move" } */ > > > +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ > > > +/* { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } > > > {^\t?\.} } } */ > > > + > > > +/* start must save and restore all caller saved registers. */ > > > + > > > +/* > > > +**start: > > > +**.LFB[0-9]+: > > > +** .cfi_startproc > > > +** subq \$248, %rsp > > > +**... > > > +** movq %rax, \(%rsp\) > > > +** movq %rdx, 8\(%rsp\) > > > +** movq %rcx, 16\(%rsp\) > > > +** movq %rbx, 24\(%rsp\) > > > +** movq %rsi, 32\(%rsp\) > > > +** movq %rdi, 40\(%rsp\) > > > +**... 
> > > +** movq %rbp, 48\(%rsp\) > > > +** movq %r8, 56\(%rsp\) > > > +** movq %r9, 64\(%rsp\) > > > +** movq %r10, 72\(%rsp\) > > > +** movq %r11, 80\(%rsp\) > > > +** movq %r12, 88\(%rsp\) > > > +** movq %r13, 96\(%rsp\) > > > +** movq %r14, 104\(%rsp\) > > > +** movq %r15, 112\(%rsp\) > > > +** movq %r16, 120\(%rsp\) > > > +** movq %r17, 128\(%rsp\) > > > +** movq %r18, 136\(%rsp\) > > > +** movq %r19, 144\(%rsp\) > > > +** movq %r20, 152\(%rsp\) > > > +** movq %r21, 160\(%rsp\) > > > +** movq %r22, 168\(%rsp\) > > > +** movq %r23, 176\(%rsp\) > > > +** movq %r24, 184\(%rsp\) > > > +** movq %r25, 192\(%rsp\) > > > +** movq %r26, 200\(%rsp\) > > > +** movq %r27, 208\(%rsp\) > > > +** movq %r28, 216\(%rsp\) > > > +** movq %r29, 224\(%rsp\) > > > +** movq %r30, 232\(%rsp\) > > > +** movq %r31, 240\(%rsp\) > > > +**... > > > +** call \*code\(%rip\) > > > +** movq \(%rsp\), %rax > > > +** movq 8\(%rsp\), %rdx > > > +** movq 16\(%rsp\), %rcx > > > +** movq 24\(%rsp\), %rbx > > > +** movq 32\(%rsp\), %rsi > > > +** movq 40\(%rsp\), %rdi > > > +** movq 48\(%rsp\), %rbp > > > +** movq 56\(%rsp\), %r8 > > > +** movq 64\(%rsp\), %r9 > > > +** movq 72\(%rsp\), %r10 > > > +** movq 80\(%rsp\), %r11 > > > +** movq 88\(%rsp\), %r12 > > > +** movq 96\(%rsp\), %r13 > > > +** movq 104\(%rsp\), %r14 > > > +** movq 112\(%rsp\), %r15 > > > +** movq 120\(%rsp\), %r16 > > > +** movq 128\(%rsp\), %r17 > > > +** movq 136\(%rsp\), %r18 > > > +** movq 144\(%rsp\), %r19 > > > +** movq 152\(%rsp\), %r20 > > > +** movq 160\(%rsp\), %r21 > > > +** movq 168\(%rsp\), %r22 > > > +** movq 176\(%rsp\), %r23 > > > +** movq 184\(%rsp\), %r24 > > > +** movq 192\(%rsp\), %r25 > > > +** movq 200\(%rsp\), %r26 > > > +** movq 208\(%rsp\), %r27 > > > +** movq 216\(%rsp\), %r28 > > > +** movq 224\(%rsp\), %r29 > > > +** movq 232\(%rsp\), %r30 > > > +** movq 240\(%rsp\), %r31 > > > +** addq \$248, %rsp > > > +**... > > > +** ret > > > +** .cfi_endproc > > > +**... 
> > > +*/ > > > + > > > +#define DONT_SAVE_REGS __attribute__((no_callee_saved_registers)) > > > +#define SAVE_REGS __attribute__((no_caller_saved_registers)) > > > + > > > +typedef DONT_SAVE_REGS void (*op_t)(void); > > > + > > > +extern op_t code[]; > > > + > > > +SAVE_REGS void start() > > > +{ > > > + code[0](); > > > +} > > > diff --git a/gcc/testsuite/gcc.target/i386/pr119784b.c > > > b/gcc/testsuite/gcc.target/i386/pr119784b.c > > > new file mode 100644 > > > index 00000000000..c6761976ed6 > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.target/i386/pr119784b.c > > > @@ -0,0 +1,87 @@ > > > +/* { dg-do compile { target { *-*-linux* && x32 } } } */ > > > +/* { dg-options "-O2 -fno-pic -mtune=generic -mgeneral-regs-only -mapxf > > > -mtune-ctrl=prologue_using_move,epilogue_using_move" } */ > > > +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ > > > +/* { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } > > > {^\t?\.} } } */ > > > + > > > +/* start must save and restore all caller saved registers. */ > > > + > > > +/* > > > +**start: > > > +**.LFB[0-9]+: > > > +** .cfi_startproc > > > +** subl \$248, %esp > > > +**... > > > +** movq %rax, \(%rsp\) > > > +** movq %rdx, 8\(%rsp\) > > > +** movq %rcx, 16\(%rsp\) > > > +** movq %rbx, 24\(%rsp\) > > > +** movq %rsi, 32\(%rsp\) > > > +** movq %rdi, 40\(%rsp\) > > > +**... 
> > > +** movq %rbp, 48\(%rsp\) > > > +** movq %r8, 56\(%rsp\) > > > +** movq %r9, 64\(%rsp\) > > > +** movq %r10, 72\(%rsp\) > > > +** movq %r11, 80\(%rsp\) > > > +** movq %r12, 88\(%rsp\) > > > +** movq %r13, 96\(%rsp\) > > > +** movq %r14, 104\(%rsp\) > > > +** movq %r15, 112\(%rsp\) > > > +** movq %r16, 120\(%rsp\) > > > +** movq %r17, 128\(%rsp\) > > > +** movq %r18, 136\(%rsp\) > > > +** movq %r19, 144\(%rsp\) > > > +** movq %r20, 152\(%rsp\) > > > +** movq %r21, 160\(%rsp\) > > > +** movq %r22, 168\(%rsp\) > > > +** movq %r23, 176\(%rsp\) > > > +** movq %r24, 184\(%rsp\) > > > +** movq %r25, 192\(%rsp\) > > > +** movq %r26, 200\(%rsp\) > > > +** movq %r27, 208\(%rsp\) > > > +** movq %r28, 216\(%rsp\) > > > +** movq %r29, 224\(%rsp\) > > > +** movq %r30, 232\(%rsp\) > > > +** movq %r31, 240\(%rsp\) > > > +**... > > > +** movl code\(%rip\), %ebp > > > +** call \*%rbp > > > +** movq \(%rsp\), %rax > > > +** movq 8\(%rsp\), %rdx > > > +** movq 16\(%rsp\), %rcx > > > +** movq 24\(%rsp\), %rbx > > > +** movq 32\(%rsp\), %rsi > > > +** movq 40\(%rsp\), %rdi > > > +** movq 48\(%rsp\), %rbp > > > +** movq 56\(%rsp\), %r8 > > > +** movq 64\(%rsp\), %r9 > > > +** movq 72\(%rsp\), %r10 > > > +** movq 80\(%rsp\), %r11 > > > +** movq 88\(%rsp\), %r12 > > > +** movq 96\(%rsp\), %r13 > > > +** movq 104\(%rsp\), %r14 > > > +** movq 112\(%rsp\), %r15 > > > +** movq 120\(%rsp\), %r16 > > > +** movq 128\(%rsp\), %r17 > > > +** movq 136\(%rsp\), %r18 > > > +** movq 144\(%rsp\), %r19 > > > +** movq 152\(%rsp\), %r20 > > > +** movq 160\(%rsp\), %r21 > > > +** movq 168\(%rsp\), %r22 > > > +** movq 176\(%rsp\), %r23 > > > +** movq 184\(%rsp\), %r24 > > > +** movq 192\(%rsp\), %r25 > > > +** movq 200\(%rsp\), %r26 > > > +** movq 208\(%rsp\), %r27 > > > +** movq 216\(%rsp\), %r28 > > > +** movq 224\(%rsp\), %r29 > > > +** movq 232\(%rsp\), %r30 > > > +** movq 240\(%rsp\), %r31 > > > +** addl \$248, %esp > > > +**... > > > +** ret > > > +** .cfi_endproc > > > +**... 
> > > +*/ > > > + > > > +#include "pr119784a.c" > > > -- > > > 2.49.0 > > > > > > > > > -- > > BR, > > Hongtao -- H.J.
From dffa12463f5b551e9267634718c0d2af00119ee8 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.to...@gmail.com> Date: Sun, 13 Apr 2025 12:20:42 -0700 Subject: [PATCH] APX: Don't use red-zone with 32 GPRs and no caller-saved registers Don't use red-zone in functions with no caller-saved registers when 32 GPRs are enabled, since the 128-byte red-zone is too small to hold the 31 GPRs (31 * 8 = 248 bytes) that must be saved. gcc/ PR target/119784 * config/i386/i386.cc (ix86_using_red_zone): Don't use red-zone with 32 GPRs and no caller-saved registers. gcc/testsuite/ PR target/119784 * gcc.target/i386/pr119784a.c: New test. * gcc.target/i386/pr119784b.c: Likewise. Signed-off-by: H.J. Lu <hjl.to...@gmail.com> --- gcc/config/i386/i386.cc | 6 ++ gcc/testsuite/gcc.target/i386/pr119784a.c | 96 +++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr119784b.c | 87 ++++++++++++++++++++ 3 files changed, 189 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr119784a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr119784b.c diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 4f8380c4a58..b172f716c68 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -458,6 +458,9 @@ int ix86_arch_specified; indirect thunk pushes the return address onto stack, destroying red-zone. + NB: Don't use red-zone for functions with no_caller_saved_registers + and 32 GPRs since 128-byte red-zone is too small for 31 GPRs. + TODO: If we can reserve the first 2 WORDs, for PUSH and, another for CALL, in red-zone, we can allow local indirect jumps with indirect thunk. 
*/ @@ -467,6 +470,9 @@ ix86_using_red_zone (void) { return (TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI + && (!TARGET_APX_EGPR + || (cfun->machine->call_saved_registers + != TYPE_NO_CALLER_SAVED_REGISTERS)) && (!cfun->machine->has_local_indirect_jump || cfun->machine->indirect_branch_type == indirect_branch_keep)); } diff --git a/gcc/testsuite/gcc.target/i386/pr119784a.c b/gcc/testsuite/gcc.target/i386/pr119784a.c new file mode 100644 index 00000000000..8a119d4cc1f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr119784a.c @@ -0,0 +1,96 @@ +/* { dg-do compile { target { *-*-linux* && lp64 } } } */ +/* { dg-options "-O2 -fno-pic -mtune=generic -mgeneral-regs-only -mapxf -mtune-ctrl=prologue_using_move,epilogue_using_move" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {^\t?\.} } } */ + +/* start must save and restore all caller saved registers. */ + +/* +**start: +**.LFB[0-9]+: +** .cfi_startproc +** subq \$248, %rsp +**... +** movq %rax, \(%rsp\) +** movq %rdx, 8\(%rsp\) +** movq %rcx, 16\(%rsp\) +** movq %rbx, 24\(%rsp\) +** movq %rsi, 32\(%rsp\) +** movq %rdi, 40\(%rsp\) +**... +** movq %rbp, 48\(%rsp\) +** movq %r8, 56\(%rsp\) +** movq %r9, 64\(%rsp\) +** movq %r10, 72\(%rsp\) +** movq %r11, 80\(%rsp\) +** movq %r12, 88\(%rsp\) +** movq %r13, 96\(%rsp\) +** movq %r14, 104\(%rsp\) +** movq %r15, 112\(%rsp\) +** movq %r16, 120\(%rsp\) +** movq %r17, 128\(%rsp\) +** movq %r18, 136\(%rsp\) +** movq %r19, 144\(%rsp\) +** movq %r20, 152\(%rsp\) +** movq %r21, 160\(%rsp\) +** movq %r22, 168\(%rsp\) +** movq %r23, 176\(%rsp\) +** movq %r24, 184\(%rsp\) +** movq %r25, 192\(%rsp\) +** movq %r26, 200\(%rsp\) +** movq %r27, 208\(%rsp\) +** movq %r28, 216\(%rsp\) +** movq %r29, 224\(%rsp\) +** movq %r30, 232\(%rsp\) +** movq %r31, 240\(%rsp\) +**... 
+** call \*code\(%rip\) +** movq \(%rsp\), %rax +** movq 8\(%rsp\), %rdx +** movq 16\(%rsp\), %rcx +** movq 24\(%rsp\), %rbx +** movq 32\(%rsp\), %rsi +** movq 40\(%rsp\), %rdi +** movq 48\(%rsp\), %rbp +** movq 56\(%rsp\), %r8 +** movq 64\(%rsp\), %r9 +** movq 72\(%rsp\), %r10 +** movq 80\(%rsp\), %r11 +** movq 88\(%rsp\), %r12 +** movq 96\(%rsp\), %r13 +** movq 104\(%rsp\), %r14 +** movq 112\(%rsp\), %r15 +** movq 120\(%rsp\), %r16 +** movq 128\(%rsp\), %r17 +** movq 136\(%rsp\), %r18 +** movq 144\(%rsp\), %r19 +** movq 152\(%rsp\), %r20 +** movq 160\(%rsp\), %r21 +** movq 168\(%rsp\), %r22 +** movq 176\(%rsp\), %r23 +** movq 184\(%rsp\), %r24 +** movq 192\(%rsp\), %r25 +** movq 200\(%rsp\), %r26 +** movq 208\(%rsp\), %r27 +** movq 216\(%rsp\), %r28 +** movq 224\(%rsp\), %r29 +** movq 232\(%rsp\), %r30 +** movq 240\(%rsp\), %r31 +** addq \$248, %rsp +**... +** ret +** .cfi_endproc +**... +*/ + +#define DONT_SAVE_REGS __attribute__((no_callee_saved_registers)) +#define SAVE_REGS __attribute__((no_caller_saved_registers)) + +typedef DONT_SAVE_REGS void (*op_t)(void); + +extern op_t code[]; + +SAVE_REGS void start() +{ + code[0](); +} diff --git a/gcc/testsuite/gcc.target/i386/pr119784b.c b/gcc/testsuite/gcc.target/i386/pr119784b.c new file mode 100644 index 00000000000..c6761976ed6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr119784b.c @@ -0,0 +1,87 @@ +/* { dg-do compile { target { *-*-linux* && x32 } } } */ +/* { dg-options "-O2 -fno-pic -mtune=generic -mgeneral-regs-only -mapxf -mtune-ctrl=prologue_using_move,epilogue_using_move" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {^\t?\.} } } */ + +/* start must save and restore all caller saved registers. */ + +/* +**start: +**.LFB[0-9]+: +** .cfi_startproc +** subl \$248, %esp +**... 
+** movq %rax, \(%rsp\) +** movq %rdx, 8\(%rsp\) +** movq %rcx, 16\(%rsp\) +** movq %rbx, 24\(%rsp\) +** movq %rsi, 32\(%rsp\) +** movq %rdi, 40\(%rsp\) +**... +** movq %rbp, 48\(%rsp\) +** movq %r8, 56\(%rsp\) +** movq %r9, 64\(%rsp\) +** movq %r10, 72\(%rsp\) +** movq %r11, 80\(%rsp\) +** movq %r12, 88\(%rsp\) +** movq %r13, 96\(%rsp\) +** movq %r14, 104\(%rsp\) +** movq %r15, 112\(%rsp\) +** movq %r16, 120\(%rsp\) +** movq %r17, 128\(%rsp\) +** movq %r18, 136\(%rsp\) +** movq %r19, 144\(%rsp\) +** movq %r20, 152\(%rsp\) +** movq %r21, 160\(%rsp\) +** movq %r22, 168\(%rsp\) +** movq %r23, 176\(%rsp\) +** movq %r24, 184\(%rsp\) +** movq %r25, 192\(%rsp\) +** movq %r26, 200\(%rsp\) +** movq %r27, 208\(%rsp\) +** movq %r28, 216\(%rsp\) +** movq %r29, 224\(%rsp\) +** movq %r30, 232\(%rsp\) +** movq %r31, 240\(%rsp\) +**... +** movl code\(%rip\), %ebp +** call \*%rbp +** movq \(%rsp\), %rax +** movq 8\(%rsp\), %rdx +** movq 16\(%rsp\), %rcx +** movq 24\(%rsp\), %rbx +** movq 32\(%rsp\), %rsi +** movq 40\(%rsp\), %rdi +** movq 48\(%rsp\), %rbp +** movq 56\(%rsp\), %r8 +** movq 64\(%rsp\), %r9 +** movq 72\(%rsp\), %r10 +** movq 80\(%rsp\), %r11 +** movq 88\(%rsp\), %r12 +** movq 96\(%rsp\), %r13 +** movq 104\(%rsp\), %r14 +** movq 112\(%rsp\), %r15 +** movq 120\(%rsp\), %r16 +** movq 128\(%rsp\), %r17 +** movq 136\(%rsp\), %r18 +** movq 144\(%rsp\), %r19 +** movq 152\(%rsp\), %r20 +** movq 160\(%rsp\), %r21 +** movq 168\(%rsp\), %r22 +** movq 176\(%rsp\), %r23 +** movq 184\(%rsp\), %r24 +** movq 192\(%rsp\), %r25 +** movq 200\(%rsp\), %r26 +** movq 208\(%rsp\), %r27 +** movq 216\(%rsp\), %r28 +** movq 224\(%rsp\), %r29 +** movq 232\(%rsp\), %r30 +** movq 240\(%rsp\), %r31 +** addl \$248, %esp +**... +** ret +** .cfi_endproc +**... +*/ + +#include "pr119784a.c" -- 2.49.0