> I compared GCC master branch bootstrap and test times on a slow machine > with 6.6 Linux kernels compiled with the original GCC 13 and the GCC 13 > with the backported patch. The performance data isn't precise since the > measurements were done on different days with different GCC sources under > different 6.6 kernel versions. > > GCC master branch build time in seconds: > > before after improvement > 30043.75user 30013.16user 0% > 1274.85system 1243.72system 2.4% > > GCC master branch test time in seconds (new tests added): > > before after improvement > 216035.90user 216547.51user 0 > 27365.51system 26658.54system 2.6%
This is interesting - does the system time difference come from the smaller binary? Is the difference at all significant? > > gcc/ > > PR target/38534 > * config/i386/i386-options.cc (ix86_set_func_type): Don't > save and restore callee saved registers for a noreturn function > with nothrow or compiled with -fno-exceptions. In general this looks like a good thing to do. I wonder whether this is something the middle-end should understand for all targets. Also, I wonder about asynchronous stack unwinding. If we want to unwind the stack from an interrupt, then we may need some registers to be saved (like the base pointer). Honza > > gcc/testsuite/ > > PR target/38534 > * gcc.target/i386/pr38534-1.c: New file. > * gcc.target/i386/pr38534-2.c: Likewise. > * gcc.target/i386/pr38534-3.c: Likewise. > * gcc.target/i386/pr38534-4.c: Likewise. > * gcc.target/i386/stack-check-17.c: Updated. > --- > gcc/config/i386/i386-options.cc | 16 ++++++++++-- > gcc/testsuite/gcc.target/i386/pr38534-1.c | 26 +++++++++++++++++++ > gcc/testsuite/gcc.target/i386/pr38534-2.c | 18 +++++++++++++ > gcc/testsuite/gcc.target/i386/pr38534-3.c | 19 ++++++++++++++ > gcc/testsuite/gcc.target/i386/pr38534-4.c | 18 +++++++++++++ > .../gcc.target/i386/stack-check-17.c | 19 +++++--------- > 6 files changed, 102 insertions(+), 14 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-3.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-4.c > > diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc > index 0cdea30599e..f965568947c 100644 > --- a/gcc/config/i386/i386-options.cc > +++ b/gcc/config/i386/i386-options.cc > @@ -3371,9 +3371,21 @@ ix86_simd_clone_adjust (struct cgraph_node *node) > static void > ix86_set_func_type (tree fndecl) > { > + /* No need to save and restore callee-saved registers for a noreturn > + function with nothrow or compiled with 
-fno-exceptions. > + > + NB: Don't use TREE_THIS_VOLATILE to check if this is a noreturn > + function. The local-pure-const pass turns an interrupt function > + into a noreturn function by setting TREE_THIS_VOLATILE. Normally > + the local-pure-const pass is run after ix86_set_func_type is called. > + When the local-pure-const pass is enabled for LTO, the interrupt > + function is marked as noreturn in the IR output, which leads the > + incompatible attribute error in LTO1. */ > bool has_no_callee_saved_registers > - = lookup_attribute ("no_callee_saved_registers", > - TYPE_ATTRIBUTES (TREE_TYPE (fndecl))); > + = (((TREE_NOTHROW (fndecl) || !flag_exceptions) > + && lookup_attribute ("noreturn", DECL_ATTRIBUTES (fndecl))) > + || lookup_attribute ("no_callee_saved_registers", > + TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))); > > if (cfun->machine->func_type == TYPE_UNKNOWN) > { > diff --git a/gcc/testsuite/gcc.target/i386/pr38534-1.c > b/gcc/testsuite/gcc.target/i386/pr38534-1.c > new file mode 100644 > index 00000000000..9297959e759 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr38534-1.c > @@ -0,0 +1,26 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" > } */ > + > +#define ARRAY_SIZE 256 > + > +extern int array[ARRAY_SIZE][ARRAY_SIZE][ARRAY_SIZE]; > +extern int value (int, int, int) > +#ifndef __x86_64__ > +__attribute__ ((regparm(3))) > +#endif > +; > + > +void > +__attribute__((noreturn)) > +no_return_to_caller (void) > +{ > + unsigned i, j, k; > + for (i = ARRAY_SIZE; i > 0; --i) > + for (j = ARRAY_SIZE; j > 0; --j) > + for (k = ARRAY_SIZE; k > 0; --k) > + array[i - 1][j - 1][k - 1] = value (i, j, k); > + while (1); > +} > + > +/* { dg-final { scan-assembler-not "push" } } */ > +/* { dg-final { scan-assembler-not "pop" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr38534-2.c > b/gcc/testsuite/gcc.target/i386/pr38534-2.c > new file mode 100644 > index 00000000000..1fb01363273 > --- /dev/null > 
+++ b/gcc/testsuite/gcc.target/i386/pr38534-2.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" > } */ > + > +extern void bar (void) __attribute__ ((no_callee_saved_registers)); > +extern void fn (void) __attribute__ ((noreturn)); > + > +__attribute__ ((noreturn)) > +void > +foo (void) > +{ > + bar (); > + fn (); > +} > + > +/* { dg-final { scan-assembler-not "push" } } */ > +/* { dg-final { scan-assembler-not "pop" } } */ > +/* { dg-final { scan-assembler-not "jmp\[\\t \]+_?bar" } } */ > +/* { dg-final { scan-assembler "call\[\\t \]+_?bar" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr38534-3.c > b/gcc/testsuite/gcc.target/i386/pr38534-3.c > new file mode 100644 > index 00000000000..87fc35f3fe9 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr38534-3.c > @@ -0,0 +1,19 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" > } */ > + > +typedef void (*fn_t) (void) __attribute__ ((no_callee_saved_registers)); > +extern fn_t bar; > +extern void fn (void) __attribute__ ((noreturn)); > + > +__attribute__ ((noreturn)) > +void > +foo (void) > +{ > + bar (); > + fn (); > +} > + > +/* { dg-final { scan-assembler-not "push" } } */ > +/* { dg-final { scan-assembler-not "pop" } } */ > +/* { dg-final { scan-assembler-not "jmp" } } */ > +/* { dg-final { scan-assembler "call\[\\t \]+" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr38534-4.c > b/gcc/testsuite/gcc.target/i386/pr38534-4.c > new file mode 100644 > index 00000000000..561ebeef194 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr38534-4.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" > } */ > + > +typedef void (*fn_t) (void) __attribute__ ((no_callee_saved_registers)); > +extern void fn (void) __attribute__ ((noreturn)); > + > +__attribute__ ((noreturn)) > +void > +foo (fn_t bar) > +{ 
> + bar (); > + fn (); > +} > + > +/* { dg-final { scan-assembler-not "push" } } */ > +/* { dg-final { scan-assembler-not "pop" } } */ > +/* { dg-final { scan-assembler-not "jmp" } } */ > +/* { dg-final { scan-assembler "call\[\\t \]+" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/stack-check-17.c > b/gcc/testsuite/gcc.target/i386/stack-check-17.c > index b3e41cb3d25..061484e1319 100644 > --- a/gcc/testsuite/gcc.target/i386/stack-check-17.c > +++ b/gcc/testsuite/gcc.target/i386/stack-check-17.c > @@ -23,19 +23,14 @@ f3 (void) > /* Verify no explicit probes. */ > /* { dg-final { scan-assembler-not "or\[ql\]" } } */ > > -/* We also want to verify we did not use a push/pop sequence > - to probe *sp as the callee register saves are sufficient > - to probe *sp. > - > - y0/y1 are live across the call and thus must be allocated > +/* y0/y1 are live across the call and thus must be allocated > into either a stack slot or callee saved register. The former > would be rather dumb. So assume it does not happen. > > - So search for two/four pushes for the callee register saves/argument > pushes > - (plus one for the PIC register if needed on ia32) and no pops (since the > - function has no reachable epilogue). */ > -/* { dg-final { scan-assembler-times "push\[ql\]" 2 { target { ! ia32 } } } > } */ > -/* { dg-final { scan-assembler-times "push\[ql\]" 4 { target { ia32 && > nonpic } } } } */ > -/* { dg-final { scan-assembler-times "push\[ql\]" 5 { target { ia32 && { ! > nonpic } } } } } */ > -/* { dg-final { scan-assembler-not "pop" } } */ > + So search for a push/pop sequence for stack probe and 2 argument > + pushes on ia32. There is no need to save and restore the PIC > + register on ia32 for a noreturn function. */ > +/* { dg-final { scan-assembler-times "push\[ql\]" 1 { target { ! ia32 } } } > } */ > +/* { dg-final { scan-assembler-times "push\[ql\]" 3 { target ia32 } } } */ > +/* { dg-final { scan-assembler-times "pop" 1 } } */ > > -- > 2.43.0 >