> I compared GCC master branch bootstrap and test times on a slow machine
> with 6.6 Linux kernels compiled with the original GCC 13 and the GCC 13
> with the backported patch.  The performance data isn't precise since the
> measurements were done on different days with different GCC sources under
> different 6.6 kernel versions.
> 
> GCC master branch build time in seconds:
> 
> before                after                  improvement
> 30043.75user          30013.16user           0%
> 1274.85system         1243.72system          2.4%
> 
> GCC master branch test time in seconds (new tests added):
> 
> before                after                  improvement
> 216035.90user         216547.51user          0
> 27365.51system        26658.54system         2.6%

That is interesting - does the system time difference come from the
smaller binary?  Is the difference significant at all?
> 
> gcc/
> 
>       PR target/38534
>       * config/i386/i386-options.cc (ix86_set_func_type): Don't
>       save and restore callee saved registers for a noreturn function
>       with nothrow or compiled with -fno-exceptions.

In general this looks like a good thing to do.  I wonder whether this is
not something the middle-end should understand for all targets.
Also, I wonder about asynchronous stack unwinding.  If we want to unwind
the stack from an interrupt, then we may need some registers to be saved
(like the base pointer).

Honza
> 
> gcc/testsuite/
> 
>       PR target/38534
>       * gcc.target/i386/pr38534-1.c: New file.
>       * gcc.target/i386/pr38534-2.c: Likewise.
>       * gcc.target/i386/pr38534-3.c: Likewise.
>       * gcc.target/i386/pr38534-4.c: Likewise.
>       * gcc.target/i386/stack-check-17.c: Updated.
> ---
>  gcc/config/i386/i386-options.cc               | 16 ++++++++++--
>  gcc/testsuite/gcc.target/i386/pr38534-1.c     | 26 +++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr38534-2.c     | 18 +++++++++++++
>  gcc/testsuite/gcc.target/i386/pr38534-3.c     | 19 ++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr38534-4.c     | 18 +++++++++++++
>  .../gcc.target/i386/stack-check-17.c          | 19 +++++---------
>  6 files changed, 102 insertions(+), 14 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-4.c
> 
> diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
> index 0cdea30599e..f965568947c 100644
> --- a/gcc/config/i386/i386-options.cc
> +++ b/gcc/config/i386/i386-options.cc
> @@ -3371,9 +3371,21 @@ ix86_simd_clone_adjust (struct cgraph_node *node)
>  static void
>  ix86_set_func_type (tree fndecl)
>  {
> +  /* No need to save and restore callee-saved registers for a noreturn
> +     function with nothrow or compiled with -fno-exceptions.
> +
> +     NB: Don't use TREE_THIS_VOLATILE to check if this is a noreturn
> +     function.  The local-pure-const pass turns an interrupt function
> +     into a noreturn function by setting TREE_THIS_VOLATILE.  Normally
> +     the local-pure-const pass is run after ix86_set_func_type is called.
> +     When the local-pure-const pass is enabled for LTO, the interrupt
> +     function is marked as noreturn in the IR output, which leads the
> +     incompatible attribute error in LTO1.  */
>    bool has_no_callee_saved_registers
> -    = lookup_attribute ("no_callee_saved_registers",
> -                     TYPE_ATTRIBUTES (TREE_TYPE (fndecl)));
> +    = (((TREE_NOTHROW (fndecl) || !flag_exceptions)
> +     && lookup_attribute ("noreturn", DECL_ATTRIBUTES (fndecl)))
> +       || lookup_attribute ("no_callee_saved_registers",
> +                         TYPE_ATTRIBUTES (TREE_TYPE (fndecl))));
>  
>    if (cfun->machine->func_type == TYPE_UNKNOWN)
>      {
> diff --git a/gcc/testsuite/gcc.target/i386/pr38534-1.c 
> b/gcc/testsuite/gcc.target/i386/pr38534-1.c
> new file mode 100644
> index 00000000000..9297959e759
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr38534-1.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" 
> } */
> +
> +#define ARRAY_SIZE 256
> +
> +extern int array[ARRAY_SIZE][ARRAY_SIZE][ARRAY_SIZE];
> +extern int value (int, int, int)
> +#ifndef __x86_64__
> +__attribute__ ((regparm(3)))
> +#endif
> +;
> +
> +void
> +__attribute__((noreturn))
> +no_return_to_caller (void)
> +{
> +  unsigned i, j, k;
> +  for (i = ARRAY_SIZE; i > 0; --i)
> +    for (j = ARRAY_SIZE; j > 0; --j)
> +      for (k = ARRAY_SIZE; k > 0; --k)
> +     array[i - 1][j - 1][k - 1] = value (i, j, k);
> +  while (1);
> +}
> +
> +/* { dg-final { scan-assembler-not "push" } } */
> +/* { dg-final { scan-assembler-not "pop" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr38534-2.c 
> b/gcc/testsuite/gcc.target/i386/pr38534-2.c
> new file mode 100644
> index 00000000000..1fb01363273
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr38534-2.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" 
> } */
> +
> +extern void bar (void) __attribute__ ((no_callee_saved_registers));
> +extern void fn (void) __attribute__ ((noreturn));
> +
> +__attribute__ ((noreturn))
> +void
> +foo (void)
> +{
> +  bar ();
> +  fn ();
> +}
> +
> +/* { dg-final { scan-assembler-not "push" } } */
> +/* { dg-final { scan-assembler-not "pop" } } */
> +/* { dg-final { scan-assembler-not "jmp\[\\t \]+_?bar" } } */
> +/* { dg-final { scan-assembler "call\[\\t \]+_?bar" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr38534-3.c 
> b/gcc/testsuite/gcc.target/i386/pr38534-3.c
> new file mode 100644
> index 00000000000..87fc35f3fe9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr38534-3.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" 
> } */
> +
> +typedef void (*fn_t) (void) __attribute__ ((no_callee_saved_registers));
> +extern fn_t bar;
> +extern void fn (void) __attribute__ ((noreturn));
> +
> +__attribute__ ((noreturn))
> +void
> +foo (void)
> +{
> +  bar ();
> +  fn ();
> +}
> +
> +/* { dg-final { scan-assembler-not "push" } } */
> +/* { dg-final { scan-assembler-not "pop" } } */
> +/* { dg-final { scan-assembler-not "jmp" } } */
> +/* { dg-final { scan-assembler "call\[\\t \]+" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr38534-4.c 
> b/gcc/testsuite/gcc.target/i386/pr38534-4.c
> new file mode 100644
> index 00000000000..561ebeef194
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr38534-4.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" 
> } */
> +
> +typedef void (*fn_t) (void) __attribute__ ((no_callee_saved_registers));
> +extern void fn (void) __attribute__ ((noreturn));
> +
> +__attribute__ ((noreturn))
> +void
> +foo (fn_t bar)
> +{
> +  bar ();
> +  fn ();
> +}
> +
> +/* { dg-final { scan-assembler-not "push" } } */
> +/* { dg-final { scan-assembler-not "pop" } } */
> +/* { dg-final { scan-assembler-not "jmp" } } */
> +/* { dg-final { scan-assembler "call\[\\t \]+" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/stack-check-17.c 
> b/gcc/testsuite/gcc.target/i386/stack-check-17.c
> index b3e41cb3d25..061484e1319 100644
> --- a/gcc/testsuite/gcc.target/i386/stack-check-17.c
> +++ b/gcc/testsuite/gcc.target/i386/stack-check-17.c
> @@ -23,19 +23,14 @@ f3 (void)
>  /* Verify no explicit probes.  */
>  /* { dg-final { scan-assembler-not "or\[ql\]" } } */
>  
> -/* We also want to verify we did not use a push/pop sequence
> -   to probe *sp as the callee register saves are sufficient
> -   to probe *sp.
> -
> -   y0/y1 are live across the call and thus must be allocated
> +/* y0/y1 are live across the call and thus must be allocated
>     into either a stack slot or callee saved register.  The former
>     would be rather dumb.  So assume it does not happen.
>  
> -   So search for two/four pushes for the callee register saves/argument 
> pushes
> -   (plus one for the PIC register if needed on ia32) and no pops (since the
> -   function has no reachable epilogue).  */
> -/* { dg-final { scan-assembler-times "push\[ql\]" 2 { target { ! ia32 } } } 
> }  */
> -/* { dg-final { scan-assembler-times "push\[ql\]" 4 { target { ia32 && 
> nonpic } } } }  */
> -/* { dg-final { scan-assembler-times "push\[ql\]" 5 { target { ia32 && { ! 
> nonpic } } } } }  */
> -/* { dg-final { scan-assembler-not "pop" } } */
> +   So search for a push/pop sequence for stack probe and 2 argument
> +   pushes on ia32.  There is no need to save and restore the PIC
> +   register on ia32 for a noreturn function.  */
> +/* { dg-final { scan-assembler-times "push\[ql\]" 1 { target { ! ia32 } } } 
> }  */
> +/* { dg-final { scan-assembler-times "push\[ql\]" 3 { target ia32 } } }  */
> +/* { dg-final { scan-assembler-times "pop" 1 } } */
>  
> -- 
> 2.43.0
> 

Reply via email to