On Wed, Feb 14, 2024 at 5:33 AM H.J. Lu <hjl.to...@gmail.com> wrote: > > Since push2/pop2 requires 16-byte stack alignment, don't generate them > if the incoming stack isn't 16-byte aligned. Ok. > > gcc/ > > PR target/113912 > * config/i386/i386.cc (ix86_can_use_push2pop2): New. > (ix86_pro_and_epilogue_can_use_push2pop2): Use it. > (ix86_emit_save_regs): Don't generate push2 if > ix86_can_use_push2pop2 returns false. > (ix86_expand_epilogue): Don't generate pop2 if > ix86_can_use_push2pop2 returns false. > > gcc/testsuite/ > > PR target/113912 > * gcc.target/i386/apx-push2pop2-2.c: New test. > --- > gcc/config/i386/i386.cc | 24 ++++++++++++++----- > .../gcc.target/i386/apx-push2pop2-2.c | 24 +++++++++++++++++++ > 2 files changed, 42 insertions(+), 6 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/apx-push2pop2-2.c > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > index a4e12602f70..46f238651a6 100644 > --- a/gcc/config/i386/i386.cc > +++ b/gcc/config/i386/i386.cc > @@ -6802,16 +6802,24 @@ get_probe_interval (void) > > #define SPLIT_STACK_AVAILABLE 256 > > -/* Helper function to determine whether push2/pop2 can be used in prologue or > - epilogue for register save/restore. */ > +/* Return true if push2/pop2 can be generated. */ > + > static bool > -ix86_pro_and_epilogue_can_use_push2pop2 (int nregs) > +ix86_can_use_push2pop2 (void) > { > /* Use push2/pop2 only if the incoming stack is 16-byte aligned. */ > unsigned int incoming_stack_boundary > = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary > ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary); > - if (incoming_stack_boundary % 128 != 0) > + return incoming_stack_boundary % 128 == 0; > +} > + > +/* Helper function to determine whether push2/pop2 can be used in prologue or > + epilogue for register save/restore. 
*/ > +static bool > +ix86_pro_and_epilogue_can_use_push2pop2 (int nregs) > +{ > + if (!ix86_can_use_push2pop2 ()) > return false; > int aligned = cfun->machine->fs.sp_offset % 16 == 0; > return TARGET_APX_PUSH2POP2 > @@ -7401,7 +7409,9 @@ ix86_emit_save_regs (void) > int regno; > rtx_insn *insn; > > - if (!TARGET_APX_PUSH2POP2 || cfun->machine->func_type != TYPE_NORMAL) > + if (!TARGET_APX_PUSH2POP2 > + || !ix86_can_use_push2pop2 () > + || cfun->machine->func_type != TYPE_NORMAL) > { > for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--) > if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) > @@ -10039,7 +10049,9 @@ ix86_expand_epilogue (int style) > m->fs.cfa_reg == stack_pointer_rtx); > } > > - if (TARGET_APX_PUSH2POP2 && m->func_type == TYPE_NORMAL) > + if (TARGET_APX_PUSH2POP2 > + && ix86_can_use_push2pop2 () > + && m->func_type == TYPE_NORMAL) > ix86_emit_restore_regs_using_pop2 (); > else > ix86_emit_restore_regs_using_pop (TARGET_APX_PPX); > diff --git a/gcc/testsuite/gcc.target/i386/apx-push2pop2-2.c > b/gcc/testsuite/gcc.target/i386/apx-push2pop2-2.c > new file mode 100644 > index 00000000000..975a6212b30 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/apx-push2pop2-2.c > @@ -0,0 +1,24 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-O2 -mpreferred-stack-boundary=3 -mapx-features=push2pop2 > -fomit-frame-pointer" } */ > + > +extern int bar (int); > + > +void foo () > +{ > + int a,b,c,d,e,f,i; > + a = bar (5); > + b = bar (a); > + c = bar (b); > + d = bar (c); > + e = bar (d); > + f = bar (e); > + for (i = 1; i < 10; i++) > + { > + a += bar (a + i) + bar (b + i) + > + bar (c + i) + bar (d + i) + > + bar (e + i) + bar (f + i); > + } > +} > + > +/* { dg-final { scan-assembler-not "push2(|p)\[\\t \]*%r" } } */ > +/* { dg-final { scan-assembler-not "pop2(|p)\[\\t \]*%r" } } */ > -- > 2.43.0 >
-- BR, Hongtao