https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90235

            Bug ID: 90235
           Summary: Unnecessary save and restore frame pointer with
                    AVX/AVX512 pseudo registers
           Product: gcc
           Version: 9.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: hjl.tools at gmail dot com
                CC: crazylht at gmail dot com, skpgkp1 at gmail dot com
  Target Milestone: ---

From PR 90202:

[hjl@gnu-cfl-1 pr90202]$ cat x.ii
struct v {
    int val[16];
};

struct v test(struct v a, struct v b) {
    struct v res;

    for (int i = 0; i < 16; i++)
        res.val[i] = a.val[i] + b.val[i];

    return res;
}
[hjl@gnu-cfl-1 pr90202]$ make CC=gcc
gcc -O3 -march=skylake  -S x.ii
[hjl@gnu-cfl-1 pr90202]$ cat x.s
        .file   "x.ii"
        .text
        .p2align 4,,15
        .globl  _Z4test1vS_
        .type   _Z4test1vS_, @function
_Z4test1vS_:
.LFB0:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq    %rdi, %rax
        movq    %rsp, %rbp
        .cfi_def_cfa_register 6
        vmovdqu 16(%rbp), %ymm1
        vmovdqu 48(%rbp), %ymm2
        vpaddd  80(%rbp), %ymm1, %ymm0
        vmovdqu %ymm0, (%rdi)
        vpaddd  112(%rbp), %ymm2, %ymm0
        vmovdqu %ymm0, 32(%rdi)
        vzeroupper
        popq    %rbp
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc

Since there is

rtx
gen_reg_rtx (machine_mode mode)
{
  rtx val; 
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  gcc_assert (can_create_pseudo_p ()); 

  /* If a virtual register with bigger mode alignment is generated,
     increase stack alignment estimation because it might be spilled
     to stack later.  */
  if (SUPPORTS_STACK_ALIGNMENT
      && crtl->stack_alignment_estimated < align
      && !crtl->stack_realign_processed)
    {    
      unsigned int min_align = MINIMUM_ALIGNMENT (NULL, mode, align);
      if (crtl->stack_alignment_estimated < min_align)
        crtl->stack_alignment_estimated = min_align;
    }    

and IRA has

  frame_pointer_needed
    = (! flag_omit_frame_pointer
       || (cfun->calls_alloca && EXIT_IGNORE_STACK)
       /* We need the frame pointer to catch stack overflow exceptions if
          the stack pointer is moving (as for the alloca case just above).  */
       || (STACK_CHECK_MOVING_SP
           && flag_stack_check
           && flag_exceptions
           && cfun->can_throw_non_call_exceptions)
       || crtl->accesses_prior_frames
       || (SUPPORTS_STACK_ALIGNMENT && crtl->stack_realign_needed)
       || targetm.frame_pointer_required ());

generating AVX/AVX512 pseudo registers via gen_reg_rtx will mark the frame
pointer as needed.  Stack realignment is needed to

1. Align the outgoing stack.
2. Support aligned spill of AVX/AVX512 registers.

But we won't know whether a spill is needed before RA.  As a result, we
save and restore the frame pointer even when it is not needed.  Since

(define_insn "mov<mode>_internal"
  [(set (match_operand:VMOVE 0 "nonimmediate_operand"
         "=v,v ,v ,m")
        (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
         " C,BC,vm,v"))]
  "TARGET_SSE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"

now supports both aligned and unaligned load/store of AVX/AVX512
registers, we can change gen_reg_rtx to

  /* If a virtual register with bigger mode alignment is generated,
     increase stack alignment estimation because it might be spilled
     to stack later.  */
  if (SUPPORTS_STACK_ALIGNMENT
      && !SUPPORTS_MISALIGNED_SPILL
      && crtl->stack_alignment_estimated < align
      && !crtl->stack_realign_processed)
    {    
      unsigned int min_align = MINIMUM_ALIGNMENT (NULL, mode, align);
      if (crtl->stack_alignment_estimated < min_align)
        crtl->stack_alignment_estimated = min_align;
    }

Reply via email to