[SOLVED-ish] Question about sibling call epilogues & registers

2016-10-17 Thread Daniel Santos
So the core problem was my "restore multiple" insn contained a CALL insn 
and was a call_insn. The symbol it called is in the static section of 
libgcc. However, during peephole2 pass, get_call_reg_set_usage in 
final.c didn't find a function declaration attached to the symbol and so 
defaulted to say that it depends upon and clobbers most registers. (the 
target's default set)


My solution was to change the pattern so that I do not generate a CALL 
and emit the parallel using emit_insn instead of emit_call_insn. In this 
way, I still declare everything that calling stub ("__msabi_restore_15" 
in the below case) does and it doesn't presume that I depend upon or 
clobber any other regs, so that the sibling call emitted after this 
still works. Since I'm using RSI for the base address, I'm only changing 
registers that have to be saved anyway, so the sibcall should never need 
to use one of these registers anyway (I hope :)



(insn/f 148 147 149 11 (parallel [
(use (symbol_ref:DI ("__msabi_restore_15")))
(clobber (reg:CC 17 flags))
(set (reg:V4SF 52 xmm15)
(mem/c:V4SF (plus:DI (reg:DI 4 si)
(const_int -88 [0xffa8])) [0 S16 A8]))
(set (reg:V4SF 51 xmm14)
(mem/c:V4SF (plus:DI (reg:DI 4 si)
(const_int -72 [0xffb8])) [0 S16 A8]))
(set (reg:V4SF 50 xmm13)
(mem/c:V4SF (plus:DI (reg:DI 4 si)
(const_int -56 [0xffc8])) [0 S16 A8]))
(set (reg:V4SF 49 xmm12)
(mem/c:V4SF (plus:DI (reg:DI 4 si)
(const_int -40 [0xffd8])) [0 S16 A8]))
(set (reg:V4SF 48 xmm11)
(mem/c:V4SF (plus:DI (reg:DI 4 si)
(const_int -24 [0xffe8])) [0 S16 A8]))
(set (reg:V4SF 47 xmm10)
(mem/c:V4SF (plus:DI (reg:DI 4 si)
(const_int -8 [0xfff8])) [0  S16 A8]))
(set (reg:V4SF 46 xmm9)
(mem/c:V4SF (plus:DI (reg:DI 4 si)
(const_int 8 [0x8])) [0  S16 A8]))
(set (reg:V4SF 45 xmm8)
(mem/c:V4SF (plus:DI (reg:DI 4 si)
(const_int 24 [0x18])) [0  S16 A8]))
(set (reg:V4SF 28 xmm7)
(mem/c:V4SF (plus:DI (reg:DI 4 si)
(const_int 40 [0x28])) [0  S16 A8]))
(set (reg:V4SF 27 xmm6)
(mem/c:V4SF (plus:DI (reg:DI 4 si)
(const_int 56 [0x38])) [0  S16 A8]))
(set (reg:DI 5 di)
(mem/c:DI (plus:DI (reg:DI 4 si)
(const_int 72 [0x48])) [0  S8 A8]))
(set (reg:DI 3 bx)
(mem/c:DI (plus:DI (reg:DI 4 si)
(const_int 80 [0x50])) [0  S8 A8]))
(set (reg:DI 6 bp)
(mem/c:DI (plus:DI (reg:DI 4 si)
(const_int 88 [0x58])) [0  S8 A8]))
(set (reg:DI 41 r12)
(mem/c:DI (plus:DI (reg:DI 4 si)
(const_int 96 [0x60])) [0  S8 A8]))
(set (reg:DI 4 si)
(mem/c:DI (plus:DI (reg:DI 4 si)
(const_int 64 [0x40])) [0  S8 A8]))
]) 
/home/daniel/proj/emu/wine/github/dlls/winex11.drv/window.c:1655 -1

 (expr_list:REG_UNUSED (reg:CC 17 flags)
(expr_list:REG_CFA_RESTORE (reg:DI 4 si)
(expr_list:REG_CFA_RESTORE (reg:DI 41 r12)
(expr_list:REG_CFA_RESTORE (reg:DI 6 bp)
(expr_list:REG_CFA_RESTORE (reg:DI 3 bx)
(expr_list:REG_CFA_RESTORE (reg:DI 5 di)
(expr_list:REG_CFA_RESTORE (reg:V4SF 27 xmm6)
(expr_list:REG_CFA_RESTORE (reg:V4SF 28 
xmm7)
(expr_list:REG_CFA_RESTORE 
(reg:V4SF 45 xmm8)
(expr_list:REG_CFA_RESTORE 
(reg:V4SF 46 xmm9)

(nil



Re: Question about sibling call epilogues & registers

2016-10-17 Thread Daniel Santos
It would probably be useful to post the actual code. The below function 
emit_msabi_outlined_restore() is is called from ix86_expand_epilogue() 
to emit the RTL to call the restore stub. Like ix86_expand_epilogue, it 
uses style == 0 to indicate that there will be a sibling call following 
the epilogue, so we will call the stub rather than jmp. But it also uses 
a call if we need to pop incoming args or are using a hard frame pointer.


The problem appears to be the lack of a function declaration causing 
get_call_reg_set_usage() (in final.c) to use the target default 
"regs_invalidated_by_call" value instead of what I've supplied with 
add_function_usage_to() and the gen_frame_load() insns for each register 
restored. I'm developing on 5.4.0 since I need a known good compiler for 
Wine testing and I plan to rebase it later.


static bool
emit_msabi_outlined_restore (const struct ix86_frame , bool use_call,
 int style)
{
  struct machine_function *m = cfun->machine;
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
  + m->outline_ms_sysv_extra_regs;
  rtvec v = rtvec_alloc (ncregs - 1 + (use_call ? 3 : 5));
  rtx insn, sym, tmp;
  rtx rsi = gen_rtx_REG (word_mode, SI_REG);
  rtx use = NULL_RTX;
  rtx note = NULL_RTX;
  unsigned i = 0;
  const struct xlogue_layout  = xlogue_layout::get_instance ();
  HOST_WIDE_INT stack_restore_offset;
  HOST_WIDE_INT reg_data_offset;
  HOST_WIDE_INT rsi_offset;
  rtx rsi_frame_load = NULL_RTX;
  HOST_WIDE_INT rsi_restore_offset = 0x7fff;
  const typeof (xlogue.regs[0]) *ri;

  gcc_assert (m->fs.sp_valid);

  stack_restore_offset = m->fs.sp_offset - frame.hard_frame_pointer_offset;
  rsi_offset = stack_restore_offset - xlogue.get_offset ();
  reg_data_offset = stack_restore_offset;

  /* adjust for alignment */
  if (m->outline_ms_sysv_offset_in)
reg_data_offset -= UNITS_PER_WORD;

  tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT(rsi_offset));
  insn = emit_insn (gen_rtx_SET (VOIDmode, rsi, tmp));
  use_reg (, rsi);

  /* construct restore_multiple/restore_multiple_and_return insn */
  sym = xlogue.get_stub_rtx (use_call ? XLOGUE_STUB_RESTORE
  : XLOGUE_STUB_RESTORE_RET);

  /* Verify that note queue is empty. */
  gcc_assert(!queued_cfa_restores);

  /* If:
 * we need to pop incoming args,
 * this is a sibcall, or
 * we have a hard frame pointer
 then we want to call the epilogue stub instead of jumping to it. */
  if (use_call)
{
  tmp = gen_rtx_MEM (QImode, sym);
  RTVEC_ELT (v, i++) = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
}
  else
{
  rtx r10;

  RTVEC_ELT (v, i++) = ret_rtx;
  RTVEC_ELT (v, i++) = gen_rtx_USE (VOIDmode, sym);
  tmp = GEN_INT(stack_restore_offset);
  tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, tmp);
  r10 = gen_rtx_REG (DImode, R10_REG);
  RTVEC_ELT (v, i++) = gen_rtx_SET (VOIDmode, r10, tmp);

  gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
  gcc_assert (m->fs.sp_valid);
  m->fs.sp_offset -= stack_restore_offset;

  note = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
   GEN_INT(stack_restore_offset));
  note = gen_rtx_SET (VOIDmode, stack_pointer_rtx, note);
}

  RTVEC_ELT (v, i++) = gen_rtx_CLOBBER (VOIDmode,
gen_rtx_REG (CCmode, FLAGS_REG));

  for (ri = [0]; ri != [ncregs]; ++ri)
{
  enum machine_mode mode = SSE_REGNO_P(ri->regno) ? V4SFmode : 
word_mode;

  rtx reg, restore_note;
  HOST_WIDE_INT offset = ri->offset - 0x70;

  reg = gen_rtx_REG (mode, ri->regno);
  restore_note = gen_frame_load (reg, rsi, offset);

  /* Make sure RSI frame load/restore note is last */
  /* TODO: Do I really need to reorder this? */
  if (ri->regno == SI_REG)
{
  gcc_assert (!rsi_frame_load);
  rsi_frame_load = restore_note;
  rsi_restore_offset = offset;
}
  else
{
  RTVEC_ELT (v, i++) = restore_note;
  ix86_add_cfa_restore_note (NULL_RTX, reg, offset);
}
}

  /* add frame load & restore note for RSI last */
  gcc_assert (rsi_frame_load);
  RTVEC_ELT (v, i++) = rsi_frame_load;
  ix86_add_cfa_restore_note (NULL_RTX, gen_rtx_REG (DImode, SI_REG),
 rsi_restore_offset);

  gcc_assert (i == (unsigned)GET_NUM_ELEM (v));

  tmp = gen_rtx_PARALLEL (VOIDmode, v);
  if (use_call)
{
  insn = emit_call_insn (tmp);
  add_reg_note (insn, REG_CALL_DECL, sym);
  add_function_usage_to (insn, use);
}
  else
{
  insn = emit_jump_insn (tmp);
  JUMP_LABEL (insn) = ret_rtx;
  add_reg_note(insn, REG_CFA_ADJUST_CFA, note);
}

  RTX_FRAME_RELATED_P(insn) = true;
  ix86_add_queued_cfa_restore_notes (insn);

  if (use_call)
pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   GEN_INT 

Re: Question about sibling call epilogues & registers

2016-10-16 Thread Daniel Santos

On 10/16/2016 05:27 PM, Segher Boessenkool wrote:



Oddly enough, I had forgotten to call add_function_usage_to() on my save
stub (which I didn't post), but not the restore stub. So thanks for that
psychic intervention. :) But if you look carefully, it's there, although
it's hard to read because of all the line wrapping:

 (expr_list (use (reg:DI 4 si))
 (nil)))

But shouldn't it be marked as using RCX as well?



So the bright side is that I'm learning a lot more about
gcc internals! Which is probably my main goal of this exercise.

:-)


Segher


Hehe, well no, it's the sibcall to LeaveCriticalSection that needs RCX. 
My stub isn't reading or touching it.


Daniel


Re: Question about sibling call epilogues & registers

2016-10-16 Thread Segher Boessenkool
On Sun, Oct 16, 2016 at 05:05:17PM -0500, Daniel Santos wrote:
> >>The insn that's getting deleted is 75, where RCX is set.  I'm starting
> >>to think that maybe df_analyze() presumes that my call (to the stub) is
> >>invalidating RCX, although it does not.
> >It looks like you don't use add_function_usage_to on the call insn?

> Oddly enough, I had forgotten to call add_function_usage_to() on my save 
> stub (which I didn't post), but not the restore stub. So thanks for that 
> psychic intervention. :) But if you look carefully, it's there, although 
> it's hard to read because of all the line wrapping:
> 
> (expr_list (use (reg:DI 4 si))
> (nil)))

But shouldn't it be marked as using RCX as well?


> So the bright side is that I'm learning a lot more about 
> gcc internals! Which is probably my main goal of this exercise.

:-)


Segher


Re: Question about sibling call epilogues & registers

2016-10-16 Thread Daniel Santos

On 10/15/2016 08:41 PM, Segher Boessenkool wrote:

Hi Daniel,

On Sat, Oct 15, 2016 at 01:45:12AM -0500, Daniel Santos wrote:

The insn that's getting deleted is 75, where RCX is set.  I'm starting
to think that maybe df_analyze() presumes that my call (to the stub) is
invalidating RCX, although it does not.

It looks like you don't use add_function_usage_to on the call insn?


Segher


Oddly enough, I had forgotten to call add_function_usage_to() on my save 
stub (which I didn't post), but not the restore stub. So thanks for that 
psychic intervention. :) But if you look carefully, it's there, although 
it's hard to read because of all the line wrapping:


(expr_list (use (reg:DI 4 si))
(nil)))

Right now I'm using gdb and gradually walking through df_analyze() and 
learning how the data flow and problems work. I presume that I can find 
my answer somewhere in the "live register" data flow problem code . This 
eventually runs "dead code elimination" on this insn (during the 
"finalize" phase), so I believe that I'm getting closer to discovering 
the root cause. So the bright side is that I'm learning a lot more about 
gcc internals! Which is probably my main goal of this exercise.


Daniel


Re: Question about sibling call epilogues & registers

2016-10-15 Thread Segher Boessenkool
Hi Daniel,

On Sat, Oct 15, 2016 at 01:45:12AM -0500, Daniel Santos wrote:
> The insn that's getting deleted is 75, where RCX is set.  I'm starting 
> to think that maybe df_analyze() presumes that my call (to the stub) is 
> invalidating RCX, although it does not.

It looks like you don't use add_function_usage_to on the call insn?


Segher


Question about sibling call epilogues & registers

2016-10-15 Thread Daniel Santos
I'm working on my -foutline-masbi-xlouges optimization (targeting 64 bit 
Wine) and I've run into a snag with sibling calls. When 
-foutline-masbi-xlouges is disabled, the sibling call generates just 
fine. But when enabled the call to df_analyze() in the peephole2 pass 
deletes the insn that initializes the register that the sibling call 
uses. I'm hoping that somebody might be able to look at my epilogue code 
and identify what may cause this. My optimization is generating insns 
146, 147, 148 and 149. (Actually, 146 and 149 can be merged, I just 
haven't gotten to it yet).


The insn that's getting deleted is 75, where RCX is set.  I'm starting 
to think that maybe df_analyze() presumes that my call (to the stub) is 
invalidating RCX, although it does not. Below is the RTL dump from the 
compgotos pass. Also, I'm not sure why all of my REG_CFA_RESTORE notes 
aren't showing up (xmm10-15 are missing).


(code_label 91 70 73 11 903 "" [6 uses])
(note 73 91 74 11 [bb 11] NOTE_INSN_BASIC_BLOCK)
(debug_insn 74 73 75 11 (var_location:DI data (debug_expr:DI D#64)) -1
 (nil))
(insn 75 74 150 11 (set (reg:DI 2 cx)
(symbol_ref:DI ("win_data_section") [flags 0x2] 0x7f33746fce10 win_data_section>)) 
/home/daniel/proj/emu/wine/github/dlls/winex11.drv/window.c:1826 89 
{*movdi_internal}

 (expr_list:REG_UNUSED (reg:DI 2 cx)
(nil)))
(note 150 75 146 11 NOTE_INSN_EPILOGUE_BEG)
(insn/f 146 150 147 11 (parallel [
(set (reg/f:DI 7 sp)
(plus:DI (reg/f:DI 7 sp)
(const_int 48 [0x30])))
(clobber (reg:CC 17 flags))
(clobber (mem:BLK (scratch) [0  A8]))
]) 
/home/daniel/proj/emu/wine/github/dlls/winex11.drv/window.c:1655 -1

 (expr_list:REG_UNUSED (reg:CC 17 flags)
(expr_list:REG_CFA_ADJUST_CFA (set (reg/f:DI 7 sp)
(plus:DI (reg/f:DI 7 sp)
(const_int 48 [0x30])))
(nil
(insn 147 146 148 11 (set (reg:DI 4 si)
(plus:DI (reg/f:DI 7 sp)
(const_int 96 [0x60]))) 
/home/daniel/proj/emu/wine/github/dlls/winex11.drv/window.c:1655 -1

 (nil))
(call_insn/f 148 147 149 11 (parallel [
(call (mem:QI (symbol_ref:DI ("__msabi_restore_15") [flags 
0x1]) [0  S1 A8])

(const_int 0 [0]))
(clobber (reg:CC 17 flags))
(set (reg:V4SF 52 xmm15)
(mem/c:V4SF (plus:DI (reg:DI 4 si)
(const_int -88 [0xffa8])) [0  S16 A8]))
(set (reg:V4SF 51 xmm14)
(mem/c:V4SF (plus:DI (reg:DI 4 si)
(const_int -72 [0xffb8])) [0  S16 A8]))
(set (reg:V4SF 50 xmm13)
(mem/c:V4SF (plus:DI (reg:DI 4 si)
(const_int -56 [0xffc8])) [0  S16 A8]))
(set (reg:V4SF 49 xmm12)
(mem/c:V4SF (plus:DI (reg:DI 4 si)
(const_int -40 [0xffd8])) [0  S16 A8]))
(set (reg:V4SF 48 xmm11)
(mem/c:V4SF (plus:DI (reg:DI 4 si)
(const_int -24 [0xffe8])) [0  S16 A8]))
(set (reg:V4SF 47 xmm10)
(mem/c:V4SF (plus:DI (reg:DI 4 si)
(const_int -8 [0xfff8])) [0  S16 A8]))
(set (reg:V4SF 46 xmm9)
(mem/c:V4SF (plus:DI (reg:DI 4 si)
(const_int 8 [0x8])) [0  S16 A8]))
(set (reg:V4SF 45 xmm8)
(mem/c:V4SF (plus:DI (reg:DI 4 si)
(const_int 24 [0x18])) [0  S16 A8]))
(set (reg:V4SF 28 xmm7)
(mem/c:V4SF (plus:DI (reg:DI 4 si)
(const_int 40 [0x28])) [0  S16 A8]))
(set (reg:V4SF 27 xmm6)
(mem/c:V4SF (plus:DI (reg:DI 4 si)
(const_int 56 [0x38])) [0  S16 A8]))
(set (reg:DI 5 di)
(mem/c:DI (plus:DI (reg:DI 4 si)
(const_int 72 [0x48])) [0  S8 A8]))
(set (reg:DI 3 bx)
(mem/c:DI (plus:DI (reg:DI 4 si)
(const_int 80 [0x50])) [0  S8 A8]))
(set (reg:DI 6 bp)
(mem/c:DI (plus:DI (reg:DI 4 si)
(const_int 88 [0x58])) [0  S8 A8]))
(set (reg:DI 41 r12)
(mem/c:DI (plus:DI (reg:DI 4 si)
(const_int 96 [0x60])) [0  S8 A8]))
(set (reg:DI 4 si)
(mem/c:DI (plus:DI (reg:DI 4 si)
(const_int 64 [0x40])) [0  S8 A8]))
]) 
/home/daniel/proj/emu/wine/github/dlls/winex11.drv/window.c:1655 -1

 (expr_list:REG_CFA_RESTORE (reg:DI 4 si)
(expr_list:REG_CFA_RESTORE (reg:DI 41 r12)
(expr_list:REG_CFA_RESTORE (reg:DI 6 bp)
(expr_list:REG_CFA_RESTORE (reg:DI 3 bx)
(expr_list:REG_CFA_RESTORE (reg:DI 5 di)