On 27/08/2013 23:46, Richard Henderson wrote:
> This does require the fast path always load to the function return
> value register, but apparently the loaded value usually needs to be
> spilled back to its memory slot anyway, so the change in register
> does not really change much.
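Schematically, the claim is that a TLB-hit load under the new "a" constraint comes out like this (registers and spill offset made up for illustration, not literal TCG output):

    # fast path, TLB hit: destination pinned to the return-value register
    mov   (%rsi), %rax          # loaded value lands directly in %rax
    mov   %rax, -0x10(%rbp)     # ...and is usually spilled back to its slot anyway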
Even for something like

  mov (%rdi), %rax
  add (%r8), %rax

? Memory operands should avoid the need to spill anything. Is this change really an advantage, considering the additional icache footprint of the new helpers?

Paolo

> Signed-off-by: Richard Henderson <r...@twiddle.net>
> ---
>  tcg/i386/tcg-target.c | 107 ++++++++++++++++++--------------------------------
>  1 file changed, 39 insertions(+), 68 deletions(-)
>
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 5aee0fa..b1d05b8 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -1025,11 +1025,20 @@ static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
>  /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
>   *                                     int mmu_idx, uintptr_t ra)
>   */
> -static const void * const qemu_ld_helpers[4] = {
> +static const void * const qemu_ld_helpers[8] = {
>      helper_ret_ldub_mmu,
>      helper_ret_lduw_mmu,
>      helper_ret_ldul_mmu,
>      helper_ret_ldq_mmu,
> +
> +    helper_ret_ldsb_mmu,
> +    helper_ret_ldsw_mmu,
> +#if TCG_TARGET_REG_BITS == 64
> +    helper_ret_ldsl_mmu,
> +#else
> +    helper_ret_ldul_mmu,
> +#endif
> +    helper_ret_ldq_mmu,
>  };
>
>  /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
> @@ -1473,9 +1482,8 @@ static void add_qemu_ldst_label(TCGContext *s,
>  static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
>  {
>      int opc = l->opc;
> -    int s_bits = opc & 3;
> -    TCGReg data_reg;
>      uint8_t **label_ptr = &l->label_ptr[0];
> +    TCGReg retaddr;
>
>      /* resolve label address */
>      *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
> @@ -1500,58 +1508,21 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
>          tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
>          ofs += 4;
>
> -        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, (uintptr_t)l->raddr);
> +        retaddr = TCG_REG_EAX;
> +        tcg_out_movi(s, TCG_TYPE_I32, retaddr, (uintptr_t)l->raddr);
> +        tcg_out_st(s, TCG_TYPE_I32, retaddr, TCG_REG_ESP, ofs);
>      } else {
>          tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
>          /* The second argument is already loaded with addrlo.  */
>          tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], l->mem_index);
> -        tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
> -                     (uintptr_t)l->raddr);
> -    }
> -
> -    tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
> -
> -    data_reg = l->datalo_reg;
> -    switch(opc) {
> -    case 0 | 4:
> -        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
> -        break;
> -    case 1 | 4:
> -        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
> -        break;
> -    case 0:
> -        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
> -        break;
> -    case 1:
> -        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
> -        break;
> -    case 2:
> -        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
> -        break;
> -#if TCG_TARGET_REG_BITS == 64
> -    case 2 | 4:
> -        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
> -        break;
> -#endif
> -    case 3:
> -        if (TCG_TARGET_REG_BITS == 64) {
> -            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
> -        } else if (data_reg == TCG_REG_EDX) {
> -            /* xchg %edx, %eax */
> -            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
> -            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
> -        } else {
> -            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
> -            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
> -        }
> -        break;
> -    default:
> -        tcg_abort();
> +        retaddr = tcg_target_call_iarg_regs[3];
> +        tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
>      }
>
> -    /* Jump to the code corresponding to next IR of qemu_st */
> -    tcg_out_jmp(s, (tcg_target_long)l->raddr);
> +    /* "Tail call" to the helper, with the return address back inline.  */
> +    tcg_out_push(s, retaddr);
> +    tcg_out_jmp(s, (tcg_target_long)qemu_ld_helpers[opc]);
>  }
>
>  /*
> @@ -2125,38 +2096,38 @@ static const TCGTargetOpDef x86_op_defs[] = {
>  #endif
>
>  #if TCG_TARGET_REG_BITS == 64
> -    { INDEX_op_qemu_ld8u, { "r", "L" } },
> -    { INDEX_op_qemu_ld8s, { "r", "L" } },
> -    { INDEX_op_qemu_ld16u, { "r", "L" } },
> -    { INDEX_op_qemu_ld16s, { "r", "L" } },
> -    { INDEX_op_qemu_ld32, { "r", "L" } },
> -    { INDEX_op_qemu_ld32u, { "r", "L" } },
> -    { INDEX_op_qemu_ld32s, { "r", "L" } },
> -    { INDEX_op_qemu_ld64, { "r", "L" } },
> +    { INDEX_op_qemu_ld8u, { "a", "L" } },
> +    { INDEX_op_qemu_ld8s, { "a", "L" } },
> +    { INDEX_op_qemu_ld16u, { "a", "L" } },
> +    { INDEX_op_qemu_ld16s, { "a", "L" } },
> +    { INDEX_op_qemu_ld32, { "a", "L" } },
> +    { INDEX_op_qemu_ld32u, { "a", "L" } },
> +    { INDEX_op_qemu_ld32s, { "a", "L" } },
> +    { INDEX_op_qemu_ld64, { "a", "L" } },
>
>      { INDEX_op_qemu_st8, { "L", "L" } },
>      { INDEX_op_qemu_st16, { "L", "L" } },
>      { INDEX_op_qemu_st32, { "L", "L" } },
>      { INDEX_op_qemu_st64, { "L", "L" } },
>  #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
> -    { INDEX_op_qemu_ld8u, { "r", "L" } },
> -    { INDEX_op_qemu_ld8s, { "r", "L" } },
> -    { INDEX_op_qemu_ld16u, { "r", "L" } },
> -    { INDEX_op_qemu_ld16s, { "r", "L" } },
> -    { INDEX_op_qemu_ld32, { "r", "L" } },
> -    { INDEX_op_qemu_ld64, { "r", "r", "L" } },
> +    { INDEX_op_qemu_ld8u, { "a", "L" } },
> +    { INDEX_op_qemu_ld8s, { "a", "L" } },
> +    { INDEX_op_qemu_ld16u, { "a", "L" } },
> +    { INDEX_op_qemu_ld16s, { "a", "L" } },
> +    { INDEX_op_qemu_ld32, { "a", "L" } },
> +    { INDEX_op_qemu_ld64, { "a", "d", "L" } },
>
>      { INDEX_op_qemu_st8, { "cb", "L" } },
>      { INDEX_op_qemu_st16, { "L", "L" } },
>      { INDEX_op_qemu_st32, { "L", "L" } },
>      { INDEX_op_qemu_st64, { "L", "L", "L" } },
>  #else
> -    { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
> -    { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
> -    { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
> -    { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
> -    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
> -    { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },
> +    { INDEX_op_qemu_ld8u, { "a", "L", "L" } },
> +    { INDEX_op_qemu_ld8s, { "a", "L", "L" } },
> +    { INDEX_op_qemu_ld16u, { "a", "L", "L" } },
> +    { INDEX_op_qemu_ld16s, { "a", "L", "L" } },
> +    { INDEX_op_qemu_ld32, { "a", "L", "L" } },
> +    { INDEX_op_qemu_ld64, { "a", "d", "L", "L" } },
>
>      { INDEX_op_qemu_st8, { "cb", "L", "L" } },
>      { INDEX_op_qemu_st16, { "L", "L", "L" } },
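Roughly, the slow-path change the patch makes is the following (schematic x86-64; the helper choice, registers, and the raddr label are illustrative, not literal emitted code):

    # before: call the generic unsigned helper, widen in line, jump back
    call   helper_ret_ldub_mmu
    movsbq %al, %rbx             # sign-extend %eax into the destination
    jmp    raddr                 # resume translated code at l->raddr

    # after: push l->raddr and tail-jump to a size/sign-specific helper;
    # the helper's own ret then resumes translated code at l->raddr
    movq   $raddr, %rcx          # still passed as the 4th argument for unwinding
    pushq  %rcx                  # also becomes the helper's return address
    jmp    helper_ret_ldsb_mmu

The widening moves disappear from every slow path because each of the eight helpers now performs its own extension before returning, which is where the icache-footprint question above comes from: one copy of the extension per helper, instead of one per qemu_ld slow path.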