This does require that the fast path always load into the function return value register, but in practice the loaded value usually needs to be spilled back to its memory slot anyway, so the change of register makes little difference.
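A minimal standalone sketch (not part of the patch) of the opc-to-helper indexing the slow path now relies on: bits 0-1 of opc give log2 of the access size and bit 2 selects sign extension, so once qemu_ld_helpers[] gains the signed entries it can be indexed by opc directly and the extension happens inside the helper.

/*
 * Illustrative only, not part of the patch.  Shows how an 8-entry helper
 * table can be indexed by the full opc: bits 0-1 = log2(access size),
 * bit 2 = sign-extending load.  Names mirror the helpers added below.
 */
#include <stdio.h>

int main(void)
{
    static const char *const helper_names[8] = {
        "helper_ret_ldub_mmu", "helper_ret_lduw_mmu",
        "helper_ret_ldul_mmu", "helper_ret_ldq_mmu",
        "helper_ret_ldsb_mmu", "helper_ret_ldsw_mmu",
        "helper_ret_ldsl_mmu", "helper_ret_ldq_mmu",
    };

    for (int opc = 0; opc < 8; opc++) {
        int size = 1 << (opc & 3);       /* access size in bytes */
        int sign = (opc & 4) != 0;       /* sign-extending load?  */
        printf("opc %d -> %s (%d-byte, %s)\n",
               opc, helper_names[opc], size, sign ? "signed" : "unsigned");
    }
    return 0;
}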
Signed-off-by: Richard Henderson <r...@twiddle.net>
---
 tcg/i386/tcg-target.c | 107 ++++++++++++++++++---------------------------------
 1 file changed, 39 insertions(+), 68 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 5aee0fa..b1d05b8 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1025,11 +1025,20 @@ static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
 
 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
  *                                     int mmu_idx, uintptr_t ra) */
-static const void * const qemu_ld_helpers[4] = {
+static const void * const qemu_ld_helpers[8] = {
     helper_ret_ldub_mmu,
     helper_ret_lduw_mmu,
     helper_ret_ldul_mmu,
     helper_ret_ldq_mmu,
+
+    helper_ret_ldsb_mmu,
+    helper_ret_ldsw_mmu,
+#if TCG_TARGET_REG_BITS == 64
+    helper_ret_ldsl_mmu,
+#else
+    helper_ret_ldul_mmu,
+#endif
+    helper_ret_ldq_mmu,
 };
 
 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
@@ -1473,9 +1482,8 @@ static void add_qemu_ldst_label(TCGContext *s,
 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 {
     int opc = l->opc;
-    int s_bits = opc & 3;
-    TCGReg data_reg;
     uint8_t **label_ptr = &l->label_ptr[0];
+    TCGReg retaddr;
 
     /* resolve label address */
     *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
@@ -1500,58 +1508,21 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
         tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
         ofs += 4;
 
-        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, (uintptr_t)l->raddr);
+        retaddr = TCG_REG_EAX;
+        tcg_out_movi(s, TCG_TYPE_I32, retaddr, (uintptr_t)l->raddr);
+        tcg_out_st(s, TCG_TYPE_I32, retaddr, TCG_REG_ESP, ofs);
     } else {
         tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
         /* The second argument is already loaded with addrlo.  */
         tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
                      l->mem_index);
-        tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
-                     (uintptr_t)l->raddr);
-    }
-
-    tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
-
-    data_reg = l->datalo_reg;
-    switch(opc) {
-    case 0 | 4:
-        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
-        break;
-    case 1 | 4:
-        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
-        break;
-    case 0:
-        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
-        break;
-    case 1:
-        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
-        break;
-    case 2:
-        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
-        break;
-#if TCG_TARGET_REG_BITS == 64
-    case 2 | 4:
-        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
-        break;
-#endif
-    case 3:
-        if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
-        } else if (data_reg == TCG_REG_EDX) {
-            /* xchg %edx, %eax */
-            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
-            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
-        } else {
-            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
-            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
-        }
-        break;
-    default:
-        tcg_abort();
+        retaddr = tcg_target_call_iarg_regs[3];
+        tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
     }
 
-    /* Jump to the code corresponding to next IR of qemu_st */
-    tcg_out_jmp(s, (tcg_target_long)l->raddr);
+    /* "Tail call" to the helper, with the return address back inline.  */
+    tcg_out_push(s, retaddr);
+    tcg_out_jmp(s, (tcg_target_long)qemu_ld_helpers[opc]);
 }
 
 /*
@@ -2125,38 +2096,38 @@ static const TCGTargetOpDef x86_op_defs[] = {
 #endif
 
 #if TCG_TARGET_REG_BITS == 64
-    { INDEX_op_qemu_ld8u, { "r", "L" } },
-    { INDEX_op_qemu_ld8s, { "r", "L" } },
-    { INDEX_op_qemu_ld16u, { "r", "L" } },
-    { INDEX_op_qemu_ld16s, { "r", "L" } },
-    { INDEX_op_qemu_ld32, { "r", "L" } },
-    { INDEX_op_qemu_ld32u, { "r", "L" } },
-    { INDEX_op_qemu_ld32s, { "r", "L" } },
-    { INDEX_op_qemu_ld64, { "r", "L" } },
+    { INDEX_op_qemu_ld8u, { "a", "L" } },
+    { INDEX_op_qemu_ld8s, { "a", "L" } },
+    { INDEX_op_qemu_ld16u, { "a", "L" } },
+    { INDEX_op_qemu_ld16s, { "a", "L" } },
+    { INDEX_op_qemu_ld32, { "a", "L" } },
+    { INDEX_op_qemu_ld32u, { "a", "L" } },
+    { INDEX_op_qemu_ld32s, { "a", "L" } },
+    { INDEX_op_qemu_ld64, { "a", "L" } },
 
     { INDEX_op_qemu_st8, { "L", "L" } },
     { INDEX_op_qemu_st16, { "L", "L" } },
     { INDEX_op_qemu_st32, { "L", "L" } },
     { INDEX_op_qemu_st64, { "L", "L" } },
 #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
-    { INDEX_op_qemu_ld8u, { "r", "L" } },
-    { INDEX_op_qemu_ld8s, { "r", "L" } },
-    { INDEX_op_qemu_ld16u, { "r", "L" } },
-    { INDEX_op_qemu_ld16s, { "r", "L" } },
-    { INDEX_op_qemu_ld32, { "r", "L" } },
-    { INDEX_op_qemu_ld64, { "r", "r", "L" } },
+    { INDEX_op_qemu_ld8u, { "a", "L" } },
+    { INDEX_op_qemu_ld8s, { "a", "L" } },
+    { INDEX_op_qemu_ld16u, { "a", "L" } },
+    { INDEX_op_qemu_ld16s, { "a", "L" } },
+    { INDEX_op_qemu_ld32, { "a", "L" } },
+    { INDEX_op_qemu_ld64, { "a", "d", "L" } },
 
     { INDEX_op_qemu_st8, { "cb", "L" } },
     { INDEX_op_qemu_st16, { "L", "L" } },
     { INDEX_op_qemu_st32, { "L", "L" } },
     { INDEX_op_qemu_st64, { "L", "L", "L" } },
 #else
-    { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },
+    { INDEX_op_qemu_ld8u, { "a", "L", "L" } },
+    { INDEX_op_qemu_ld8s, { "a", "L", "L" } },
+    { INDEX_op_qemu_ld16u, { "a", "L", "L" } },
+    { INDEX_op_qemu_ld16s, { "a", "L", "L" } },
+    { INDEX_op_qemu_ld32, { "a", "L", "L" } },
+    { INDEX_op_qemu_ld64, { "a", "d", "L", "L" } },
 
     { INDEX_op_qemu_st8, { "cb", "L", "L" } },
     { INDEX_op_qemu_st16, { "L", "L", "L" } },
-- 
1.8.1.4