Richard Henderson <richard.hender...@linaro.org> writes:
> Add tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
> and tcg_out_st_helper_args. These and their subroutines
> use the existing knowledge of the host function call abi
> to load the function call arguments and return results.
>
> These will be used to simplify the backends in turn.
>
> Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
> ---
>  tcg/tcg.c | 456 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 453 insertions(+), 3 deletions(-)
>
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index 057423c121..748be8426a 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -181,6 +181,22 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
>  static int tcg_out_ldst_finalize(TCGContext *s);
>  #endif
>
> +typedef struct TCGLdstHelperParam {
> +    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
> +    unsigned ntmp;
> +    int tmp[3];
> +} TCGLdstHelperParam;
> +
> +static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
> +                                   const TCGLdstHelperParam *p)
> +    __attribute__((unused));
> +static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
> +                                  bool load_sign, const TCGLdstHelperParam *p)
> +    __attribute__((unused));
> +static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
> +                                   const TCGLdstHelperParam *p)
> +    __attribute__((unused));
> +
>  TCGContext tcg_init_ctx;
>  __thread TCGContext *tcg_ctx;
>
> @@ -459,9 +475,8 @@ static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
>   * between the sources and destinations.
>   */
>
> -static void __attribute__((unused))
> -tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
> -                const TCGMovExtend *i2, int scratch)
> +static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
> +                            const TCGMovExtend *i2, int scratch)
>  {
>      TCGReg src1 = i1->src;
>      TCGReg src2 = i2->src;
> @@ -715,6 +730,50 @@ static TCGHelperInfo all_helpers[] = {
>  };
>  static GHashTable *helper_table;
>
> +#if TCG_TARGET_REG_BITS == 32
> +# define dh_typecode_ttl dh_typecode_i32
> +#else
> +# define dh_typecode_ttl dh_typecode_i64
> +#endif
> +

Might be worth a small comment mentioning this is doing by hand what the
HELPER wrappers would do for us, and why we don't use them here.
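
Something like this, perhaps (a rough sketch only; I'm assuming the reason
we don't use the generated dh_* info here is that these slow-path helpers
are declared in tcg/tcg-ldst.h rather than via DEF_HELPER_FLAGS_*):

    /*
     * Manually build the typemasks that the dh_* wrappers in
     * include/exec/helper-head.h would otherwise generate; the qemu_ld/st
     * slow-path helpers are declared in tcg/tcg-ldst.h, not via
     * DEF_HELPER_FLAGS_*, so no generated TCGHelperInfo exists for them.
     */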

> +static TCGHelperInfo info_helper_ld32_mmu = {
> +    .flags = TCG_CALL_NO_WG,
> +    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
> +              | dh_typemask(env, 1)
> +              | dh_typemask(tl, 2)   /* target_ulong addr */
> +              | dh_typemask(i32, 3)  /* unsigned oi */
> +              | dh_typemask(ptr, 4)  /* uintptr_t ra */
> +};
> +
> +static TCGHelperInfo info_helper_ld64_mmu = {
> +    .flags = TCG_CALL_NO_WG,
> +    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
> +              | dh_typemask(env, 1)
> +              | dh_typemask(tl, 2)   /* target_ulong addr */
> +              | dh_typemask(i32, 3)  /* unsigned oi */
> +              | dh_typemask(ptr, 4)  /* uintptr_t ra */
> +};
> +
> +static TCGHelperInfo info_helper_st32_mmu = {
> +    .flags = TCG_CALL_NO_WG,
> +    .typemask = dh_typemask(void, 0)
> +              | dh_typemask(env, 1)
> +              | dh_typemask(tl, 2)   /* target_ulong addr */
> +              | dh_typemask(i32, 3)  /* uint32_t data */
> +              | dh_typemask(i32, 4)  /* unsigned oi */
> +              | dh_typemask(ptr, 5)  /* uintptr_t ra */
> +};
> +
> +static TCGHelperInfo info_helper_st64_mmu = {
> +    .flags = TCG_CALL_NO_WG,
> +    .typemask = dh_typemask(void, 0)
> +              | dh_typemask(env, 1)
> +              | dh_typemask(tl, 2)   /* target_ulong addr */
> +              | dh_typemask(i64, 3)  /* uint64_t data */
> +              | dh_typemask(i32, 4)  /* unsigned oi */
> +              | dh_typemask(ptr, 5)  /* uintptr_t ra */
> +};
> +
>  #ifdef CONFIG_TCG_INTERPRETER
>  static ffi_type *typecode_to_ffi(int argmask)
>  {
> @@ -1126,6 +1185,11 @@ static void tcg_context_init(unsigned max_cpus)
>                              (gpointer)&all_helpers[i]);
>      }
>
> +    init_call_layout(&info_helper_ld32_mmu);
> +    init_call_layout(&info_helper_ld64_mmu);
> +    init_call_layout(&info_helper_st32_mmu);
> +    init_call_layout(&info_helper_st64_mmu);
> +
>  #ifdef CONFIG_TCG_INTERPRETER
>      init_ffi_layouts();
>  #endif
> @@ -5011,6 +5075,392 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
>      }
>  }
>
> +/*
> + * Similarly for qemu_ld/st slow path helpers.
> + * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
> + * using only the provided backend tcg_out_* functions.
> + */
> +
> +static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
> +{
> +    int ofs = arg_slot_stk_ofs(slot);
> +
> +    /*
> +     * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
> +     * require extension to uint64_t, adjust the address for uint32_t.
> +     */
> +    if (HOST_BIG_ENDIAN &&
> +        TCG_TARGET_REG_BITS == 64 &&
> +        type == TCG_TYPE_I32) {
> +        ofs += 4;
> +    }
> +    return ofs;
> +}
> +
> +static void tcg_out_helper_load_regs(TCGContext *s,
> +                                     unsigned nmov, TCGMovExtend *mov,
> +                                     unsigned ntmp, const int *tmp)
> +{
> +    switch (nmov) {
> +    default:
> +        /* The backend must have provided enough temps for the worst case. */
> +        tcg_debug_assert(ntmp + 1 >= nmov);
> +
> +        for (unsigned i = nmov - 1; i >= 2; --i) {
> +            TCGReg dst = mov[i].dst;
> +
> +            for (unsigned j = 0; j < i; ++j) {
> +                if (dst == mov[j].src) {
> +                    /*
> +                     * Conflict.
> +                     * Copy the source to a temporary, recurse for the
> +                     * remaining moves, perform the extension from our
> +                     * scratch on the way out.
> +                     */
> +                    TCGReg scratch = tmp[--ntmp];
> +                    tcg_out_mov(s, mov[i].src_type, scratch, mov[i].src);
> +                    mov[i].src = scratch;
> +
> +                    tcg_out_helper_load_regs(s, i, mov, ntmp, tmp);
> +                    tcg_out_movext1(s, &mov[i]);
> +                    return;
> +                }
> +            }
> +
> +            /* No conflicts: perform this move and continue. */
> +            tcg_out_movext1(s, &mov[i]);
> +        }
> +        /* fall through for the final two moves */
> +
> +    case 2:
> +        tcg_out_movext2(s, mov, mov + 1, ntmp ? tmp[0] : -1);
> +        return;
> +    case 1:
> +        tcg_out_movext1(s, mov);
> +        return;
> +    case 0:
> +        g_assert_not_reached();
> +    }
> +}
> +
> +static void tcg_out_helper_load_slots(TCGContext *s,
> +                                      unsigned nmov, TCGMovExtend *mov,
> +                                      const TCGLdstHelperParam *parm)

I think a documentation patch before this one for TCGMovExtend, describing
the reg/slot duality of dst, might be worthwhile.

> +{
> +    unsigned i;
> +
> +    /*
> +     * Start from the end, storing to the stack first.
> +     * This frees those registers, so we need not consider overlap.
> +     */
> +    for (i = nmov; i-- > 0; ) {
> +        unsigned slot = mov[i].dst;
> +
> +        if (arg_slot_reg_p(slot)) {
> +            goto found_reg;
> +        }
> +
> +        TCGReg src = mov[i].src;
> +        TCGType dst_type = mov[i].dst_type;
> +        MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
> +
> +        /* The argument is going onto the stack; extend into scratch. */
> +        if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
> +            tcg_debug_assert(parm->ntmp != 0);
> +            mov[i].dst = src = parm->tmp[0];
> +            tcg_out_movext1(s, &mov[i]);
> +        }
> +
> +        tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
> +                   tcg_out_helper_stk_ofs(dst_type, slot));
> +    }
> +    return;
> +
> + found_reg:
> +    /*
> +     * The remaining arguments are in registers.
> +     * Convert slot numbers to argument registers.
> +     */
> +    nmov = i + 1;
> +    for (i = 0; i < nmov; ++i) {
> +        mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
> +    }
> +    tcg_out_helper_load_regs(s, nmov, mov, parm->ntmp, parm->tmp);
> +}
> +
> +static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
> +                                    TCGType type, tcg_target_long imm,
> +                                    const TCGLdstHelperParam *parm)
> +{
> +    if (arg_slot_reg_p(slot)) {
> +        tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
> +    } else {
> +        int ofs = tcg_out_helper_stk_ofs(type, slot);
> +        if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
> +            tcg_debug_assert(parm->ntmp != 0);
> +            tcg_out_movi(s, type, parm->tmp[0], imm);
> +            tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
> +        }
> +    }
> +}
> +
> +static void tcg_out_helper_load_common_args(TCGContext *s,
> +                                            const TCGLabelQemuLdst *ldst,
> +                                            const TCGLdstHelperParam *parm,
> +                                            const TCGHelperInfo *info,
> +                                            unsigned next_arg)
> +{
> +    TCGMovExtend ptr_mov = {
> +        .dst_type = TCG_TYPE_PTR,
> +        .src_type = TCG_TYPE_PTR,
> +        .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
> +    };
> +    const TCGCallArgumentLoc *loc = &info->in[0];
> +    TCGType type;
> +    unsigned slot;
> +    tcg_target_ulong imm;
> +
> +    /*
> +     * Handle env, which is always first.
> +     */
> +    ptr_mov.dst = loc->arg_slot;
> +    ptr_mov.src = TCG_AREG0;
> +    tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
> +
> +    /*
> +     * Handle oi.
> +     */
> +    imm = ldst->oi;
> +    loc = &info->in[next_arg];
> +    type = TCG_TYPE_I32;
> +    switch (loc->kind) {
> +    case TCG_CALL_ARG_NORMAL:
> +        break;
> +    case TCG_CALL_ARG_EXTEND_U:
> +    case TCG_CALL_ARG_EXTEND_S:
> +        /* No extension required for MemOpIdx. */
> +        tcg_debug_assert(imm <= INT32_MAX);
> +        type = TCG_TYPE_REG;
> +        break;
> +    default:
> +        g_assert_not_reached();
> +    }
> +    tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
> +    next_arg++;
> +
> +    /*
> +     * Handle ra.
> +     */
> +    loc = &info->in[next_arg];
> +    slot = loc->arg_slot;
> +    if (parm->ra_gen) {
> +        int arg_reg = -1;
> +        TCGReg ra_reg;
> +
> +        if (arg_slot_reg_p(slot)) {
> +            arg_reg = tcg_target_call_iarg_regs[slot];
> +        }
> +        ra_reg = parm->ra_gen(s, ldst, arg_reg);
> +
> +        ptr_mov.dst = slot;
> +        ptr_mov.src = ra_reg;
> +        tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
> +    } else {
> +        imm = (uintptr_t)ldst->raddr;
> +        tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
> +    }
> +}
> +
> +static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
> +                                       const TCGCallArgumentLoc *loc,
> +                                       TCGType dst_type, TCGType src_type,
> +                                       TCGReg lo, TCGReg hi)
> +{
> +    if (dst_type <= TCG_TYPE_REG) {
> +        MemOp src_ext;
> +
> +        switch (loc->kind) {
> +        case TCG_CALL_ARG_NORMAL:
> +            src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
> +            break;
> +        case TCG_CALL_ARG_EXTEND_U:
> +            dst_type = TCG_TYPE_REG;
> +            src_ext = MO_UL;
> +            break;
> +        case TCG_CALL_ARG_EXTEND_S:
> +            dst_type = TCG_TYPE_REG;
> +            src_ext = MO_SL;
> +            break;
> +        default:
> +            g_assert_not_reached();
> +        }
> +
> +        mov[0].dst = loc->arg_slot;
> +        mov[0].dst_type = dst_type;
> +        mov[0].src = lo;
> +        mov[0].src_type = src_type;
> +        mov[0].src_ext = src_ext;
> +        return 1;
> +    }
> +
> +    assert(TCG_TARGET_REG_BITS == 32);
> +
> +    mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
> +    mov[0].src = lo;
> +    mov[0].dst_type = TCG_TYPE_I32;
> +    mov[0].src_type = TCG_TYPE_I32;
> +    mov[0].src_ext = MO_32;
> +
> +    mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
> +    mov[1].src = hi;
> +    mov[1].dst_type = TCG_TYPE_I32;
> +    mov[1].src_type = TCG_TYPE_I32;
> +    mov[1].src_ext = MO_32;
> +
> +    return 2;
> +}
> +
> +static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
> +                                   const TCGLdstHelperParam *parm)
> +{
> +    const TCGHelperInfo *info;
> +    const TCGCallArgumentLoc *loc;
> +    TCGMovExtend mov[2];
> +    unsigned next_arg, nmov;
> +    MemOp mop = get_memop(ldst->oi);
> +
> +    switch (mop & MO_SIZE) {
> +    case MO_8:
> +    case MO_16:
> +    case MO_32:
> +        info = &info_helper_ld32_mmu;
> +        break;
> +    case MO_64:
> +        info = &info_helper_ld64_mmu;
> +        break;
> +    default:
> +        g_assert_not_reached();
> +    }
> +
> +    /* Defer env argument. */
> +    next_arg = 1;

This could be assigned at declaration, but maybe even just open code it:

    loc = &info->in[1];
    ...
    tcg_out_helper_load_common_args(s, ldst, parm, info, 1 + nmov);

although I concede that matching up with the store helper makes sense.

> +
> +    loc = &info->in[next_arg];
> +    nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
> +                                  ldst->addrlo_reg, ldst->addrhi_reg);
> +    next_arg += nmov;
> +
> +    tcg_out_helper_load_slots(s, nmov, mov, parm);
> +
> +    /* No special attention for 32 and 64-bit return values. */
> +    tcg_debug_assert(info->out_kind == TCG_CALL_RET_NORMAL);
> +
> +    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
> +}
> +
> +static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
> +                                  bool load_sign,
> +                                  const TCGLdstHelperParam *parm)
> +{
> +    TCGMovExtend mov[2];
> +
> +    if (ldst->type <= TCG_TYPE_REG) {
> +        MemOp mop = get_memop(ldst->oi);
> +
> +        mov[0].dst = ldst->datalo_reg;
> +        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
> +        mov[0].dst_type = ldst->type;
> +        mov[0].src_type = TCG_TYPE_REG;
> +
> +        /*
> +         * If load_sign, then we allowed the helper to perform the
> +         * appropriate sign extension to tcg_target_ulong, and all
> +         * we need now is a plain move.
> +         *
> +         * If they do not, then we expect the relevant extension
> +         * instruction to be no more expensive than a move, and
> +         * we thus save the icache etc by only using one of two
> +         * helper functions.
> +         */
> +        if (load_sign || !(mop & MO_SIGN)) {
> +            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
> +                mov[0].src_ext = MO_32;
> +            } else {
> +                mov[0].src_ext = MO_64;
> +            }
> +        } else {
> +            mov[0].src_ext = mop & MO_SSIZE;
> +        }
> +        tcg_out_movext1(s, mov);
> +    } else {
> +        assert(TCG_TARGET_REG_BITS == 32);
> +
> +        mov[0].dst = ldst->datalo_reg;
> +        mov[0].src =
> +            tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
> +        mov[0].dst_type = TCG_TYPE_I32;
> +        mov[0].src_type = TCG_TYPE_I32;
> +        mov[0].src_ext = MO_32;
> +
> +        mov[1].dst = ldst->datahi_reg;
> +        mov[1].src =
> +            tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
> +        mov[1].dst_type = TCG_TYPE_REG;
> +        mov[1].src_type = TCG_TYPE_REG;
> +        mov[1].src_ext = MO_32;
> +
> +        tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
> +    }
> +}
> +
> +static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
> +                                   const TCGLdstHelperParam *parm)
> +{
> +    const TCGHelperInfo *info;
> +    const TCGCallArgumentLoc *loc;
> +    TCGMovExtend mov[4];
> +    TCGType data_type;
> +    unsigned next_arg, nmov, n;
> +    MemOp mop = get_memop(ldst->oi);
> +
> +    switch (mop & MO_SIZE) {
> +    case MO_8:
> +    case MO_16:
> +    case MO_32:
> +        info = &info_helper_st32_mmu;
> +        data_type = TCG_TYPE_I32;
> +        break;
> +    case MO_64:
> +        info = &info_helper_st64_mmu;
> +        data_type = TCG_TYPE_I64;
> +        break;
> +    default:
> +        g_assert_not_reached();
> +    }
> +
> +    /* Defer env argument. */
> +    next_arg = 1;
> +    nmov = 0;
> +
> +    /* Handle addr argument. */
> +    loc = &info->in[next_arg];
> +    n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
> +                               ldst->addrlo_reg, ldst->addrhi_reg);
> +    next_arg += n;
> +    nmov += n;
> +
> +    /* Handle data argument. */
> +    loc = &info->in[next_arg];
> +    n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
> +                               ldst->datalo_reg, ldst->datahi_reg);
> +    next_arg += n;
> +    nmov += n;
> +    tcg_debug_assert(nmov <= ARRAY_SIZE(mov));
> +
> +    tcg_out_helper_load_slots(s, nmov, mov, parm);
> +    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
> +}
> +
>  #ifdef CONFIG_PROFILER
>
>  /* avoid copy/paste errors */

Otherwise:

Reviewed-by: Alex Bennée <alex.ben...@linaro.org>

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro