Alvise Rigo <a.r...@virtualopensystems.com> writes:

> Implement the strex and ldrex instructions relying on TCG's qemu_ldlink
> and qemu_stcond. For the time being only 32bit configurations are
> supported.
>
> Suggested-by: Jani Kokkonen <jani.kokko...@huawei.com>
> Suggested-by: Claudio Fontana <claudio.font...@huawei.com>
> Signed-off-by: Alvise Rigo <a.r...@virtualopensystems.com>
> ---
>  tcg/i386/tcg-target.c | 136 ++++++++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 114 insertions(+), 22 deletions(-)
>
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 0d7c99c..d8250a9 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -1141,6 +1141,17 @@ static void * const qemu_ld_helpers[16] = {
>      [MO_BEQ]  = helper_be_ldq_mmu,
>  };
>
> +/* LoadLink helpers, only unsigned. Use the macro below to access them. */
> +static void * const qemu_ldex_helpers[16] = {
> +    [MO_LEUL] = helper_le_ldlinkul_mmu,
> +};
> +
> +#define LDEX_HELPER(mem_op)                             \
> +({                                                      \
> +    assert(mem_op & MO_EXCL);                           \
> +    qemu_ldex_helpers[((int)mem_op - MO_EXCL)];         \
> +})
> +
>  /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
>   *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
>   */
> @@ -1154,6 +1165,17 @@ static void * const qemu_st_helpers[16] = {
>      [MO_BEQ] = helper_be_stq_mmu,
>  };
>
> +/* StoreConditional helpers. Use the macro below to access them. */
> +static void * const qemu_stex_helpers[16] = {
> +    [MO_LEUL] = helper_le_stcondl_mmu,
> +};
> +
> +#define STEX_HELPER(mem_op)                             \
> +({                                                      \
> +    assert(mem_op & MO_EXCL);                           \
> +    qemu_stex_helpers[(int)mem_op - MO_EXCL];           \
> +})
> +

Same comments as for target-arm. Do we need to be protecting backends
with HAS_LDST_EXCL defines or some such macro hackery? What currently
happens if you use the new TCG ops when the backend doesn't support
them? Is supporting all backends a prerequisite for the series?
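
For concreteness, something along the lines of the sketch below is what
I have in mind, mirroring how the optional arithmetic ops are already
gated in tcg-opc.h. The TCG_TARGET_HAS_qemu_ldst_excl name and the
exact in/out/const argument counts are illustrative only, not tested
code:

  /* In each converted tcg-target.h (hypothetical define; backends
   * that do not implement the new ops would define it to 0): */
  #define TCG_TARGET_HAS_qemu_ldst_excl   1

  /* In tcg-opc.h, gate the opcode definitions with IMPL(), as is
   * already done for e.g. TCG_TARGET_HAS_div_i32.  A front end
   * emitting these ops on an unsupported backend then trips over a
   * well-defined "op not present" failure instead of whatever the
   * default case of that backend's tcg_out_op() happens to do. */
  DEF(qemu_ldlink_i32, 1, TLADDR_ARGS, 1,
      TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
      IMPL(TCG_TARGET_HAS_qemu_ldst_excl))
  DEF(qemu_stcond_i32, 1, TLADDR_ARGS + 1, 1,
      TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
      IMPL(TCG_TARGET_HAS_qemu_ldst_excl))

Front ends could then check the define and fall back to a plain
qemu_ld/qemu_st sequence on backends that have not been converted yet.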

>  /* Perform the TLB load and compare.
>
>     Inputs:
> @@ -1249,6 +1271,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
>   * for a load or store, so that we can later generate the correct helper code
>   */
>  static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
> +                                TCGReg llsc_success,
>                                  TCGReg datalo, TCGReg datahi,
>                                  TCGReg addrlo, TCGReg addrhi,
>                                  tcg_insn_unit *raddr,
> @@ -1257,6 +1280,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
>      TCGLabelQemuLdst *label = new_ldst_label(s);
>
>      label->is_ld = is_ld;
> +    label->llsc_success = llsc_success;
>      label->oi = oi;
>      label->datalo_reg = datalo;
>      label->datahi_reg = datahi;
> @@ -1311,7 +1335,11 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
>                           (uintptr_t)l->raddr);
>      }
>
> -    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
> +    if (opc & MO_EXCL) {
> +        tcg_out_call(s, LDEX_HELPER(opc));
> +    } else {
> +        tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]);
> +    }
>
>      data_reg = l->datalo_reg;
>      switch (opc & MO_SSIZE) {
> @@ -1415,9 +1443,16 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
>          }
>      }
>
> -    /* "Tail call" to the helper, with the return address back inline. */
> -    tcg_out_push(s, retaddr);
> -    tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
> +    if (opc & MO_EXCL) {
> +        tcg_out_call(s, STEX_HELPER(opc));
> +        /* Save the output of the StoreConditional */
> +        tcg_out_mov(s, TCG_TYPE_I32, l->llsc_success, TCG_REG_EAX);
> +        tcg_out_jmp(s, l->raddr);
> +    } else {
> +        /* "Tail call" to the helper, with the return address back inline. */
> +        tcg_out_push(s, retaddr);
> +        tcg_out_jmp(s, qemu_st_helpers[opc]);
> +    }
>  }
>  #elif defined(__x86_64__) && defined(__linux__)
>  # include <asm/prctl.h>
> @@ -1530,7 +1565,8 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
>  /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
>     EAX. It will be useful once fixed registers globals are less
>     common. */
> -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
> +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64,
> +                            bool isLoadLink)
>  {
>      TCGReg datalo, datahi, addrlo;
>      TCGReg addrhi __attribute__((unused));
> @@ -1553,14 +1589,34 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
>      mem_index = get_mmuidx(oi);
>      s_bits = opc & MO_SIZE;
>
> -    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
> -                     label_ptr, offsetof(CPUTLBEntry, addr_read));
> +    if (isLoadLink) {
> +        TCGType t = ((TCG_TARGET_REG_BITS == 64) && (TARGET_LONG_BITS == 64)) ?
> +                                                   TCG_TYPE_I64 : TCG_TYPE_I32;
> +        /* The JMP address will be patched afterwards,
> +         * in tcg_out_qemu_ld_slow_path (two times when
> +         * TARGET_LONG_BITS > TCG_TARGET_REG_BITS). */
> +        tcg_out_mov(s, t, TCG_REG_L1, addrlo);
> +
> +        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
> +            /* Store the second part of the address. */
> +            tcg_out_mov(s, t, TCG_REG_L0, addrhi);
> +            /* We add 4 to include the jmp that follows. */
> +            label_ptr[1] = s->code_ptr + 4;
> +        }
>
> -    /* TLB Hit.  */
> -    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
> +        tcg_out_opc(s, OPC_JMP_long, 0, 0, 0);
> +        label_ptr[0] = s->code_ptr;
> +        s->code_ptr += 4;
> +    } else {
> +        tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
> +                         label_ptr, offsetof(CPUTLBEntry, addr_read));
> +
> +        /* TLB Hit.  */
> +        tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
> +    }
>
>      /* Record the current context of a load into ldst label */
> -    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
> +    add_qemu_ldst_label(s, true, oi, 0, datalo, datahi, addrlo, addrhi,
>                          s->code_ptr, label_ptr);
>  #else
>      {
> @@ -1663,9 +1719,10 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
>      }
>  }
>
> -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
> +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64,
> +                            bool isStoreCond)
>  {
> -    TCGReg datalo, datahi, addrlo;
> +    TCGReg datalo, datahi, addrlo, llsc_success;
>      TCGReg addrhi __attribute__((unused));
>      TCGMemOpIdx oi;
>      TCGMemOp opc;
> @@ -1675,6 +1732,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
>      tcg_insn_unit *label_ptr[2];
>  #endif
>
> +    /* The stcond variant has one more param */
> +    llsc_success = (isStoreCond ? *args++ : 0);
> +
>      datalo = *args++;
>      datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
>      addrlo = *args++;
> @@ -1686,15 +1746,35 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
>      mem_index = get_mmuidx(oi);
>      s_bits = opc & MO_SIZE;
>
> -    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
> -                     label_ptr, offsetof(CPUTLBEntry, addr_write));
> +    if (isStoreCond) {
> +        TCGType t = ((TCG_TARGET_REG_BITS == 64) && (TARGET_LONG_BITS == 64)) ?
> +                                                   TCG_TYPE_I64 : TCG_TYPE_I32;
> +        /* The JMP address will be filled afterwards,
> +         * in tcg_out_qemu_ld_slow_path (two times when
> +         * TARGET_LONG_BITS > TCG_TARGET_REG_BITS). */
> +        tcg_out_mov(s, t, TCG_REG_L1, addrlo);
> +
> +        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
> +            /* Store the second part of the address. */
> +            tcg_out_mov(s, t, TCG_REG_L0, addrhi);
> +            /* We add 4 to include the jmp that follows. */
> +            label_ptr[1] = s->code_ptr + 4;
> +        }
>
> -    /* TLB Hit.  */
> -    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
> +        tcg_out_opc(s, OPC_JMP_long, 0, 0, 0);
> +        label_ptr[0] = s->code_ptr;
> +        s->code_ptr += 4;
> +    } else {
> +        tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
> +                         label_ptr, offsetof(CPUTLBEntry, addr_write));
> +
> +        /* TLB Hit.  */
> +        tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
> +    }
>
>      /* Record the current context of a store into ldst label */
> -    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
> -                        s->code_ptr, label_ptr);
> +    add_qemu_ldst_label(s, false, oi, llsc_success, datalo, datahi, addrlo,
> +                        addrhi, s->code_ptr, label_ptr);
>  #else
>      {
>          int32_t offset = GUEST_BASE;
> @@ -1955,16 +2035,22 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          break;
>
>      case INDEX_op_qemu_ld_i32:
> -        tcg_out_qemu_ld(s, args, 0);
> +        tcg_out_qemu_ld(s, args, 0, 0);
> +        break;
> +    case INDEX_op_qemu_ldlink_i32:
> +        tcg_out_qemu_ld(s, args, 0, 1);
>          break;
>      case INDEX_op_qemu_ld_i64:
> -        tcg_out_qemu_ld(s, args, 1);
> +        tcg_out_qemu_ld(s, args, 1, 0);
>          break;
>      case INDEX_op_qemu_st_i32:
> -        tcg_out_qemu_st(s, args, 0);
> +        tcg_out_qemu_st(s, args, 0, 0);
> +        break;
> +    case INDEX_op_qemu_stcond_i32:
> +        tcg_out_qemu_st(s, args, 0, 1);
>          break;
>      case INDEX_op_qemu_st_i64:
> -        tcg_out_qemu_st(s, args, 1);
> +        tcg_out_qemu_st(s, args, 1, 0);
>          break;
>
>      OP_32_64(mulu2):
> @@ -2186,17 +2272,23 @@ static const TCGTargetOpDef x86_op_defs[] = {
>
>  #if TCG_TARGET_REG_BITS == 64
>      { INDEX_op_qemu_ld_i32, { "r", "L" } },
> +    { INDEX_op_qemu_ldlink_i32, { "r", "L" } },
>      { INDEX_op_qemu_st_i32, { "L", "L" } },
> +    { INDEX_op_qemu_stcond_i32, { "r", "L", "L" } },
>      { INDEX_op_qemu_ld_i64, { "r", "L" } },
>      { INDEX_op_qemu_st_i64, { "L", "L" } },
>  #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
>      { INDEX_op_qemu_ld_i32, { "r", "L" } },
> +    { INDEX_op_qemu_ldlink_i32, { "r", "L" } },
>      { INDEX_op_qemu_st_i32, { "L", "L" } },
> +    { INDEX_op_qemu_stcond_i32, { "r", "L", "L" } },
>      { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
>      { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
>  #else
>      { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
> +    { INDEX_op_qemu_ldlink_i32, { "r", "L", "L" } },
>      { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
> +    { INDEX_op_qemu_stcond_i32, { "r", "L", "L", "L" } },
>      { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
>      { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
>  #endif

--
Alex Bennée