On Fri, Jul 17, 2015 at 2:56 PM, Alex Bennée <alex.ben...@linaro.org> wrote:
>
> Alvise Rigo <a.r...@virtualopensystems.com> writes:
>
>> Implement the strex and ldrex instructions relying on TCG's qemu_ldlink
>> and qemu_stcond. For the time being, only 32-bit configurations are
>> supported.
>>
>> Suggested-by: Jani Kokkonen <jani.kokko...@huawei.com>
>> Suggested-by: Claudio Fontana <claudio.font...@huawei.com>
>> Signed-off-by: Alvise Rigo <a.r...@virtualopensystems.com>
>> ---
>>  tcg/i386/tcg-target.c | 136 ++++++++++++++++++++++++++++++++++++++++++--------
>>  1 file changed, 114 insertions(+), 22 deletions(-)
>>
>> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
>> index 0d7c99c..d8250a9 100644
>> --- a/tcg/i386/tcg-target.c
>> +++ b/tcg/i386/tcg-target.c
>> @@ -1141,6 +1141,17 @@ static void * const qemu_ld_helpers[16] = {
>>      [MO_BEQ]  = helper_be_ldq_mmu,
>>  };
>>
>> +/* LoadLink helpers, only unsigned. Use the macro below to access them. */
>> +static void * const qemu_ldex_helpers[16] = {
>> +    [MO_LEUL] = helper_le_ldlinkul_mmu,
>> +};
>> +
>> +#define LDEX_HELPER(mem_op)                                             \
>> +({                                                                      \
>> +    assert(mem_op & MO_EXCL);                                           \
>> +    qemu_ldex_helpers[((int)mem_op - MO_EXCL)];                         \
>> +})
>> +
>>  /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
>>   *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
>>   */
>> @@ -1154,6 +1165,17 @@ static void * const qemu_st_helpers[16] = {
>>      [MO_BEQ] = helper_be_stq_mmu,
>>  };
>>
>> +/* StoreConditional helpers. Use the macro below to access them. */
>> +static void * const qemu_stex_helpers[16] = {
>> +    [MO_LEUL] = helper_le_stcondl_mmu,
>> +};
>> +
>> +#define STEX_HELPER(mem_op)                                             \
>> +({                                                                      \
>> +    assert(mem_op & MO_EXCL);                                           \
>> +    qemu_stex_helpers[(int)mem_op - MO_EXCL];                           \
>> +})
>> +
>
> Same comments as for target-arm.
>
> Do we need to be protecting backends with HAS_LDST_EXCL defines or some
> such macro hackery? What currently happens if you use the new TCG ops
> when the backend doesn't support them? Is supporting all backends a
> prerequisite for the series?

I think the ideal approach would be to have all the backends implement
the slow path for atomic instructions, so that a HAS_LDST_EXCL macro is
never needed; a frontend can then choose whether or not to rely on the
slow path. So yes, ideally supporting all the backends is a
prerequisite for the series.
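
To make the alternative concrete: a HAS_LDST_EXCL guard would force every
frontend that emits the new ops to also carry a fallback, along the lines
of the hypothetical sketch below (the macro and the generator names are
illustrative, not code from this series):

    /* Hypothetical transitional guard -- not part of this series. */
    #ifdef HAS_LDST_EXCL
        /* The backend implements the new ops: emit a real LoadLink. */
        tcg_gen_qemu_ldlink_i32(val, addr, mem_index, MO_LEUL | MO_EXCL);
    #else
        /* No backend support: fall back to a plain load, giving up the
         * exclusive-monitor semantics. */
        tcg_gen_qemu_ld_i32(val, addr, mem_index, MO_LEUL);
    #endif

Once every backend implements the slow path, both the #else branch and
the macro itself go away.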

>
>>  /* Perform the TLB load and compare.
>>
>>     Inputs:
>> @@ -1249,6 +1271,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
>>   * for a load or store, so that we can later generate the correct helper code
>>   */
>>  static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
>> +                                TCGReg llsc_success,
>>                                  TCGReg datalo, TCGReg datahi,
>>                                  TCGReg addrlo, TCGReg addrhi,
>>                                  tcg_insn_unit *raddr,
>> @@ -1257,6 +1280,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
>>      TCGLabelQemuLdst *label = new_ldst_label(s);
>>
>>      label->is_ld = is_ld;
>> +    label->llsc_success = llsc_success;
>>      label->oi = oi;
>>      label->datalo_reg = datalo;
>>      label->datahi_reg = datahi;
>> @@ -1311,7 +1335,11 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
>>                                   (uintptr_t)l->raddr);
>>      }
>>
>> -    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
>> +    if (opc & MO_EXCL) {
>> +        tcg_out_call(s, LDEX_HELPER(opc));
>> +    } else {
>> +        tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]);
>> +    }
>>
>>      data_reg = l->datalo_reg;
>>      switch (opc & MO_SSIZE) {
>> @@ -1415,9 +1443,16 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
>>          }
>>      }
>>
>> -    /* "Tail call" to the helper, with the return address back inline.  */
>> -    tcg_out_push(s, retaddr);
>> -    tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
>> +    if (opc & MO_EXCL) {
>> +        tcg_out_call(s, STEX_HELPER(opc));
>> +        /* Save the output of the StoreConditional */
>> +        tcg_out_mov(s, TCG_TYPE_I32, l->llsc_success, TCG_REG_EAX);
>> +        tcg_out_jmp(s, l->raddr);
>> +    } else {
>> +        /* "Tail call" to the helper, with the return address back inline. */
>> +        tcg_out_push(s, retaddr);
>> +        tcg_out_jmp(s, qemu_st_helpers[opc]);
>> +    }
>>  }
>>  #elif defined(__x86_64__) && defined(__linux__)
>>  # include <asm/prctl.h>
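
A note on the two slow-path hunks above, since the data flow is easy to
miss: the StoreConditional helper reports success through its ordinary C
return value, which the i386 calling convention places in EAX; the slow
path then copies EAX into l->llsc_success before jumping back to the
generated code. For illustration, the helper-side contract could look
roughly like the sketch below -- the monitor check is hypothetical and
the real helper also has to manage the exclusive state, so this is not
the series' implementation:

    /* Sketch only: assumes the STREX-like convention where 0 means the
     * store succeeded and 1 means the exclusive monitor was lost. */
    uint32_t helper_le_stcondl_mmu(CPUArchState *env, target_ulong addr,
                                   uint32_t val, TCGMemOpIdx oi,
                                   uintptr_t retaddr)
    {
        if (!address_still_exclusive(env, addr)) { /* hypothetical check */
            return 1;                   /* fail: skip the store entirely */
        }
        helper_le_stl_mmu(env, addr, val, oi, retaddr); /* normal store */
        return 0;                       /* success: lands in EAX above */
    }
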
>> @@ -1530,7 +1565,8 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
>>  /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
>>     EAX. It will be useful once fixed registers globals are less
>>     common. */
>> -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
>> +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64,
>> +                            bool isLoadLink)
>>  {
>>      TCGReg datalo, datahi, addrlo;
>>      TCGReg addrhi __attribute__((unused));
>> @@ -1553,14 +1589,34 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
>>      mem_index = get_mmuidx(oi);
>>      s_bits = opc & MO_SIZE;
>>
>> -    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
>> -                     label_ptr, offsetof(CPUTLBEntry, addr_read));
>> +    if (isLoadLink) {
>> +        TCGType t = ((TCG_TARGET_REG_BITS == 64) && (TARGET_LONG_BITS == 64)) ?
>> +                                                   TCG_TYPE_I64 : TCG_TYPE_I32;
>> +        /* The JMP address will be patched afterwards,
>> +         * in tcg_out_qemu_ld_slow_path (two times when
>> +         * TARGET_LONG_BITS > TCG_TARGET_REG_BITS). */
>> +        tcg_out_mov(s, t, TCG_REG_L1, addrlo);
>> +
>> +        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
>> +            /* Store the second part of the address. */
>> +            tcg_out_mov(s, t, TCG_REG_L0, addrhi);
>> +            /* We add 4 to include the jmp that follows. */
>> +            label_ptr[1] = s->code_ptr + 4;
>> +        }
>>
>> -    /* TLB Hit.  */
>> -    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
>> +        tcg_out_opc(s, OPC_JMP_long, 0, 0, 0);
>> +        label_ptr[0] = s->code_ptr;
>> +        s->code_ptr += 4;
>> +    } else {
>> +        tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
>> +                         label_ptr, offsetof(CPUTLBEntry, addr_read));
>> +
>> +        /* TLB Hit.  */
>> +        tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
>> +    }
>>
>>      /* Record the current context of a load into ldst label */
>> -    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
>> +    add_qemu_ldst_label(s, true, oi, 0, datalo, datahi, addrlo, addrhi,
>>                          s->code_ptr, label_ptr);
>>  #else
>>      {
>> @@ -1663,9 +1719,10 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
>>      }
>>  }
>>
>> -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
>> +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64,
>> +                            bool isStoreCond)
>>  {
>> -    TCGReg datalo, datahi, addrlo;
>> +    TCGReg datalo, datahi, addrlo, llsc_success;
>>      TCGReg addrhi __attribute__((unused));
>>      TCGMemOpIdx oi;
>>      TCGMemOp opc;
>> @@ -1675,6 +1732,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
>>      tcg_insn_unit *label_ptr[2];
>>  #endif
>>
>> +    /* The stcond variant has one more param */
>> +    llsc_success = (isStoreCond ? *args++ : 0);
>> +
>>      datalo = *args++;
>>      datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
>>      addrlo = *args++;
>> @@ -1686,15 +1746,35 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
>>      mem_index = get_mmuidx(oi);
>>      s_bits = opc & MO_SIZE;
>>
>> -    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
>> -                     label_ptr, offsetof(CPUTLBEntry, addr_write));
>> +    if (isStoreCond) {
>> +        TCGType t = ((TCG_TARGET_REG_BITS == 64) && (TARGET_LONG_BITS == 64)) ?
>> +                                                   TCG_TYPE_I64 : TCG_TYPE_I32;
>> +        /* The JMP address will be filled afterwards,
>> +         * in tcg_out_qemu_ld_slow_path (two times when
>> +         * TARGET_LONG_BITS > TCG_TARGET_REG_BITS). */
>> +        tcg_out_mov(s, t, TCG_REG_L1, addrlo);
>> +
>> +        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
>> +            /* Store the second part of the address. */
>> +            tcg_out_mov(s, t, TCG_REG_L0, addrhi);
>> +            /* We add 4 to include the jmp that follows. */
>> +            label_ptr[1] = s->code_ptr + 4;
>> +        }
>>
>> -    /* TLB Hit.  */
>> -    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
>> +        tcg_out_opc(s, OPC_JMP_long, 0, 0, 0);
>> +        label_ptr[0] = s->code_ptr;
>> +        s->code_ptr += 4;
>> +    } else {
>> +        tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
>> +                         label_ptr, offsetof(CPUTLBEntry, addr_write));
>> +
>> +        /* TLB Hit.  */
>> +        tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
>> +    }
>>
>>      /* Record the current context of a store into ldst label */
>> -    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
>> -                        s->code_ptr, label_ptr);
>> +    add_qemu_ldst_label(s, false, oi, llsc_success, datalo, datahi, addrlo,
>> +                        addrhi, s->code_ptr, label_ptr);
>>  #else
>>      {
>>          int32_t offset = GUEST_BASE;
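
The net effect of the ld/st hunks above is that an exclusive access never
takes the inline fast path: the backend only loads the address into
TCG_REG_L1 (and TCG_REG_L0 for the high part), then emits an
unconditional jump that is later patched to point at the slow path, so
every LoadLink/StoreConditional goes through the helpers. From the
frontend side, the intended use of the new ops is roughly the sketch
below -- the generator names are inferred from INDEX_op_qemu_ldlink_i32
and INDEX_op_qemu_stcond_i32, so take them as illustrative:

    /* Illustrative frontend sketch for a guest ldrex/strex pair. */
    TCGv_i32 val = tcg_temp_new_i32();
    TCGv_i32 success = tcg_temp_new_i32();

    /* ldrex: load and arm the exclusive monitor for this address. */
    tcg_gen_qemu_ldlink_i32(val, addr, mem_index, MO_LEUL | MO_EXCL);

    /* ... guest code computes the value to store ... */

    /* strex: store only if the monitor is still armed; success receives
     * the helper's return value. */
    tcg_gen_qemu_stcond_i32(success, val, addr, mem_index, MO_LEUL | MO_EXCL);

The extra success output is also why INDEX_op_qemu_stcond_i32 carries one
more "r" than INDEX_op_qemu_st_i32 in the constraint table below.
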
>> @@ -1955,16 +2035,22 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>>          break;
>>
>>      case INDEX_op_qemu_ld_i32:
>> -        tcg_out_qemu_ld(s, args, 0);
>> +        tcg_out_qemu_ld(s, args, 0, 0);
>> +        break;
>> +    case INDEX_op_qemu_ldlink_i32:
>> +        tcg_out_qemu_ld(s, args, 0, 1);
>>          break;
>>      case INDEX_op_qemu_ld_i64:
>> -        tcg_out_qemu_ld(s, args, 1);
>> +        tcg_out_qemu_ld(s, args, 1, 0);
>>          break;
>>      case INDEX_op_qemu_st_i32:
>> -        tcg_out_qemu_st(s, args, 0);
>> +        tcg_out_qemu_st(s, args, 0, 0);
>> +        break;
>> +    case INDEX_op_qemu_stcond_i32:
>> +        tcg_out_qemu_st(s, args, 0, 1);
>>          break;
>>      case INDEX_op_qemu_st_i64:
>> -        tcg_out_qemu_st(s, args, 1);
>> +        tcg_out_qemu_st(s, args, 1, 0);
>>          break;
>>
>>      OP_32_64(mulu2):
>> @@ -2186,17 +2272,23 @@ static const TCGTargetOpDef x86_op_defs[] = {
>>
>>  #if TCG_TARGET_REG_BITS == 64
>>      { INDEX_op_qemu_ld_i32, { "r", "L" } },
>> +    { INDEX_op_qemu_ldlink_i32, { "r", "L" } },
>>      { INDEX_op_qemu_st_i32, { "L", "L" } },
>> +    { INDEX_op_qemu_stcond_i32, { "r", "L", "L" } },
>>      { INDEX_op_qemu_ld_i64, { "r", "L" } },
>>      { INDEX_op_qemu_st_i64, { "L", "L" } },
>>  #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
>>      { INDEX_op_qemu_ld_i32, { "r", "L" } },
>> +    { INDEX_op_qemu_ldlink_i32, { "r", "L" } },
>>      { INDEX_op_qemu_st_i32, { "L", "L" } },
>> +    { INDEX_op_qemu_stcond_i32, { "r", "L", "L" } },
>>      { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
>>      { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
>>  #else
>>      { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
>> +    { INDEX_op_qemu_ldlink_i32, { "r", "L", "L" } },
>>      { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
>> +    { INDEX_op_qemu_stcond_i32, { "r", "L", "L", "L" } },
>>      { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
>>      { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
>>  #endif
>
> --
> Alex Bennée

Regards,
alvise