Re: [Qemu-devel] [PATCH 2/2] target-mips: reimplement SC instruction and use cmpxchg

2016-09-27 Thread Leon Alrae
On Wed, Sep 21, 2016 at 01:16:28PM -0700, Richard Henderson wrote:
> On 09/21/2016 01:07 AM, Leon Alrae wrote:
> >+tcg_gen_brcond_tl(TCG_COND_EQ, addr, cpu_lladdr, l1);
> >+tcg_temp_free(addr);
> >+tcg_gen_movi_tl(t0, 0);
> >+tcg_gen_br(done);
> >+
> >+gen_set_label(l1);
> >+/* generate cmpxchg */
> >+val = tcg_temp_new();
> >+gen_load_gpr(val, rt);
> >+tcg_gen_atomic_cmpxchg_tl(t0, cpu_lladdr, cpu_llval, val,
> >+  ctx->mem_idx, tcg_mo);
> >+tcg_gen_setcond_tl(TCG_COND_EQ, t0, t0, cpu_llval);
> >+tcg_temp_free(val);
> >+
> >+gen_set_label(done);
> >+/* store the result into the register */
> >+gen_store_gpr(t0, rt);
> > tcg_temp_free(t0);
> 
> The only thing I would change is to duplicate the gen_store_gpr into
> both branches, so that we don't have to store t0 into the stack
> across the blocks.

Done in v3.

> 
> Otherwise,
> 
> Reviewed-by: Richard Henderson 

Thanks for reviewing.

Leon



Re: [Qemu-devel] [PATCH 2/2] target-mips: reimplement SC instruction and use cmpxchg

2016-09-21 Thread Richard Henderson

On 09/21/2016 01:07 AM, Leon Alrae wrote:

+tcg_gen_brcond_tl(TCG_COND_EQ, addr, cpu_lladdr, l1);
+tcg_temp_free(addr);
+tcg_gen_movi_tl(t0, 0);
+tcg_gen_br(done);
+
+gen_set_label(l1);
+/* generate cmpxchg */
+val = tcg_temp_new();
+gen_load_gpr(val, rt);
+tcg_gen_atomic_cmpxchg_tl(t0, cpu_lladdr, cpu_llval, val,
+  ctx->mem_idx, tcg_mo);
+tcg_gen_setcond_tl(TCG_COND_EQ, t0, t0, cpu_llval);
+tcg_temp_free(val);
+
+gen_set_label(done);
+/* store the result into the register */
+gen_store_gpr(t0, rt);
 tcg_temp_free(t0);


The only thing I would change is to duplicate the gen_store_gpr into both 
branches, so that we don't have to store t0 into the stack across the blocks.


Otherwise,

Reviewed-by: Richard Henderson 


r~



[Qemu-devel] [PATCH 2/2] target-mips: reimplement SC instruction and use cmpxchg

2016-09-21 Thread Leon Alrae
This patch completely rewrites conditional stores. Now we use cmpxchg and
no longer need separate implementations for user and system emulation.

Signed-off-by: Leon Alrae 
---
 linux-user/main.c   |  58 --
 target-mips/cpu.h   |   4 --
 target-mips/helper.c|   6 +--
 target-mips/helper.h|   2 -
 target-mips/op_helper.c |  25 ---
 target-mips/translate.c | 107 +---
 6 files changed, 39 insertions(+), 163 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 0d0bf9d..bc1b307 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -2230,55 +2230,6 @@ static const uint8_t mips_syscall_args[] = {
 #  undef MIPS_SYS
 # endif /* O32 */
 
-static int do_store_exclusive(CPUMIPSState *env)
-{
-target_ulong addr;
-target_ulong page_addr;
-target_ulong val;
-int flags;
-int segv = 0;
-int reg;
-int d;
-
-addr = env->lladdr;
-page_addr = addr & TARGET_PAGE_MASK;
-start_exclusive();
-mmap_lock();
-flags = page_get_flags(page_addr);
-if ((flags & PAGE_READ) == 0) {
-segv = 1;
-} else {
-reg = env->llreg & 0x1f;
-d = (env->llreg & 0x20) != 0;
-if (d) {
-segv = get_user_s64(val, addr);
-} else {
-segv = get_user_s32(val, addr);
-}
-if (!segv) {
-if (val != env->llval) {
-env->active_tc.gpr[reg] = 0;
-} else {
-if (d) {
-segv = put_user_u64(env->llnewval, addr);
-} else {
-segv = put_user_u32(env->llnewval, addr);
-}
-if (!segv) {
-env->active_tc.gpr[reg] = 1;
-}
-}
-}
-}
-env->lladdr = -1;
-if (!segv) {
-env->active_tc.PC += 4;
-}
-mmap_unlock();
-end_exclusive();
-return segv;
-}
-
 /* Break codes */
 enum {
 BRK_OVERFLOW = 6,
@@ -2426,15 +2377,6 @@ done_syscall:
   }
 }
 break;
-case EXCP_SC:
-if (do_store_exclusive(env)) {
-info.si_signo = TARGET_SIGSEGV;
-info.si_errno = 0;
-info.si_code = TARGET_SEGV_MAPERR;
-info._sifields._sigfault._addr = env->active_tc.PC;
-queue_signal(env, info.si_signo, &info);
-}
-break;
 case EXCP_DSPDIS:
 info.si_signo = TARGET_SIGILL;
 info.si_errno = 0;
diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index 78555b9..6c268f0 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -500,8 +500,6 @@ struct CPUMIPSState {
 /* XXX: Maybe make LLAddr per-TC? */
 target_ulong lladdr; /* LL virtual address compared against SC */
 target_ulong llval;
-target_ulong llnewval;
-target_ulong llreg;
 uint64_t CP0_LLAddr_rw_bitmask;
 int CP0_LLAddr_shift;
 target_ulong CP0_WatchLo[8];
@@ -796,8 +794,6 @@ enum {
 
 EXCP_LAST = EXCP_TLBRI,
 };
-/* Dummy exception for conditional stores.  */
-#define EXCP_SC 0x100
 
 /*
  * This is an interrnally generated WAKE request line.
diff --git a/target-mips/helper.c b/target-mips/helper.c
index c864b15..67b19e6 100644
--- a/target-mips/helper.c
+++ b/target-mips/helper.c
@@ -958,10 +958,8 @@ void QEMU_NORETURN do_raise_exception_err(CPUMIPSState *env,
 {
 CPUState *cs = CPU(mips_env_get_cpu(env));
 
-if (exception < EXCP_SC) {
-qemu_log_mask(CPU_LOG_INT, "%s: %d %d\n",
-  __func__, exception, error_code);
-}
+qemu_log_mask(CPU_LOG_INT, "%s: %d %d\n",
+  __func__, exception, error_code);
 cs->exception_index = exception;
 env->error_code = error_code;
 
diff --git a/target-mips/helper.h b/target-mips/helper.h
index 666936c..dd68751 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -13,10 +13,8 @@ DEF_HELPER_4(swr, void, env, tl, tl, int)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(ll, tl, env, tl, int)
-DEF_HELPER_4(sc, tl, env, tl, tl, int)
 #ifdef TARGET_MIPS64
 DEF_HELPER_3(lld, tl, env, tl, int)
-DEF_HELPER_4(scd, tl, env, tl, tl, int)
 #endif
 #endif
 
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index e0c9842..9f094ad 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -300,31 +300,6 @@ HELPER_LD_ATOMIC(ll, lw, 0x3)
 HELPER_LD_ATOMIC(lld, ld, 0x7)
 #endif
 #undef HELPER_LD_ATOMIC
-
-#define HELPER_ST_ATOMIC(name, ld_insn, st_insn, almask)  \
-target_ulong helper_##name(CPUMIPSState *env, target_ulong arg1,  \
-   target_ulong arg2, int mem_idx)\
-{ \
-target_long tmp;  \
-   

Re: [Qemu-devel] [PATCH 2/2] target-mips: reimplement SC instruction and use cmpxchg

2016-09-19 Thread Leon Alrae
On Fri, Sep 16, 2016 at 09:48:51AM -0700, Richard Henderson wrote:
> On 09/15/2016 01:44 AM, Leon Alrae wrote:
> > /* Store conditional */
> >+static void gen_st_cond(DisasContext *ctx, int rt, int base, int offset,
> >+int size)
> > {
> >+TCGv addr, t0, val;
> >+TCGLabel *l1 = gen_new_label();
> >+TCGLabel *l2 = gen_new_label();
> >+TCGLabel *done = gen_new_label();
> >
> >-#ifdef CONFIG_USER_ONLY
> > t0 = tcg_temp_local_new();
> >+addr = tcg_temp_local_new();
> >+/* check the alignment of the address */
> >+gen_base_offset_addr(ctx, addr, base, offset);
> >+tcg_gen_andi_tl(t0, addr, size - 1);
> 
> You shouldn't have to test the alignment here, as the alignment
> should have been tested during the load-locked, and the (aligned)
> address will be compared.

This is to satisfy the requirement that an unaligned SC generates an Address
Error exception. But I agree that in practice this doesn't seem
particularly useful, since LL will already do that.

> 
> 
> >+/* compare the address against that of the preceding LL */
> >+tcg_gen_brcond_tl(TCG_COND_EQ, addr, cpu_lladdr, l2);
> >+tcg_gen_movi_tl(t0, 0);
> >+tcg_gen_br(done);
> ...
> >+#ifdef TARGET_MIPS64
> >+case 8: /* SCD */
> >+tcg_gen_atomic_cmpxchg_i64(t0, addr, cpu_llval, val,
> >+   ctx->mem_idx, MO_TEQ);
> > break;
> > #endif
> >-case OPC_SC:
> >-case R6_OPC_SC:
> >-op_st_sc(t1, t0, rt, ctx);
> >+case 4: /* SC */
> >+{
> >+TCGv_i32 val32 = tcg_temp_new_i32();
> >+TCGv_i32 llval32 = tcg_temp_new_i32();
> >+TCGv_i32 old32 = tcg_temp_new_i32();
> >+tcg_gen_trunc_tl_i32(val32, val);
> >+tcg_gen_trunc_tl_i32(llval32, cpu_llval);
> >+
> >+tcg_gen_atomic_cmpxchg_i32(old32, addr, llval32, val32,
> >+   ctx->mem_idx, MO_TESL);
> >+tcg_gen_ext_i32_tl(t0, old32);
> 
> You can use tcg_gen_atomic_cmpxchg_tl so that you do not need to do
> all of this truncation yourself.  Which means that if you replace
> the size parameter with a TCGMemOp parameter (MO_TEQ vs MO_TESL) you
> can make all this code common.

Ah, yes.

> 
> Further, local temporaries are less than ideal and should be avoided
> if possible.  Using them results in an extra store into the local
> stack frame.
> 
> We can avoid this for addr by noting that once you have compared
> addr to cpu_lladdr, you can free addr and use cpu_lladdr in the
> actual cmpxchg.

Ok. I'll correct in v2.

Thanks,
Leon



Re: [Qemu-devel] [PATCH 2/2] target-mips: reimplement SC instruction and use cmpxchg

2016-09-16 Thread Richard Henderson

On 09/15/2016 01:44 AM, Leon Alrae wrote:

 /* Store conditional */
+static void gen_st_cond(DisasContext *ctx, int rt, int base, int offset,
+int size)
 {
+TCGv addr, t0, val;
+TCGLabel *l1 = gen_new_label();
+TCGLabel *l2 = gen_new_label();
+TCGLabel *done = gen_new_label();

-#ifdef CONFIG_USER_ONLY
 t0 = tcg_temp_local_new();
+addr = tcg_temp_local_new();
+/* check the alignment of the address */
+gen_base_offset_addr(ctx, addr, base, offset);
+tcg_gen_andi_tl(t0, addr, size - 1);


You shouldn't have to test the alignment here, as the alignment should have 
been tested during the load-locked, and the (aligned) address will be compared.




+/* compare the address against that of the preceding LL */
+tcg_gen_brcond_tl(TCG_COND_EQ, addr, cpu_lladdr, l2);
+tcg_gen_movi_tl(t0, 0);
+tcg_gen_br(done);

...

+#ifdef TARGET_MIPS64
+case 8: /* SCD */
+tcg_gen_atomic_cmpxchg_i64(t0, addr, cpu_llval, val,
+   ctx->mem_idx, MO_TEQ);
 break;
 #endif
-case OPC_SC:
-case R6_OPC_SC:
-op_st_sc(t1, t0, rt, ctx);
+case 4: /* SC */
+{
+TCGv_i32 val32 = tcg_temp_new_i32();
+TCGv_i32 llval32 = tcg_temp_new_i32();
+TCGv_i32 old32 = tcg_temp_new_i32();
+tcg_gen_trunc_tl_i32(val32, val);
+tcg_gen_trunc_tl_i32(llval32, cpu_llval);
+
+tcg_gen_atomic_cmpxchg_i32(old32, addr, llval32, val32,
+   ctx->mem_idx, MO_TESL);
+tcg_gen_ext_i32_tl(t0, old32);


You can use tcg_gen_atomic_cmpxchg_tl so that you do not need to do all of this 
truncation yourself.  Which means that if you replace the size parameter with a 
TCGMemOp parameter (MO_TEQ vs MO_TESL) you can make all this code common.


Further, local temporaries are less than ideal and should be avoided if 
possible.  Using them results in an extra store into the local stack frame.


We can avoid this for addr by noting that once you have compared addr to 
cpu_lladdr, you can free addr and use cpu_lladdr in the actual cmpxchg.



r~



[Qemu-devel] [PATCH 2/2] target-mips: reimplement SC instruction and use cmpxchg

2016-09-15 Thread Leon Alrae
This patch completely rewrites conditional stores. Now we use cmpxchg and
no longer need separate implementations for user and system emulation.

Signed-off-by: Leon Alrae 
---
 linux-user/main.c   |  58 -
 target-mips/cpu.h   |   4 --
 target-mips/helper.c|   6 +--
 target-mips/helper.h|   2 -
 target-mips/op_helper.c |  25 -
 target-mips/translate.c | 131 
 6 files changed, 67 insertions(+), 159 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 0d0bf9d..bc1b307 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -2230,55 +2230,6 @@ static const uint8_t mips_syscall_args[] = {
 #  undef MIPS_SYS
 # endif /* O32 */
 
-static int do_store_exclusive(CPUMIPSState *env)
-{
-target_ulong addr;
-target_ulong page_addr;
-target_ulong val;
-int flags;
-int segv = 0;
-int reg;
-int d;
-
-addr = env->lladdr;
-page_addr = addr & TARGET_PAGE_MASK;
-start_exclusive();
-mmap_lock();
-flags = page_get_flags(page_addr);
-if ((flags & PAGE_READ) == 0) {
-segv = 1;
-} else {
-reg = env->llreg & 0x1f;
-d = (env->llreg & 0x20) != 0;
-if (d) {
-segv = get_user_s64(val, addr);
-} else {
-segv = get_user_s32(val, addr);
-}
-if (!segv) {
-if (val != env->llval) {
-env->active_tc.gpr[reg] = 0;
-} else {
-if (d) {
-segv = put_user_u64(env->llnewval, addr);
-} else {
-segv = put_user_u32(env->llnewval, addr);
-}
-if (!segv) {
-env->active_tc.gpr[reg] = 1;
-}
-}
-}
-}
-env->lladdr = -1;
-if (!segv) {
-env->active_tc.PC += 4;
-}
-mmap_unlock();
-end_exclusive();
-return segv;
-}
-
 /* Break codes */
 enum {
 BRK_OVERFLOW = 6,
@@ -2426,15 +2377,6 @@ done_syscall:
   }
 }
 break;
-case EXCP_SC:
-if (do_store_exclusive(env)) {
-info.si_signo = TARGET_SIGSEGV;
-info.si_errno = 0;
-info.si_code = TARGET_SEGV_MAPERR;
-info._sifields._sigfault._addr = env->active_tc.PC;
-queue_signal(env, info.si_signo, &info);
-}
-break;
 case EXCP_DSPDIS:
 info.si_signo = TARGET_SIGILL;
 info.si_errno = 0;
diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index 78555b9..6c268f0 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -500,8 +500,6 @@ struct CPUMIPSState {
 /* XXX: Maybe make LLAddr per-TC? */
 target_ulong lladdr; /* LL virtual address compared against SC */
 target_ulong llval;
-target_ulong llnewval;
-target_ulong llreg;
 uint64_t CP0_LLAddr_rw_bitmask;
 int CP0_LLAddr_shift;
 target_ulong CP0_WatchLo[8];
@@ -796,8 +794,6 @@ enum {
 
 EXCP_LAST = EXCP_TLBRI,
 };
-/* Dummy exception for conditional stores.  */
-#define EXCP_SC 0x100
 
 /*
  * This is an interrnally generated WAKE request line.
diff --git a/target-mips/helper.c b/target-mips/helper.c
index c864b15..67b19e6 100644
--- a/target-mips/helper.c
+++ b/target-mips/helper.c
@@ -958,10 +958,8 @@ void QEMU_NORETURN do_raise_exception_err(CPUMIPSState *env,
 {
 CPUState *cs = CPU(mips_env_get_cpu(env));
 
-if (exception < EXCP_SC) {
-qemu_log_mask(CPU_LOG_INT, "%s: %d %d\n",
-  __func__, exception, error_code);
-}
+qemu_log_mask(CPU_LOG_INT, "%s: %d %d\n",
+  __func__, exception, error_code);
 cs->exception_index = exception;
 env->error_code = error_code;
 
diff --git a/target-mips/helper.h b/target-mips/helper.h
index 666936c..dd68751 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -13,10 +13,8 @@ DEF_HELPER_4(swr, void, env, tl, tl, int)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(ll, tl, env, tl, int)
-DEF_HELPER_4(sc, tl, env, tl, tl, int)
 #ifdef TARGET_MIPS64
 DEF_HELPER_3(lld, tl, env, tl, int)
-DEF_HELPER_4(scd, tl, env, tl, tl, int)
 #endif
 #endif
 
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index e0c9842..9f094ad 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -300,31 +300,6 @@ HELPER_LD_ATOMIC(ll, lw, 0x3)
 HELPER_LD_ATOMIC(lld, ld, 0x7)
 #endif
 #undef HELPER_LD_ATOMIC
-
-#define HELPER_ST_ATOMIC(name, ld_insn, st_insn, almask)  \
-target_ulong helper_##name(CPUMIPSState *env, target_ulong arg1,  \
-   target_ulong arg2, int mem_idx)\
-{ \
-target_long tmp;  \
-