Re: [Qemu-devel] [PATCH v2 1/8] target-arm: A64: add support for ld/st pair

2013-12-13 Thread Peter Maydell
On 12 December 2013 12:14, C Fontana claudio.font...@linaro.org wrote:
 I think that there is more than the missing return:

 we need to handle the case 0 as well, as it's a perfectly valid form
 of a load/store pair:
 it's the Load/Store no-allocate pair (offset) (LDNP, STNP).

 So in my view we need to add a case 0 where we handle  the load/store
 no-allocate pair,
 and no default case, or a default case where we assert(0) for unreachable 
 code,
 since all possible index values (2 bits) should be handled by the
 switch statement.

Yep. I've added support for the non-temporal versions
(pretty easy since qemu doesn't care about cache allocation
hints) to my patchstack so we'll get this in the next
version of the patchset.

thanks
-- PMM



Re: [Qemu-devel] [PATCH v2 1/8] target-arm: A64: add support for ld/st pair

2013-12-12 Thread C Fontana
Hi,

I saw a missing return below:

On 11 December 2013 23:01, Peter Maydell peter.mayd...@linaro.org wrote:
 From: Alex Bennée alex.ben...@linaro.org

 This patch support the basic load and store pair instructions and
 includes the generic helper functions:

   * do_gpr_st()
   * do_fp_st()
   * do_gpr_ld()
   * do_fp_ld()
   * read_cpu_reg_sp()
   * gen_check_sp_alignment()

 The last function gen_check_sp_alignment() is a NULL op currently but
 put in place to make it easy to add SP alignment checking later.

 Signed-off-by: Alex Bennée alex.ben...@linaro.org
 Signed-off-by: Peter Maydell peter.mayd...@linaro.org

 ---

 Changes
   - merge ldp/stp together
   - add read_cpu_reg_sp() and use
   - only reference registers as needed
   - use common tcg ops for gpr/fp loads/store
 ---
  target-arm/translate-a64.c | 268 
 -
  1 file changed, 263 insertions(+), 5 deletions(-)

 diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
 index 0a76130..018b3ee 100644
 --- a/target-arm/translate-a64.c
 +++ b/target-arm/translate-a64.c
 @@ -99,6 +99,15 @@ void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
  cpu_fprintf(f, \n);
  }

 +static int get_mem_index(DisasContext *s)
 +{
 +#ifdef CONFIG_USER_ONLY
 +return 1;
 +#else
 +return s-user;
 +#endif
 +}
 +
  void gen_a64_set_pc_im(uint64_t val)
  {
  tcg_gen_movi_i64(cpu_pc, val);
 @@ -250,6 +259,17 @@ static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, 
 int sf)
  return v;
  }

 +static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 +{
 +TCGv_i64 v = new_tmp_a64(s);
 +if (sf) {
 +tcg_gen_mov_i64(v, cpu_X[reg]);
 +} else {
 +tcg_gen_ext32u_i64(v, cpu_X[reg]);
 +}
 +return v;
 +}
 +
  /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
   * than the 32 bit equivalent.
   */
 @@ -278,6 +298,124 @@ static inline void gen_logic_CC(int sf, TCGv_i64 result)
  }

  /*
 + * Load/Store generators
 + */
 +
 +/*
 + *  Store from GPR register to memory
 + */
 +static void do_gpr_st(DisasContext *s, TCGv_i64 source,
 +  TCGv_i64 tcg_addr, int size)
 +{
 +g_assert(size = 3);
 +tcg_gen_qemu_st_i64(source, tcg_addr, get_mem_index(s), MO_TE + size);
 +}
 +
 +/*
 + * Load from memory to GPR register
 + */
 +static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
 +  int size, bool is_signed, bool extend)
 +{
 +TCGMemOp memop =  MO_TE + size;
 +
 +g_assert(size = 3);
 +
 +if (is_signed) {
 +memop += MO_SIGN;
 +}
 +
 +tcg_gen_qemu_ld_i64(dest, tcg_addr, get_mem_index(s), memop);
 +
 +if (extend  is_signed) {
 +g_assert(size  3);
 +tcg_gen_ext32u_i64(dest, dest);
 +}
 +}
 +
 +/*
 + * Store from FP register to memory
 + */
 +static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int 
 size)
 +{
 +/* This writes the bottom N bits of a 128 bit wide vector to memory */
 +int freg_offs = offsetof(CPUARMState, vfp.regs[srcidx * 2]);
 +TCGv_i64 tmp = tcg_temp_new_i64();
 +
 +if (size  4) {
 +switch (size) {
 +case 0:
 +tcg_gen_ld8u_i64(tmp, cpu_env, freg_offs);
 +break;
 +case 1:
 +tcg_gen_ld16u_i64(tmp, cpu_env, freg_offs);
 +break;
 +case 2:
 +tcg_gen_ld32u_i64(tmp, cpu_env, freg_offs);
 +break;
 +case 3:
 +tcg_gen_ld_i64(tmp, cpu_env, freg_offs);
 +break;
 +}
 +tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TE + size);
 +} else {
 +TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
 +tcg_gen_ld_i64(tmp, cpu_env, freg_offs);
 +tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TEQ);
 +tcg_gen_qemu_st64(tmp, tcg_addr, get_mem_index(s));
 +tcg_gen_ld_i64(tmp, cpu_env, freg_offs + sizeof(float64));
 +tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 +tcg_gen_qemu_st_i64(tmp, tcg_hiaddr, get_mem_index(s), MO_TEQ);
 +tcg_temp_free_i64(tcg_hiaddr);
 +}
 +
 +tcg_temp_free_i64(tmp);
 +}
 +
 +/* Load from memory to FP register */
 +static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int 
 size)
 +{
 +/* This always zero-extends and writes to a full 128 bit wide vector */
 +int freg_offs = offsetof(CPUARMState, vfp.regs[destidx * 2]);
 +TCGv_i64 tmplo = tcg_temp_new_i64();
 +TCGv_i64 tmphi;
 +
 +if (size  4) {
 +TCGMemOp memop =  MO_TE + size;
 +tmphi = tcg_const_i64(0);
 +tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
 +} else {
 +TCGv_i64 tcg_hiaddr;
 +tmphi = tcg_temp_new_i64();
 +tcg_hiaddr = tcg_temp_new_i64();
 +
 +tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), MO_TEQ);
 +tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 +tcg_gen_qemu_ld_i64(tmphi, 

Re: [Qemu-devel] [PATCH v2 1/8] target-arm: A64: add support for ld/st pair

2013-12-12 Thread Alex Bennée

claudio.font...@linaro.org writes:

 Hi,

 I saw a missing return below:
snip
 +default: /* Failed decoder tree? */
 +unallocated_encoding(s);
 +break;
 +}

 This doesn't seem right (break instead of return):

 default:
 unallocated_encoding(s);
 return;
 }

Good catch. I suspect it never hits though (or risu doesn't generate
enough unallocated versions).

-- 
Alex Bennée
QEMU/KVM Hacker for Linaro




Re: [Qemu-devel] [PATCH v2 1/8] target-arm: A64: add support for ld/st pair

2013-12-12 Thread Peter Maydell
On 12 December 2013 11:43, Alex Bennée alex.ben...@linaro.org wrote:

 claudio.font...@linaro.org writes:

 Hi,

 I saw a missing return below:
 snip
 +default: /* Failed decoder tree? */
 +unallocated_encoding(s);
 +break;
 +}

 This doesn't seem right (break instead of return):

 default:
 unallocated_encoding(s);
 return;
 }

 Good catch. I suspect it never hits though (or risu doesn't generate
 enough unallocated versions).

Oh, I saw that and then forgot to mention it.
Either the comment or the code is wrong -- if
the decoder tree prevents us getting to this point
with that value of index then we should be asserting,
not calling unallocated_encoding(). If the encoding
is really supposed to be unallocated then it's not
a failure of the decoder tree that we got here,
it's just another case we need to check. Which is it?

thanks
-- PMM



Re: [Qemu-devel] [PATCH v2 1/8] target-arm: A64: add support for ld/st pair

2013-12-12 Thread C Fontana
On 12 December 2013 12:45, Peter Maydell peter.mayd...@linaro.org wrote:
 On 12 December 2013 11:43, Alex Bennée alex.ben...@linaro.org wrote:

 claudio.font...@linaro.org writes:

 Hi,

 I saw a missing return below:
 snip
 +default: /* Failed decoder tree? */
 +unallocated_encoding(s);
 +break;
 +}

 This doesn't seem right (break instead of return):

 default:
 unallocated_encoding(s);
 return;
 }

 Good catch. I suspect it never hits though (or risu doesn't generate
 enough unallocated versions).

 Oh, I saw that and then forgot to mention it.
 Either the comment or the code is wrong -- if
 the decoder tree prevents us getting to this point
 with that value of index then we should be asserting,
 not calling unallocated_encoding(). If the encoding
 is really supposed to be unallocated then it's not
 a failure of the decoder tree that we got here,
 it's just another case we need to check. Which is it?

 thanks
 -- PMM

I think that there is more than the missing return:

we need to handle the case 0 as well, as it's a perfectly valid form
of a load/store pair:
it's the Load/Store no-allocate pair (offset) (LDNP, STNP).

So in my view we need to add a case 0 where we handle  the load/store
no-allocate pair,
and no default case, or a default case where we assert(0) for unreachable code,
since all possible index values (2 bits) should be handled by the
switch statement.

Ref: C3.3 Loads and stores Table C3-3

Claudio



[Qemu-devel] [PATCH v2 1/8] target-arm: A64: add support for ld/st pair

2013-12-11 Thread Peter Maydell
From: Alex Bennée alex.ben...@linaro.org

This patch support the basic load and store pair instructions and
includes the generic helper functions:

  * do_gpr_st()
  * do_fp_st()
  * do_gpr_ld()
  * do_fp_ld()
  * read_cpu_reg_sp()
  * gen_check_sp_alignment()

The last function gen_check_sp_alignment() is a NULL op currently but
put in place to make it easy to add SP alignment checking later.

Signed-off-by: Alex Bennée alex.ben...@linaro.org
Signed-off-by: Peter Maydell peter.mayd...@linaro.org

---

Changes
  - merge ldp/stp together
  - add read_cpu_reg_sp() and use
  - only reference registers as needed
  - use common tcg ops for gpr/fp loads/store
---
 target-arm/translate-a64.c | 268 -
 1 file changed, 263 insertions(+), 5 deletions(-)

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 0a76130..018b3ee 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -99,6 +99,15 @@ void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
 cpu_fprintf(f, \n);
 }
 
+static int get_mem_index(DisasContext *s)
+{
+#ifdef CONFIG_USER_ONLY
+return 1;
+#else
+return s-user;
+#endif
+}
+
 void gen_a64_set_pc_im(uint64_t val)
 {
 tcg_gen_movi_i64(cpu_pc, val);
@@ -250,6 +259,17 @@ static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int 
sf)
 return v;
 }
 
+static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
+{
+TCGv_i64 v = new_tmp_a64(s);
+if (sf) {
+tcg_gen_mov_i64(v, cpu_X[reg]);
+} else {
+tcg_gen_ext32u_i64(v, cpu_X[reg]);
+}
+return v;
+}
+
 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
  * than the 32 bit equivalent.
  */
@@ -278,6 +298,124 @@ static inline void gen_logic_CC(int sf, TCGv_i64 result)
 }
 
 /*
+ * Load/Store generators
+ */
+
+/*
+ *  Store from GPR register to memory
+ */
+static void do_gpr_st(DisasContext *s, TCGv_i64 source,
+  TCGv_i64 tcg_addr, int size)
+{
+g_assert(size = 3);
+tcg_gen_qemu_st_i64(source, tcg_addr, get_mem_index(s), MO_TE + size);
+}
+
+/*
+ * Load from memory to GPR register
+ */
+static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
+  int size, bool is_signed, bool extend)
+{
+TCGMemOp memop =  MO_TE + size;
+
+g_assert(size = 3);
+
+if (is_signed) {
+memop += MO_SIGN;
+}
+
+tcg_gen_qemu_ld_i64(dest, tcg_addr, get_mem_index(s), memop);
+
+if (extend  is_signed) {
+g_assert(size  3);
+tcg_gen_ext32u_i64(dest, dest);
+}
+}
+
+/*
+ * Store from FP register to memory
+ */
+static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
+{
+/* This writes the bottom N bits of a 128 bit wide vector to memory */
+int freg_offs = offsetof(CPUARMState, vfp.regs[srcidx * 2]);
+TCGv_i64 tmp = tcg_temp_new_i64();
+
+if (size  4) {
+switch (size) {
+case 0:
+tcg_gen_ld8u_i64(tmp, cpu_env, freg_offs);
+break;
+case 1:
+tcg_gen_ld16u_i64(tmp, cpu_env, freg_offs);
+break;
+case 2:
+tcg_gen_ld32u_i64(tmp, cpu_env, freg_offs);
+break;
+case 3:
+tcg_gen_ld_i64(tmp, cpu_env, freg_offs);
+break;
+}
+tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TE + size);
+} else {
+TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
+tcg_gen_ld_i64(tmp, cpu_env, freg_offs);
+tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TEQ);
+tcg_gen_qemu_st64(tmp, tcg_addr, get_mem_index(s));
+tcg_gen_ld_i64(tmp, cpu_env, freg_offs + sizeof(float64));
+tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
+tcg_gen_qemu_st_i64(tmp, tcg_hiaddr, get_mem_index(s), MO_TEQ);
+tcg_temp_free_i64(tcg_hiaddr);
+}
+
+tcg_temp_free_i64(tmp);
+}
+
+/* Load from memory to FP register */
+static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
+{
+/* This always zero-extends and writes to a full 128 bit wide vector */
+int freg_offs = offsetof(CPUARMState, vfp.regs[destidx * 2]);
+TCGv_i64 tmplo = tcg_temp_new_i64();
+TCGv_i64 tmphi;
+
+if (size  4) {
+TCGMemOp memop =  MO_TE + size;
+tmphi = tcg_const_i64(0);
+tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
+} else {
+TCGv_i64 tcg_hiaddr;
+tmphi = tcg_temp_new_i64();
+tcg_hiaddr = tcg_temp_new_i64();
+
+tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), MO_TEQ);
+tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
+tcg_gen_qemu_ld_i64(tmphi, tcg_hiaddr, get_mem_index(s), MO_TEQ);
+tcg_temp_free_i64(tcg_hiaddr);
+}
+
+tcg_gen_st_i64(tmplo, cpu_env, freg_offs);
+tcg_gen_st_i64(tmphi, cpu_env, freg_offs + sizeof(float64));
+
+tcg_temp_free_i64(tmplo);
+tcg_temp_free_i64(tmphi);
+}
+