Re: [Qemu-devel] [PATCH 32/52] target-m68k: bitfield ops

2016-05-06 Thread Richard Henderson

On 05/04/2016 10:12 AM, Laurent Vivier wrote:

Signed-off-by: Laurent Vivier 
---
 target-m68k/helper.c|  13 ++
 target-m68k/helper.h|   4 +
 target-m68k/op_helper.c |  68 ++
 target-m68k/translate.c | 560 
 4 files changed, 645 insertions(+)

diff --git a/target-m68k/helper.c b/target-m68k/helper.c
index e9e7cee..76dda44 100644
--- a/target-m68k/helper.c
+++ b/target-m68k/helper.c
@@ -267,6 +267,19 @@ uint32_t HELPER(ff1)(uint32_t x)
 return n;
 }

+uint32_t HELPER(bfffo)(uint32_t arg, uint32_t width)
+{
+int n;
+uint32_t mask;
+mask = 0x80000000;
+for (n = 0; n < width; n++) {
+   if (arg & mask)
+   break;
+   mask >>= 1;
+}
+return n;


  n = clz32(arg);
  return n < width ? n : width;


+DEF_HELPER_2(bfffo, i32, i32, i32)


DEF_HELPER_FLAGS_*


+DEF_HELPER_4(bitfield_load, i64, env, i32, i32, i32)
+DEF_HELPER_5(bitfield_store, void, env, i32, i32, i32, i64)


Likewise.


+bitfield = cpu_ldub_data(env, addr);


Switch to using cpu_ldub_data_ra etc.  That will avoid having to store PC or 
update_cc before calling these helpers.



+static inline void bcd_add(TCGv src, TCGv dest)


Did the bcd patch get squashed into this one by mistake?

Anyway, it might be nice to take off the inline marker for these, since they're 
fairly large functions.



+static inline void bcd_neg(TCGv dest, TCGv src)


Likewise wrt inline.


+/* compute the 10's complement
+ *
+ *bcd_add(0xff999999 - (src + X), 0x00000001)
+ *
+ *t1 = 0xF9999999 - src - X
+ *t2 = t1 + 0x06666666
+ *t3 = t2 + 0x00000001
+ *t4 = t2 ^ 0x00000001
+ *t5 = t3 ^ t4
+ *t6 = ~t5 & 0x11111110
+ *t7 = (t6 >> 2) | (t6 >> 3)
+ *return t3 - t7
+ *
+ * reduced in:
+ *
+ *t2 = 0xffffffff + (-src)
+ *t3 = (-src)
+ *t4 = t2  ^ (X ^ 1)
+ *t5 = (t3 - X) ^ t4
+ *t6 = ~t5 & 0x1110
+ *t7 = (t6 >> 2) | (t6 >> 3)
+ *return (t3 - X) - t7


Is there a reason that you're computing the ten's complement to 7 digits when 
you're only going to use 3 of them?  Restricting to 3 means that these 
constants become smaller and therefore easier to build on a non-x86 host.



+DISAS_INSN(abcd_mem)
+{
+TCGv src;
+TCGv addr_src;
+TCGv dest;
+TCGv addr_dest;
+
+gen_flush_flags(s); /* !Z is sticky */
+
+addr_src = AREG(insn, 0);
+tcg_gen_subi_i32(addr_src, addr_src, opsize_bytes(OS_BYTE));
+src = gen_load(s, OS_BYTE, addr_src, 0);
+
+addr_dest = AREG(insn, 9);
+tcg_gen_subi_i32(addr_dest, addr_dest, opsize_bytes(OS_BYTE));
+dest = gen_load(s, OS_BYTE, addr_dest, 0);
+
+bcd_add(src, dest);
+
+gen_store(s, OS_BYTE, addr_dest, dest);


Surely the write-back to the address registers only happens after any possible 
exception due to the loads.  Otherwise you can't restart the insn after a page 
fault.



+set_cc_op(s, CC_OP_FLAGS);


This is redundant with gen_flush_flags.


+static void bitfield_param(uint16_t ext, TCGv *offset, TCGv *width, TCGv *mask)
+{
+TCGv tmp;
+
+/* offset */
+
+if (ext & 0x0800) {
+*offset = tcg_temp_new_i32();
+tcg_gen_mov_i32(*offset, DREG(ext, 6));
+} else {
+*offset = tcg_temp_new_i32();
+tcg_gen_movi_i32(*offset, (ext >> 6) & 31);
+}


No need to have two copies of the *offset allocation.


+tmp = tcg_temp_new_i32();
+tcg_gen_sub_i32(tmp, tcg_const_i32(32), *width);
+*mask = tcg_temp_new_i32();
+tcg_gen_shl_i32(*mask, tcg_const_i32(0xffffffff), tmp);


Less garbage temporaries if you do

  tmp = tcg_const_i32(32);
  tcg_gen_sub_i32(tmp, tmp, *width);
  *mask = tcg_const_i32(-1);
  tcg_gen_shl_i32(*mask, *mask, tmp);
  tcg_temp_free_i32(tmp);


+if (ext & 0x0800)
+tcg_gen_andi_i32(offset, offset, 31);


Watch the coding style.



+if (op == 7) {
+TCGv tmp2;
+
+tmp2 = tcg_temp_new_i32();
+tcg_gen_sub_i32(tmp2, tcg_const_i32(32), width);
+tcg_gen_shl_i32(tmp2, reg2, tmp2);
+tcg_gen_and_i32(tmp2, tmp2, mask);
+gen_logic_cc(s, tmp2, OS_LONG);


A rotate right of the width is easier to put the field into the most 
significant bits.  Also, you need to do this AND before you rotate the mask.


A comment here that we're computing the flags for BFINS wouldn't go amiss.


+tcg_temp_free_i32(tmp1);


tmp2.


+} else {
+gen_logic_cc(s, tmp1, OS_LONG);
+}


I also question the logic of doing

mask = mask >>> offset;
tmp = reg & mask
tmp = tmp <<< offset

when just

tmp = reg <<< offset;
tmp = tmp & mask

would do.

Let's assume we make this change, that we align the field at MSB for both reg1 
and reg2.  That gives us


TCGv tmp = tcg_temp_new();
bitfield_param(ext, &offset, &width, &mask);

if (op 

[Qemu-devel] [PATCH 32/52] target-m68k: bitfield ops

2016-05-04 Thread Laurent Vivier
Signed-off-by: Laurent Vivier 
---
 target-m68k/helper.c|  13 ++
 target-m68k/helper.h|   4 +
 target-m68k/op_helper.c |  68 ++
 target-m68k/translate.c | 560 
 4 files changed, 645 insertions(+)

diff --git a/target-m68k/helper.c b/target-m68k/helper.c
index e9e7cee..76dda44 100644
--- a/target-m68k/helper.c
+++ b/target-m68k/helper.c
@@ -267,6 +267,19 @@ uint32_t HELPER(ff1)(uint32_t x)
 return n;
 }
 
+uint32_t HELPER(bfffo)(uint32_t arg, uint32_t width)
+{
+int n;
+uint32_t mask;
+mask = 0x80000000;
+for (n = 0; n < width; n++) {
+   if (arg & mask)
+   break;
+   mask >>= 1;
+}
+return n;
+}
+
 uint32_t HELPER(sats)(uint32_t val, uint32_t v)
 {
 /* The result has the opposite sign to the original value.  */
diff --git a/target-m68k/helper.h b/target-m68k/helper.h
index aae01f9..0b819cb 100644
--- a/target-m68k/helper.h
+++ b/target-m68k/helper.h
@@ -1,5 +1,6 @@
 DEF_HELPER_1(bitrev, i32, i32)
 DEF_HELPER_1(ff1, i32, i32)
+DEF_HELPER_2(bfffo, i32, i32, i32)
 DEF_HELPER_FLAGS_2(sats, TCG_CALL_NO_RWG_SE, i32, i32, i32)
 DEF_HELPER_2(divu, void, env, i32)
 DEF_HELPER_2(divs, void, env, i32)
@@ -44,3 +45,6 @@ DEF_HELPER_2(flush_flags, void, env, i32)
 DEF_HELPER_2(set_ccr, void, env, i32)
 DEF_HELPER_FLAGS_1(get_ccr, TCG_CALL_NO_WG_SE, i32, env)
 DEF_HELPER_2(raise_exception, void, env, i32)
+
+DEF_HELPER_4(bitfield_load, i64, env, i32, i32, i32)
+DEF_HELPER_5(bitfield_store, void, env, i32, i32, i32, i64)
diff --git a/target-m68k/op_helper.c b/target-m68k/op_helper.c
index 1c95ea0..71caba9 100644
--- a/target-m68k/op_helper.c
+++ b/target-m68k/op_helper.c
@@ -178,6 +178,74 @@ void HELPER(raise_exception)(CPUM68KState *env, uint32_t 
tt)
 raise_exception(env, tt);
 }
 
+/* load from a bitfield */
+
+uint64_t HELPER(bitfield_load)(CPUM68KState *env, uint32_t addr,
+   uint32_t offset, uint32_t width)
+{
+uint64_t bitfield = 0;
+int size;
+
+size = (offset + width + 7) >> 3;
+switch (size) {
+case 1:
+bitfield = cpu_ldub_data(env, addr);
+bitfield <<= 56;
+break;
+case 2:
+bitfield = cpu_lduw_data(env, addr);
+bitfield <<= 48;
+break;
+case 3:
+bitfield = cpu_lduw_data(env, addr);
+bitfield <<= 8;
+bitfield |= cpu_ldub_data(env, addr + 2);
+bitfield <<= 40;
+break;
+case 4:
+bitfield = cpu_ldl_data(env, addr);
+bitfield <<= 32;
+break;
+case 5:
+bitfield = cpu_ldl_data(env, addr);
+bitfield <<= 8;
+bitfield |= cpu_ldub_data(env, addr + 4);
+bitfield <<= 24;
+break;
+}
+
+return bitfield;
+}
+
+/* store to a bitfield */
+
+void HELPER(bitfield_store)(CPUM68KState *env, uint32_t addr, uint32_t offset,
+uint32_t width, uint64_t bitfield)
+{
+int size;
+
+size = (offset + width + 7) >> 3;
+switch (size) {
+case 1:
+cpu_stb_data(env, addr, bitfield >> 56);
+break;
+case 2:
+cpu_stw_data(env, addr, bitfield >> 48);
+break;
+case 3:
+cpu_stw_data(env, addr, bitfield >> 48);
+cpu_stb_data(env, addr + 2, bitfield >> 40);
+break;
+case 4:
+cpu_stl_data(env, addr, bitfield >> 32);
+break;
+case 5:
+cpu_stl_data(env, addr, bitfield >> 32);
+cpu_stb_data(env, addr + 4, bitfield >> 24);
+break;
+}
+}
+
 void HELPER(divu)(CPUM68KState *env, uint32_t word)
 {
 uint32_t num;
diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index 817f0b3..00fd2f1 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -1240,6 +1240,231 @@ DISAS_INSN(divl)
 set_cc_op(s, CC_OP_FLAGS);
 }
 
+static inline void bcd_add(TCGv src, TCGv dest)
+{
+TCGv t0, t1;
+
+/* t1 = (src + 0x0066) + dest
+ *= result with some possible exceeding 0x6
+ */
+
+t0 = tcg_const_i32(0x0066);
+tcg_gen_add_i32(t0, t0, src);
+
+t1 = tcg_temp_new();
+tcg_gen_add_i32(t1, t0, dest);
+
+/* we will remove exceeding 0x6 where there is no carry */
+
+/* t0 = (src + 0x0066) ^ dest
+ *= t1 without carries
+ */
+
+tcg_gen_xor_i32(t0, t0, dest);
+
+/* extract the carries
+ * t0 = t0 ^ t1
+ *= only the carries
+ */
+
+tcg_gen_xor_i32(t0, t0, t1);
+
+/* generate 0x1 where there is no carry */
+
+tcg_gen_not_i32(t0, t0);
+tcg_gen_andi_i32(t0, t0, 0x110);
+
+/* for each 0x10, generate a 0x6 */
+
+tcg_gen_shri_i32(dest, t0, 2);
+tcg_gen_shri_i32(t0, t0, 3);
+tcg_gen_or_i32(dest, dest, t0);
+tcg_temp_free(t0);
+
+/* remove the exceeding 0x6
+ * for digits that have not generated a carry
+ */
+
+tcg_gen_sub_i32(dest, t1, dest);
+tcg_temp_free(t1);
+}
+
+static inline void bcd_neg(TCGv dest, TCGv src)
+{
+TCGv t0,