Re: [Qemu-devel] [PATCH for-2.5 30/30] m68k: add bitfield instructions

2015-08-12 Thread Richard Henderson
On 08/09/2015 01:13 PM, Laurent Vivier wrote:
  uint32_t HELPER(rol32)(uint32_t val, uint32_t shift)
  {
  uint32_t result;
 @@ -1227,6 +1241,53 @@ void HELPER(set_mac_extu)(CPUM68KState *env, uint32_t 
 val, uint32_t acc)
  env-macc[acc + 1] = res;
  }
  
 +/* load from a bitfield */
 +
 +uint64_t HELPER(bitfield_load)(uint32_t addr, uint32_t offset, uint32_t 
 width)
 +{
 +uint8_t data[8];
 +uint64_t bitfield;
 +int size;
 +int i;
 +
 +size = (offset + width + 7)  3;
 +#if defined(CONFIG_USER_ONLY)
 +cpu_memory_rw_debug(NULL, (target_ulong)addr, data, size, 0);
 +#else
 +cpu_physical_memory_rw(addr, data, size, 0);
 +#endif

Err, this bypasses virtual memory.  Definitely not correct.

You'll need to use cpu_ld*_data instead.

 +DISAS_INSN(bitfield_reg)
 +{
 +uint16_t ext;
 +TCGv tmp;
 +TCGv tmp1;
 +TCGv reg;
 +TCGv offset;
 +TCGv width;
 +int op;
 +TCGv reg2;
 +TCGv mask;
 +
 +reg = DREG(insn, 0);
 +op = (insn  8)  7;
 +ext = read_im16(env, s);
 +
 +bitfield_param(ext, offset, width, mask);
 +
 +if (ext  0x0800) {
 +tcg_gen_andi_i32(offset, offset, 31);
 +}
 +gen_helper_ror32(mask, mask, offset);

Ah, the curious unused helpers.  Anyway, tcg_gen_rotr_i32.

Anyway, there's so much redundant computation in here let's name some
sub-expressions, and give them their own temporaries.  Let the tcg optimizer do
its job if they turn out to be unused for a specific case.

  mask_msb = mask before the rotate (at msb).
  mask_inp = mask after the rotate (in place).

 +tmp = tcg_temp_new_i32();
 +tcg_gen_and_i32(tmp, reg, mask);

  oldf_inp = tmp (old field in place).

 +
 +tmp1 = tcg_temp_new_i32();
 +gen_helper_rol32(tmp1, tmp, offset);

  oldf_msb = tmp1 (old field at msb).

 +tcg_gen_sub_i32(tmp2, tcg_const_i32(32), width);

  comp_width = tmp2 (compliment of width).

Use tcg_gen_subfi so you won't leak the const.
Compute this once for all of the sub-cases.

 +switch (op) {
 +case 0: /* bftst */
 +break;
 +case 1: /* bfextu */
 +tcg_gen_add_i32(tmp1, offset, width);
 +tcg_gen_andi_i32(tmp1, tmp1, 31);
 +gen_helper_rol32(reg2, tmp, tmp1);

  tcg_gen_shr_i32(reg2, oldf_msb, comp_width);

 +break;
 +case 2: /* bfchg */
 +tcg_gen_xor_i32(reg, reg, mask);
 +break;
 +case 3: /* bfexts */
 +gen_helper_rol32(reg2, tmp, offset);
 +tcg_gen_sub_i32(width, tcg_const_i32(32), width);
 +tcg_gen_sar_i32(reg2, reg2, width);

  tcg_gen_sar_i32(reg2, oldf_msb, comp_width);

 +case 4: /* bfclr */
 +tcg_gen_not_i32(mask, mask);
 +tcg_gen_and_i32(reg, reg, mask);

  tcg_gen_andc_i32(reg, reg, mask_inp);

 +break;
 +case 5: /* bfffo */
 +gen_helper_rol32(reg2, tmp, offset);
 +gen_helper_bfffo(tmp, tmp, width);
 +tcg_gen_add_i32(reg2, tmp, offset);

There's a typo here if you look close.  That said,

  tcg_gen_orc_i32(tmp, oldf_msb, mask_msb);
  gen_helper_clo32(tmp, tmp);
  tcg_gen_add_i32(reg2, tmp, offset);

with

uint32_t helper_clo32(uint32_t x)
{
  return clo32(x);
}

The first opcode sets all bits outside the field, so that (if the field is
smaller than 32 bits) we're guaranteed to find a one.  At which point there's
really very little that needs doing in the helper.

 +case 6: /* bfset */
 +tcg_gen_or_i32(reg, reg, mask);
 +break;

  tcg_gen_or_i32(reg, reg, mask_inp);

 +case 7: /* bfins */
 +tcg_gen_shl_i32(tmp1, tcg_const_i32(1), width);

Undefined if width == 32.  That said...

 +tcg_gen_subi_i32(tmp1, tmp1, 1);
 +tcg_gen_and_i32(tmp, reg2, tmp1);
 +tcg_gen_add_i32(tmp1, offset, width);
 +tcg_gen_andi_i32(tmp1, tmp1, 31);
 +gen_helper_ror32(tmp, tmp, tmp1);
 +tcg_gen_not_i32(mask, mask);
 +tcg_gen_and_i32(reg, reg, mask);
 +tcg_gen_or_i32(reg, reg, tmp);
 +break;

  /* Rotate the source so that the field is at msb.  */
  tcg_gen_rotr_i32(tmp, reg2, width);

  /* Isolate the field and set flags.  */
  tcg_gen_and_i32(tmp, tmp, mask_msb);
  gen_logic_cc(s, tmp, OS_LONG);

  /* Rotate the field into position.  */
  tcg_gen_rotr_i32(tmp, tmp, offset);

  /* Merge field into destination.  */
  tcg_gen_andc_i32(reg, reg, mask_inp);
  tcg_gen_or_i32(reg, reg, tmp);
  return;

Handle the non-bfins after the switch with

  gen_logic_cc(s, oldf_msb, OS_LONG);

 +DISAS_INSN(bitfield_mem)
 +{
 +uint16_t ext;
 +int op;
 +TCGv_i64 bitfield;
 +TCGv_i64 mask_bitfield;
 +TCGv mask_cc;
 +TCGv shift;
 +TCGv val;
 +TCGv src;
 +TCGv offset;
 +TCGv width;
 +TCGv reg;
 +TCGv tmp;
 +
 +op = (insn  8)  7;
 +ext = read_im16(env, s);
 +src = gen_lea(env, s, insn, OS_LONG);
 +if (IS_NULL_QREG(src)) {
 +gen_addr_fault(s);
 +return;
 +}
 +
 +bitfield_param(ext, offset, width, 

[Qemu-devel] [PATCH for-2.5 30/30] m68k: add bitfield instructions

2015-08-09 Thread Laurent Vivier
Signed-off-by: Laurent Vivier laur...@vivier.eu
---
 target-m68k/helper.c|  61 +
 target-m68k/helper.h|   4 +
 target-m68k/translate.c | 331 
 3 files changed, 396 insertions(+)

diff --git a/target-m68k/helper.c b/target-m68k/helper.c
index 532f366..ac4 100644
--- a/target-m68k/helper.c
+++ b/target-m68k/helper.c
@@ -447,6 +447,20 @@ uint32_t HELPER(ff1)(uint32_t x)
 return n;
 }
 
+uint32_t HELPER(bfffo)(uint32_t arg, uint32_t width)
+{
+int n;
+uint32_t mask;
+mask = 0x8000;
+for (n = 0; n  width; n++) {
+if (arg  mask) {
+break;
+}
+mask = 1;
+}
+return n;
+}
+
 uint32_t HELPER(rol32)(uint32_t val, uint32_t shift)
 {
 uint32_t result;
@@ -1227,6 +1241,53 @@ void HELPER(set_mac_extu)(CPUM68KState *env, uint32_t 
val, uint32_t acc)
 env-macc[acc + 1] = res;
 }
 
+/* load from a bitfield */
+
+uint64_t HELPER(bitfield_load)(uint32_t addr, uint32_t offset, uint32_t width)
+{
+uint8_t data[8];
+uint64_t bitfield;
+int size;
+int i;
+
+size = (offset + width + 7)  3;
+#if defined(CONFIG_USER_ONLY)
+cpu_memory_rw_debug(NULL, (target_ulong)addr, data, size, 0);
+#else
+cpu_physical_memory_rw(addr, data, size, 0);
+#endif
+
+bitfield = data[0];
+for (i = 1; i  8; i++) {
+bitfield = (bitfield  8) | data[i];
+}
+
+return bitfield;
+}
+
+/* store to a bitfield */
+
+void HELPER(bitfield_store)(uint32_t addr, uint32_t offset, uint32_t width,
+uint64_t bitfield)
+{
+uint8_t data[8];
+int size;
+int i;
+
+size = (offset + width + 7)  3;
+
+for (i = 0; i  8; i++) {
+data[7 - i] = bitfield;
+bitfield = 8;
+}
+
+#if defined(CONFIG_USER_ONLY)
+cpu_memory_rw_debug(NULL, (target_ulong)addr, data, size, 1);
+#else
+cpu_physical_memory_rw(addr, data, size, 1);
+#endif
+}
+
 uint32_t HELPER(abcd_cc)(CPUM68KState *env, uint32_t src, uint32_t dest)
 {
 uint16_t hi, lo;
diff --git a/target-m68k/helper.h b/target-m68k/helper.h
index 209064c..5db4278 100644
--- a/target-m68k/helper.h
+++ b/target-m68k/helper.h
@@ -1,5 +1,6 @@
 DEF_HELPER_1(bitrev, i32, i32)
 DEF_HELPER_1(ff1, i32, i32)
+DEF_HELPER_2(bfffo, i32, i32, i32)
 DEF_HELPER_2(rol32, i32, i32, i32)
 DEF_HELPER_2(ror32, i32, i32, i32)
 DEF_HELPER_2(sats, i32, i32, i32)
@@ -84,5 +85,8 @@ DEF_HELPER_3(set_mac_extu, void, env, i32, i32)
 DEF_HELPER_2(flush_flags, i32, env, i32)
 DEF_HELPER_2(raise_exception, void, env, i32)
 
+DEF_HELPER_3(bitfield_load, i64, i32, i32, i32)
+DEF_HELPER_4(bitfield_store, void, i32, i32, i32, i64)
+
 DEF_HELPER_3(abcd_cc, i32, env, i32, i32)
 DEF_HELPER_3(sbcd_cc, i32, env, i32, i32)
diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index 5fc7a11..2725a9f 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -2878,6 +2878,335 @@ DISAS_INSN(rotate_mem)
 set_cc_op(s, CC_OP_FLAGS);
 }
 
+static void bitfield_param(uint16_t ext, TCGv *offset, TCGv *width, TCGv *mask)
+{
+TCGv tmp;
+
+/* offset */
+
+if (ext  0x0800) {
+*offset = tcg_temp_new_i32();
+tcg_gen_mov_i32(*offset, DREG(ext, 6));
+} else {
+*offset = tcg_temp_new_i32();
+tcg_gen_movi_i32(*offset, (ext  6)  31);
+}
+
+/* width */
+
+if (ext  0x0020) {
+*width = tcg_temp_new_i32();
+tcg_gen_subi_i32(*width, DREG(ext, 0), 1);
+tcg_gen_andi_i32(*width, *width, 31);
+tcg_gen_addi_i32(*width, *width, 1);
+} else {
+*width = tcg_temp_new_i32();
+tcg_gen_movi_i32(*width, ((ext - 1)  31) + 1);
+}
+
+/* mask */
+
+tmp = tcg_temp_new_i32();
+tcg_gen_sub_i32(tmp, tcg_const_i32(32), *width);
+*mask = tcg_temp_new_i32();
+tcg_gen_shl_i32(*mask, tcg_const_i32(0x), tmp);
+}
+
+DISAS_INSN(bitfield_reg)
+{
+uint16_t ext;
+TCGv tmp;
+TCGv tmp1;
+TCGv reg;
+TCGv offset;
+TCGv width;
+int op;
+TCGv reg2;
+TCGv mask;
+
+reg = DREG(insn, 0);
+op = (insn  8)  7;
+ext = read_im16(env, s);
+
+bitfield_param(ext, offset, width, mask);
+
+if (ext  0x0800) {
+tcg_gen_andi_i32(offset, offset, 31);
+}
+gen_helper_ror32(mask, mask, offset);
+
+/* reg  mask */
+
+tmp = tcg_temp_new_i32();
+tcg_gen_and_i32(tmp, reg, mask);
+
+tmp1 = tcg_temp_new_i32();
+gen_helper_rol32(tmp1, tmp, offset);
+
+reg2 = DREG(ext, 12);
+if (op == 7) {
+TCGv tmp2;
+
+tmp2 = tcg_temp_new_i32();
+tcg_gen_sub_i32(tmp2, tcg_const_i32(32), width);
+tcg_gen_shl_i32(tmp2, reg2, tmp2);
+tcg_gen_and_i32(tmp2, tmp2, mask);
+gen_logic_cc(s, tmp2, OS_LONG);
+
+tcg_temp_free_i32(tmp1);
+} else {
+gen_logic_cc(s, tmp1, OS_LONG);
+}
+
+switch (op) {
+case 0: /* bftst */
+break;
+case 1: /* bfextu */
+