On 08/09/2015 01:13 PM, Laurent Vivier wrote:
> uint32_t HELPER(rol32)(uint32_t val, uint32_t shift)
> {
>     uint32_t result;
> @@ -1227,6 +1241,53 @@ void HELPER(set_mac_extu)(CPUM68KState *env, uint32_t val, uint32_t acc)
>     env->macc[acc + 1] = res;
> }
>
> +/* load from a bitfield */
> +
> +uint64_t HELPER(bitfield_load)(uint32_t addr, uint32_t offset, uint32_t width)
> +{
> +    uint8_t data[8];
> +    uint64_t bitfield;
> +    int size;
> +    int i;
> +
> +    size = (offset + width + 7) >> 3;
> +#if defined(CONFIG_USER_ONLY)
> +    cpu_memory_rw_debug(NULL, (target_ulong)addr, data, size, 0);
> +#else
> +    cpu_physical_memory_rw(addr, data, size, 0);
> +#endif

Err, this bypasses virtual memory.  Definitely not correct.  You'll
need to use cpu_ld*_data instead.
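For illustration, a minimal sketch of the same load done through the
mmu (it assumes the helper grows the usual env argument so it can
reach the cpu state; untested):

    uint64_t HELPER(bitfield_load)(CPUM68KState *env, uint32_t addr,
                                   uint32_t offset, uint32_t width)
    {
        uint64_t bitfield = 0;
        int size = (offset + width + 7) >> 3;
        int i;

        /* Byte loads through the virtual address space, so that mmu
           faults are raised as for any other data access.  */
        for (i = 0; i < size; i++) {
            bitfield = (bitfield << 8) | cpu_ldub_data(env, addr + i);
        }
        /* Position the first loaded byte at bits 63..56, so that the
           field begins at bit 63 - offset.  */
        return bitfield << (8 * (8 - size));
    }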
> +DISAS_INSN(bitfield_reg)
> +{
> +    uint16_t ext;
> +    TCGv tmp;
> +    TCGv tmp1;
> +    TCGv reg;
> +    TCGv offset;
> +    TCGv width;
> +    int op;
> +    TCGv reg2;
> +    TCGv mask;
> +
> +    reg = DREG(insn, 0);
> +    op = (insn >> 8) & 7;
> +    ext = read_im16(env, s);
> +
> +    bitfield_param(ext, &offset, &width, &mask);
> +
> +    if (ext & 0x0800) {
> +        tcg_gen_andi_i32(offset, offset, 31);
> +    }
> +    gen_helper_ror32(mask, mask, offset);

Ah, the curious unused helpers.  Anyway, tcg_gen_rotr_i32.

There's so much redundant computation in here; let's name some
sub-expressions and give them their own temporaries.  Let the tcg
optimizer do its job if they turn out to be unused for a specific case.

    mask_msb = mask before the rotate (at msb).
    mask_inp = mask after the rotate (in place).

> +    tmp = tcg_temp_new_i32();
> +    tcg_gen_and_i32(tmp, reg, mask);

    oldf_inp = tmp (old field in place).

> +
> +    tmp1 = tcg_temp_new_i32();
> +    gen_helper_rol32(tmp1, tmp, offset);

    oldf_msb = tmp1 (old field at msb).

> +    tcg_gen_sub_i32(tmp2, tcg_const_i32(32), width);

    comp_width = tmp2 (complement of width).

Use tcg_gen_subfi so you won't leak the const.  Compute this once for
all of the sub-cases.
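Concretely, the shared setup might look something like this (a sketch
only, using the temporary names above; it assumes bitfield_param
leaves the mask at msb, as the patch's gen_helper_ror32 call implies):

    TCGv mask_msb = tcg_temp_new_i32();
    TCGv mask_inp = tcg_temp_new_i32();
    TCGv oldf_inp = tcg_temp_new_i32();
    TCGv oldf_msb = tcg_temp_new_i32();
    TCGv comp_width = tcg_temp_new_i32();

    /* Rotate the mask from msb into place.  */
    tcg_gen_mov_i32(mask_msb, mask);
    tcg_gen_rotr_i32(mask_inp, mask_msb, offset);

    /* Extract the old field, both in place and at msb.  */
    tcg_gen_and_i32(oldf_inp, reg, mask_inp);
    tcg_gen_rotl_i32(oldf_msb, oldf_inp, offset);

    /* comp_width = 32 - width, without leaking a constant temp.  */
    tcg_gen_subfi_i32(comp_width, 32, width);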
> +    switch (op) {
> +    case 0: /* bftst */
> +        break;
> +    case 1: /* bfextu */
> +        tcg_gen_add_i32(tmp1, offset, width);
> +        tcg_gen_andi_i32(tmp1, tmp1, 31);
> +        gen_helper_rol32(reg2, tmp, tmp1);

    tcg_gen_shr_i32(reg2, oldf_msb, comp_width);

> +        break;
> +    case 2: /* bfchg */
> +        tcg_gen_xor_i32(reg, reg, mask);
> +        break;
> +    case 3: /* bfexts */
> +        gen_helper_rol32(reg2, tmp, offset);
> +        tcg_gen_sub_i32(width, tcg_const_i32(32), width);
> +        tcg_gen_sar_i32(reg2, reg2, width);

    tcg_gen_sar_i32(reg2, oldf_msb, comp_width);

> +    case 4: /* bfclr */
> +        tcg_gen_not_i32(mask, mask);
> +        tcg_gen_and_i32(reg, reg, mask);

    tcg_gen_andc_i32(reg, reg, mask_inp);

> +        break;
> +    case 5: /* bfffo */
> +        gen_helper_rol32(reg2, tmp, offset);
> +        gen_helper_bfffo(tmp, tmp, width);
> +        tcg_gen_add_i32(reg2, tmp, offset);

There's a typo here if you look close.  That said,

    tcg_gen_orc_i32(tmp, oldf_msb, mask_msb);
    gen_helper_clo32(tmp, tmp);
    tcg_gen_add_i32(reg2, tmp, offset);

with

    uint32_t helper_clo32(uint32_t x)
    {
        return clo32(x);
    }

The first opcode sets all bits outside the field, so that (if the
field is smaller than 32 bits) we're guaranteed to find a one.  At
which point there's really very little that needs doing in the helper.

> +    case 6: /* bfset */
> +        tcg_gen_or_i32(reg, reg, mask);
> +        break;

    tcg_gen_or_i32(reg, reg, mask_inp);

> +    case 7: /* bfins */
> +        tcg_gen_shl_i32(tmp1, tcg_const_i32(1), width);

Undefined if width == 32.  That said...

> +        tcg_gen_subi_i32(tmp1, tmp1, 1);
> +        tcg_gen_and_i32(tmp, reg2, tmp1);
> +        tcg_gen_add_i32(tmp1, offset, width);
> +        tcg_gen_andi_i32(tmp1, tmp1, 31);
> +        gen_helper_ror32(tmp, tmp, tmp1);
> +        tcg_gen_not_i32(mask, mask);
> +        tcg_gen_and_i32(reg, reg, mask);
> +        tcg_gen_or_i32(reg, reg, tmp);
> +        break;

    /* Rotate the source so that the field is at msb.  */
    tcg_gen_rotr_i32(tmp, reg2, width);
    /* Isolate the field and set flags.  */
    tcg_gen_and_i32(tmp, tmp, mask_msb);
    gen_logic_cc(s, tmp, OS_LONG);
    /* Rotate the field into position.  */
    tcg_gen_rotr_i32(tmp, tmp, offset);
    /* Merge field into destination.  */
    tcg_gen_andc_i32(reg, reg, mask_inp);
    tcg_gen_or_i32(reg, reg, tmp);
    return;

Handle the non-bfins cases after the switch with

    gen_logic_cc(s, oldf_msb, OS_LONG);

> +DISAS_INSN(bitfield_mem)
> +{
> +    uint16_t ext;
> +    int op;
> +    TCGv_i64 bitfield;
> +    TCGv_i64 mask_bitfield;
> +    TCGv mask_cc;
> +    TCGv shift;
> +    TCGv val;
> +    TCGv src;
> +    TCGv offset;
> +    TCGv width;
> +    TCGv reg;
> +    TCGv tmp;
> +
> +    op = (insn >> 8) & 7;
> +    ext = read_im16(env, s);
> +    src = gen_lea(env, s, insn, OS_LONG);
> +    if (IS_NULL_QREG(src)) {
> +        gen_addr_fault(s);
> +        return;
> +    }
> +
> +    bitfield_param(ext, &offset, &width, &mask_cc);
> +
> +    /* adjust src and offset */
> +
> +    /* src += offset >> 3; */
> +
> +    tmp = tcg_temp_new_i32();
> +    tcg_gen_shri_i32(tmp, offset, 3);
> +    tcg_gen_add_i32(src, src, tmp);
> +
> +    /* offset &= 7; */
> +
> +    tcg_gen_andi_i32(offset, offset, 7);
> +
> +    /* load */
> +
> +    bitfield = tcg_temp_new_i64();
> +    gen_helper_bitfield_load(bitfield, src, offset, width);

Surely better to load the value from memory such that the field is
positioned at the MSB, share code with bitfield_reg for the actual
computation of flags and result, and then store the value back from
the temporary at MSB as needed.


r~