Add generic code generation that prepares operands around calls to decode.e.gen in a table-driven manner, so that the ALU operations themselves need not take care of it.
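To illustrate the intended split, once the common code takes care of loading the operands and writing back the result, the gen callback for a binary ALU operation can boil down to roughly the following (a hypothetical gen_ADD sketch, not part of this patch; flag computation omitted):

    static void gen_ADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
    {
        /* The sources were already loaded into s->T0/s->T1 by gen_load();
         * the result left in s->T0 is stored by gen_writeback() afterwards. */
        tcg_gen_add_tl(s->T0, s->T0, s->T1);
    }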
Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
---
 target/i386/tcg/decode-new.c.inc |  20 +++-
 target/i386/tcg/decode-new.h     |   1 +
 target/i386/tcg/emit.c.inc       | 152 +++++++++++++++++++++++++++++++
 target/i386/tcg/translate.c      |  24 +++++
 4 files changed, 195 insertions(+), 2 deletions(-)

diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index de8ef51a2d..7f76051b2d 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -228,7 +228,7 @@ static bool decode_op_size(DisasContext *s, X86OpEntry *e, X86OpSize size, MemOp
             *ot = MO_64;
             return true;
         }
-        if (s->vex_l && e->s0 != X86_SIZE_qq) {
+        if (s->vex_l && e->s0 != X86_SIZE_qq && e->s1 != X86_SIZE_qq) {
             return false;
         }
         *ot = MO_128;
@@ -741,7 +741,23 @@ static target_ulong disas_insn_new(DisasContext *s, CPUState *cpu, int b)
     if (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea) {
         gen_load_ea(s, &decode.mem);
     }
-    decode.e.gen(s, env, &decode);
+    if (s->prefix & PREFIX_LOCK) {
+        if (decode.op[0].unit != X86_OP_INT || !decode.op[0].has_ea) {
+            goto illegal_op;
+        }
+        gen_load(s, s->T1, NULL, &decode.op[2], decode.immediate);
+        decode.e.gen(s, env, &decode);
+    } else {
+        if (decode.op[0].unit == X86_OP_MMX) {
+            gen_mmx_offset(s->ptr0, &decode.op[0]);
+        } else if (decode.op[0].unit == X86_OP_SSE) {
+            gen_xmm_offset(s->ptr0, &decode.op[0]);
+        }
+        gen_load(s, s->T0, s->ptr1, &decode.op[1], decode.immediate);
+        gen_load(s, s->T1, s->ptr2, &decode.op[2], decode.immediate);
+        decode.e.gen(s, env, &decode);
+        gen_writeback(s, &decode.op[0]);
+    }
     return s->pc;
 illegal_op:
     gen_illegal_opcode(s);
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index fb44560aae..a2d3c3867f 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -168,6 +168,7 @@ typedef struct X86DecodedOp {
     MemOp ot;     /* For b/c/d/p/s/q/v/w/y/z */
     X86OpUnit unit;
     bool has_ea;
+    int offset;   /* For MMX and SSE */
 } X86DecodedOp;
 
 struct X86DecodedInsn {
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index e86364ffc1..6fa0062d6a 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -29,3 +29,155 @@ static void gen_load_ea(DisasContext *s, AddressParts *mem)
     TCGv ea = gen_lea_modrm_1(s, *mem);
     gen_lea_v_seg(s, s->aflag, ea, mem->def_seg, s->override);
 }
+
+static void gen_mmx_offset(TCGv_ptr ptr, X86DecodedOp *op)
+{
+    if (!op->has_ea) {
+        op->offset = offsetof(CPUX86State, fpregs[op->n].mmx);
+    } else {
+        op->offset = offsetof(CPUX86State, mmx_t0);
+    }
+    tcg_gen_addi_ptr(ptr, cpu_env, op->offset);
+
+    /*
+     * ptr is for passing to helpers, and points to the MMXReg; op->offset
+     * is for TCG ops and points to the operand.
+     */
+    if (op->ot == MO_32) {
+        op->offset += offsetof(MMXReg, MMX_L(0));
+    }
+}
+
+static int xmm_offset(MemOp ot)
+{
+    if (ot == MO_8) {
+        return offsetof(ZMMReg, ZMM_B(0));
+    } else if (ot == MO_16) {
+        return offsetof(ZMMReg, ZMM_W(0));
+    } else if (ot == MO_32) {
+        return offsetof(ZMMReg, ZMM_L(0));
+    } else if (ot == MO_64) {
+        return offsetof(ZMMReg, ZMM_Q(0));
+    } else if (ot == MO_128) {
+        return offsetof(ZMMReg, ZMM_X(0));
+    } else if (ot == MO_256) {
+        return offsetof(ZMMReg, ZMM_Y(0));
+    } else {
+        abort();
+    }
+}
+
+static void gen_xmm_offset(TCGv_ptr ptr, X86DecodedOp *op)
+{
+    if (!op->has_ea) {
+        op->offset = ZMM_OFFSET(op->n);
+    } else {
+        op->offset = offsetof(CPUX86State, xmm_t0);
+    }
+    /*
+     * ptr is for passing to helpers, and points to the ZMMReg; op->offset
+     * is for TCG ops (especially gvec) and points to the base of the vector.
+     */
+    tcg_gen_addi_ptr(ptr, cpu_env, op->offset);
+    op->offset += xmm_offset(op->ot);
+}
+
+static void gen_load_sse(DisasContext *s, TCGv temp, MemOp ot, int dest_ofs)
+{
+    if (ot == MO_8) {
+        gen_op_ld_v(s, MO_8, temp, s->A0);
+        tcg_gen_st8_tl(temp, cpu_env, dest_ofs);
+    } else if (ot == MO_16) {
+        gen_op_ld_v(s, MO_16, temp, s->A0);
+        tcg_gen_st16_tl(temp, cpu_env, dest_ofs);
+    } else if (ot == MO_32) {
+        gen_op_ld_v(s, MO_32, temp, s->A0);
+        tcg_gen_st32_tl(temp, cpu_env, dest_ofs);
+    } else if (ot == MO_64) {
+        gen_ldq_env_A0(s, dest_ofs);
+    } else if (ot == MO_128) {
+        gen_ldo_env_A0(s, dest_ofs);
+    } else if (ot == MO_256) {
+        gen_ldy_env_A0(s, dest_ofs);
+    }
+}
+
+static void gen_load(DisasContext *s, TCGv v, TCGv_ptr ptr, X86DecodedOp *op, uint64_t imm)
+{
+    switch (op->unit) {
+    case X86_OP_SKIP:
+        return;
+    case X86_OP_SEG:
+        tcg_gen_ld32u_tl(v, cpu_env,
+                         offsetof(CPUX86State,segs[op->n].selector));
+        break;
+    case X86_OP_CR:
+        tcg_gen_ld_tl(v, cpu_env, offsetof(CPUX86State, cr[op->n]));
+        break;
+    case X86_OP_DR:
+        tcg_gen_ld_tl(v, cpu_env, offsetof(CPUX86State, dr[op->n]));
+        break;
+    case X86_OP_INT:
+        if (op->has_ea) {
+            gen_op_ld_v(s, op->ot, v, s->A0);
+        } else {
+            gen_op_mov_v_reg(s, op->ot, v, op->n);
+        }
+        break;
+    case X86_OP_IMM:
+        tcg_gen_movi_tl(v, imm);
+        break;
+
+    case X86_OP_MMX:
+        gen_mmx_offset(ptr, op);
+        goto load_vector;
+
+    case X86_OP_SSE:
+        gen_xmm_offset(ptr, op);
+    load_vector:
+        if (op->has_ea) {
+            gen_load_sse(s, v, op->ot, op->offset);
+        }
+        break;
+
+    default:
+        abort();
+    }
+}
+
+static void gen_writeback(DisasContext *s, X86DecodedOp *op)
+{
+    switch (op->unit) {
+    case X86_OP_SKIP:
+        break;
+    case X86_OP_SEG:
+        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
+        gen_movl_seg_T0(s, op->n);
+        if (s->base.is_jmp) {
+            gen_jmp_im(s, s->pc - s->cs_base);
+            if (op->n == R_SS) {
+                s->flags &= ~HF_TF_MASK;
+                gen_eob_inhibit_irq(s, true);
+            } else {
+                gen_eob(s);
+            }
+        }
+        break;
+    case X86_OP_CR:
+    case X86_OP_DR:
+        /* TBD */
+        break;
+    case X86_OP_INT:
+        if (op->has_ea) {
+            gen_op_st_v(s, op->ot, s->T0, s->A0);
+        } else {
+            gen_op_mov_reg_v(s, op->ot, op->n, s->T0);
+        }
+        break;
+    case X86_OP_MMX:
+    case X86_OP_SSE:
+        break;
+    default:
+        abort();
+    }
+}
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index f66bf2ac79..7e9920e29c 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2831,6 +2831,30 @@ static inline void gen_sto_env_A0(DisasContext *s, int offset)
     tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
 }
 
+static inline void gen_ldy_env_A0(DisasContext *s, int offset)
+{
+    int mem_index = s->mem_index;
+    gen_ldo_env_A0(s, offset);
+    tcg_gen_addi_tl(s->tmp0, s->A0, 16);
+    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(2)));
+    tcg_gen_addi_tl(s->tmp0, s->A0, 24);
+    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(3)));
+}
+
+static inline void gen_sty_env_A0(DisasContext *s, int offset)
+{
+    int mem_index = s->mem_index;
+    gen_sto_env_A0(s, offset);
+    tcg_gen_addi_tl(s->tmp0, s->A0, 16);
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(2)));
+    tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
+    tcg_gen_addi_tl(s->tmp0, s->A0, 24);
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(3)));
+    tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
+}
+
 static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
 {
     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q(0)));
-- 
2.37.2