Re: [PATCH v2 06/37] target/i386: add ALU load/writeback core

2022-09-24 Thread Richard Henderson

On 9/20/22 17:24, Paolo Bonzini wrote:

+static void gen_load_sse(DisasContext *s, TCGv temp, MemOp ot, int dest_ofs,
+                         bool aligned)
+{
+    if (ot == MO_8) {
+        gen_op_ld_v(s, MO_8, temp, s->A0);
+        tcg_gen_st8_tl(temp, cpu_env, dest_ofs);
+    } else if (ot == MO_16) {


switch + default assert.
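
I.e. something along these lines (untested sketch, same calls as in the patch):

    switch (ot) {
    case MO_8:
        gen_op_ld_v(s, MO_8, temp, s->A0);
        tcg_gen_st8_tl(temp, cpu_env, dest_ofs);
        break;
    case MO_16:
        gen_op_ld_v(s, MO_16, temp, s->A0);
        tcg_gen_st16_tl(temp, cpu_env, dest_ofs);
        break;
    case MO_32:
        gen_op_ld_v(s, MO_32, temp, s->A0);
        tcg_gen_st32_tl(temp, cpu_env, dest_ofs);
        break;
    case MO_64:
        gen_ldq_env_A0(s, dest_ofs);
        break;
    case MO_128:
        gen_ldo_env_A0(s, dest_ofs, aligned);
        break;
    case MO_256:
        gen_ldy_env_A0(s, dest_ofs, aligned);
        break;
    default:
        g_assert_not_reached();
    }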

Otherwise,
Reviewed-by: Richard Henderson 


r~



[PATCH v2 06/37] target/i386: add ALU load/writeback core

2022-09-20 Thread Paolo Bonzini
Add generic code generation that takes care of preparing operands
around calls to decode.e.gen in a table-driven manner, so that ALU
operations need not take care of that.
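
For illustration, an emitter for a simple binary ALU operation can then be
as small as this (gen_XOR is just a made-up example name here, not part of
this patch):

    static void gen_XOR(DisasContext *s, CPUX86State *env,
                        X86DecodedInsn *decode)
    {
        /* s->T0 and s->T1 were loaded by the generic code; the result
         * left in s->T0 is stored back by gen_writeback.  */
        tcg_gen_xor_tl(s->T0, s->T0, s->T1);
    }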

Signed-off-by: Paolo Bonzini 
---
 target/i386/tcg/decode-new.c.inc |  33 ++-
 target/i386/tcg/decode-new.h |   7 ++
 target/i386/tcg/emit.c.inc   | 155 +++
 target/i386/tcg/translate.c  |  18 
 4 files changed, 212 insertions(+), 1 deletion(-)

diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index a908e8b086..be4e5705ed 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -513,6 +513,20 @@ static bool decode_insn(DisasContext *s, CPUX86State *env, X86DecodeFunc decode_
     return true;
 }
 
+static void decode_temp_free(X86DecodedOp *op)
+{
+    if (op->v_ptr) {
+        tcg_temp_free_ptr(op->v_ptr);
+    }
+}
+
+static void decode_temps_free(X86DecodedInsn *decode)
+{
+    decode_temp_free(&decode->op[0]);
+    decode_temp_free(&decode->op[1]);
+    decode_temp_free(&decode->op[2]);
+}
+
 /*
  * Convert one instruction. s->base.is_jmp is set if the translation must
  * be stopped.
@@ -738,7 +752,24 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
     if (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea) {
         gen_load_ea(s, &decode.mem);
     }
-    decode.e.gen(s, env, &decode);
+    if (s->prefix & PREFIX_LOCK) {
+        if (decode.op[0].unit != X86_OP_INT || !decode.op[0].has_ea) {
+            goto illegal_op;
+        }
+        gen_load(s, &decode, 2, s->T1);
+        decode.e.gen(s, env, &decode);
+    } else {
+        if (decode.op[0].unit == X86_OP_MMX) {
+            compute_mmx_offset(&decode.op[0]);
+        } else if (decode.op[0].unit == X86_OP_SSE) {
+            compute_xmm_offset(&decode.op[0]);
+        }
+        gen_load(s, &decode, 1, s->T0);
+        gen_load(s, &decode, 2, s->T1);
+        decode.e.gen(s, env, &decode);
+        gen_writeback(s, &decode, 0, s->T0);
+    }
+    decode_temps_free(&decode);
     return;
  illegal_op:
     gen_illegal_opcode(s);
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index 2f22d4d22e..3a856b48e7 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -168,6 +168,13 @@ typedef struct X86DecodedOp {
     MemOp ot; /* For b/c/d/p/s/q/v/w/y/z */
     X86OpUnit unit;
     bool has_ea;
+    int offset;   /* For MMX and SSE */
+
+    /*
+     * This field is used internally by macros OP0_PTR/OP1_PTR/OP2_PTR,
+     * do not access directly!
+     */
+    TCGv_ptr v_ptr;
 } X86DecodedOp;
 
 struct X86DecodedInsn {
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index e86364ffc1..8f60658537 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -29,3 +29,158 @@ static void gen_load_ea(DisasContext *s, AddressParts *mem)
     TCGv ea = gen_lea_modrm_1(s, *mem);
     gen_lea_v_seg(s, s->aflag, ea, mem->def_seg, s->override);
 }
+
+static inline int mmx_offset(MemOp ot)
+{
+    switch (ot) {
+    case MO_8:
+        return offsetof(MMXReg, MMX_B(0));
+    case MO_16:
+        return offsetof(MMXReg, MMX_W(0));
+    case MO_32:
+        return offsetof(MMXReg, MMX_L(0));
+    case MO_64:
+        return offsetof(MMXReg, MMX_Q(0));
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static inline int xmm_offset(MemOp ot)
+{
+    switch (ot) {
+    case MO_8:
+        return offsetof(ZMMReg, ZMM_B(0));
+    case MO_16:
+        return offsetof(ZMMReg, ZMM_W(0));
+    case MO_32:
+        return offsetof(ZMMReg, ZMM_L(0));
+    case MO_64:
+        return offsetof(ZMMReg, ZMM_Q(0));
+    case MO_128:
+        return offsetof(ZMMReg, ZMM_X(0));
+    case MO_256:
+        return offsetof(ZMMReg, ZMM_Y(0));
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void compute_mmx_offset(X86DecodedOp *op)
+{
+    if (!op->has_ea) {
+        op->offset = offsetof(CPUX86State, fpregs[op->n].mmx) +
+                     mmx_offset(op->ot);
+    } else {
+        op->offset = offsetof(CPUX86State, mmx_t0) + mmx_offset(op->ot);
+    }
+}
+
+static void compute_xmm_offset(X86DecodedOp *op)
+{
+    if (!op->has_ea) {
+        op->offset = ZMM_OFFSET(op->n) + xmm_offset(op->ot);
+    } else {
+        op->offset = offsetof(CPUX86State, xmm_t0) + xmm_offset(op->ot);
+    }
+}
+
+static void gen_load_sse(DisasContext *s, TCGv temp, MemOp ot, int dest_ofs,
+                         bool aligned)
+{
+    if (ot == MO_8) {
+        gen_op_ld_v(s, MO_8, temp, s->A0);
+        tcg_gen_st8_tl(temp, cpu_env, dest_ofs);
+    } else if (ot == MO_16) {
+        gen_op_ld_v(s, MO_16, temp, s->A0);
+        tcg_gen_st16_tl(temp, cpu_env, dest_ofs);
+    } else if (ot == MO_32) {
+        gen_op_ld_v(s, MO_32, temp, s->A0);
+        tcg_gen_st32_tl(temp, cpu_env, dest_ofs);
+    } else if (ot == MO_64) {
+        gen_ldq_env_A0(s, dest_ofs);
+    } else if (ot == MO_128) {
+        gen_ldo_env_A0(s, dest_ofs, aligned);
+    } else if (ot == MO_256) {
+        gen_ldy_env_A0(s, dest_ofs, aligned);
+    }
+}