On 9/12/22 00:04, Paolo Bonzini wrote:
+static void gen_VMOVHPx_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + if (decode->op[0].offset != decode->op[1].offset) { + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[1].offset + offsetof(XMMReg, XMM_Q(0))); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0))); + } + gen_ldq_env_A0(s, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1))); +}
Don't modify op0 before the load fault.
+static void gen_VMOVHLPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[2].offset + offsetof(XMMReg, XMM_Q(1))); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0))); + if (decode->op[0].offset != decode->op[1].offset) { + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[1].offset + offsetof(XMMReg, XMM_Q(1))); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1))); + } +} + +static void gen_VMOVLHPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[2].offset + offsetof(XMMReg, XMM_Q(0))); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1))); + if (decode->op[0].offset != decode->op[1].offset) { + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[1].offset + offsetof(XMMReg, XMM_Q(0))); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0))); + } +} + +static void gen_VMOVLPx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = sse_vec_len(s, decode); + + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[2].offset + offsetof(XMMReg, XMM_Q(0))); + tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, vec_len, vec_len); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0))); +}
You've just been moving i64 pieces in the other functions, why is this one different using a gvec move in the middle? I do wonder if a generic helper moving offset->offset, with the comparison wouldn't be helpful within these functions, even when you know off1 != off2, due to Q(0) vs Q(1).
+static void gen_VMOVLPx_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = sse_vec_len(s, decode); + + tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, vec_len, vec_len); + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_64); + tcg_gen_st_i64(s->tmp1_i64, s->ptr0, offsetof(ZMMReg, ZMM_Q(0))); +}
Don't modify op0 before load fault.
+static void gen_VMOVSD_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGv zero = tcg_const_i64(0); + + tcg_gen_st_i64(zero, s->ptr0, offsetof(ZMMReg, ZMM_Q(1))); + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_64); + tcg_gen_st_i64(s->tmp1_i64, s->ptr0, offsetof(ZMMReg, ZMM_Q(0))); + tcg_temp_free_i64(zero); +}
Likewise.
+static void gen_VMOVSS_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = sse_vec_len(s, decode); + + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_32); + tcg_gen_st_i32(s->tmp2_i32, s->ptr0, offsetof(ZMMReg, ZMM_L(0))); +}
Likewise. r~