Re: [PATCH v2 15/25] target/i386: move 60-BF opcodes to new decoder

2024-05-06 Thread Richard Henderson

On 5/6/24 01:09, Paolo Bonzini wrote:

Compared to the old decoder, the main differences in translation
are for the little-used ARPL instruction.  IMUL is adjusted a bit
to share more code to produce flags, but is otherwise very similar.

Signed-off-by: Paolo Bonzini 


Reviewed-by: Richard Henderson 


+static void gen_POPA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+   gen_popa(s);
+}

...

+static void gen_PUSHA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+   gen_pusha(s);
+}


3-space indent?


r~



[PATCH v2 15/25] target/i386: move 60-BF opcodes to new decoder

2024-05-06 Thread Paolo Bonzini
Compared to the old decoder, the main differences in translation
are for the little-used ARPL instruction.  IMUL is adjusted a bit
to share more code to produce flags, but is otherwise very similar.

Signed-off-by: Paolo Bonzini 
---
 target/i386/tcg/decode-new.h |   3 +
 target/i386/tcg/translate.c  |   9 +-
 target/i386/tcg/decode-new.c.inc | 185 ++
 target/i386/tcg/emit.c.inc   | 323 +++
 4 files changed, 518 insertions(+), 2 deletions(-)

diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index 8ffde8d1cd6..790ad5e1d00 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -48,6 +48,7 @@ typedef enum X86OpType {
 
 /* Custom */
 X86_TYPE_WM, /* modrm byte selects an XMM/YMM memory operand */
+X86_TYPE_I_unsigned, /* Immediate, zero-extended */
 X86_TYPE_2op, /* 2-operand RMW instruction */
 X86_TYPE_LoBits, /* encoded in bits 0-2 of the operand + REX.B */
 X86_TYPE_0, /* Hard-coded GPRs (RAX..RDI) */
@@ -165,6 +166,8 @@ typedef enum X86InsnSpecial {
 /* Always locked if it has a memory operand (XCHG) */
 X86_SPECIAL_Locked,
 
+/* Do not apply segment base to effective address */
+X86_SPECIAL_NoSeg,
 /*
  * Rd/Mb or Rd/Mw in the manual: register operand 0 is treated as 32 bits
  * (and writeback zero-extends it to 64 bits if applicable).  PREFIX_DATA
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 4069bd4f125..8f633814586 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -1288,7 +1288,11 @@ static void gen_cmps(DisasContext *s, MemOp ot)
 gen_string_movl_A0_EDI(s);
 gen_op_ld_v(s, ot, s->T1, s->A0);
 gen_string_movl_A0_ESI(s);
-gen_op(s, OP_CMPL, ot, OR_TMP0);
+gen_op_ld_v(s, ot, s->T0, s->A0);
+tcg_gen_mov_tl(cpu_cc_src, s->T1);
+tcg_gen_mov_tl(s->cc_srcT, s->T0);
+tcg_gen_sub_tl(cpu_cc_dst, s->T0, s->T1);
+set_cc_op(s, CC_OP_SUBB + ot);
 
 dshift = gen_compute_Dshift(s, ot);
 gen_op_add_reg(s, s->aflag, R_ESI, dshift);
@@ -3121,6 +3125,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
 
 s->pc = s->base.pc_next;
 s->override = -1;
+s->popl_esp_hack = 0;
 #ifdef TARGET_X86_64
 s->rex_r = 0;
 s->rex_x = 0;
@@ -3178,7 +3183,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
 #ifndef CONFIG_USER_ONLY
 use_new &= b <= limit;
 #endif
-if (use_new && b <= 0x5f) {
+if (use_new && b <= 0xbf) {
 disas_insn_new(s, cpu, b);
 return true;
 }
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index c6fd7a053bd..55fc0173a41 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -33,6 +33,22 @@
  * ("cannot encode 16-bit or 32-bit size in 64-bit mode") as modifiers of the
  * "v" or "z" sizes.  The decoder simply makes them separate operand sizes.
  *
+ * The manual lists immediate far destinations as Ap (technically an implicit
+ * argument).  The decoder splits them into two immediates, using "Ip" for
+ * the offset part (that comes first in the instruction stream) and "Iw" for
+ * the segment/selector part.  The size of the offset is given by s->dflag
+ * and the instructions are illegal in 64-bit mode, so the choice of "Ip"
+ * is somewhat arbitrary; "Iv" or "Iz" would work just as well.
+ *
+ * Operand types
+ * -------------
+ *
+ * Immediates are almost always signed or masked away in helpers.  Two
+ * common exceptions are IN/OUT and absolute jumps.  For these, there is
+ * an additional custom operand type "I_unsigned".  Alternatively, the
+ * mask could be applied (and the original sign-extended value would be
+ * optimized away by TCG) in the emitter function.
+ *
  * Vector operands
  * ---------------
  *
@@ -151,6 +167,8 @@
  */
 #define X86_OP_ENTRYrr(op, op0, s0, op1, s1, ...) \
 X86_OP_ENTRY3(op, None, None, op0, s0, op1, s1, ## __VA_ARGS__)
+#define X86_OP_ENTRYwr(op, op0, s0, op1, s1, ...) \
+X86_OP_ENTRY3(op, op0, s0, None, None, op1, s1, ## __VA_ARGS__)
 #define X86_OP_ENTRY2(op, op0, s0, op1, s1, ...)  \
 X86_OP_ENTRY3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
 #define X86_OP_ENTRYw(op, op0, s0, ...)   \
@@ -163,6 +181,7 @@
 X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__)
 
 #define cpuid(feat) .cpuid = X86_FEAT_##feat,
+#define noseg .special = X86_SPECIAL_NoSeg,
 #define xchg .special = X86_SPECIAL_Locked,
 #define lock .special = X86_SPECIAL_HasLock,
 #define mmx .special = X86_SPECIAL_MMX,
@@ -209,6 +228,8 @@
 #define p_66_f3_f2    .valid_prefix = P_66 | P_F3 | P_F2,
 #define p_00_66_f3_f2 .valid_prefix = P_00 | P_66 | P_F3 | P_F2,
 
+#define UNKNOWN_OPCODE ((X86OpEntry) {})
+
 static uint8_t get_modrm(DisasContext *s, CPUX86State *env)
 {
 if (!s->has_modrm) {
@@