The shift instructions are rewritten instead of reusing code from the old
decoder. Rotates use CC_OP_ADCOX more extensively and generally rely
more on the optimizer, so that the code generators are shared between
the immediate-count and variable-count cases.
In particular, this makes gen_RCL and gen_RCR pretty efficient for the
count == 1 case, which becomes (apart from a few extra movs) something like:
(compute_cc_all if needed)
// save old value for OF calculation
mov cc_src2, T0
// the bulk of RCL is just this!
deposit T0, cc_src, T0, 1, TARGET_LONG_BITS - 1
// compute carry
shr cc_dst, cc_src2, length - 1
and cc_dst, cc_dst, 1
// compute overflow
xor cc_src2, cc_src2, T0
extract cc_src2, cc_src2, length - 1, 1
32-bit MUL and IMUL are also slightly more efficient on 64-bit hosts.
Signed-off-by: Paolo Bonzini
---
 target/i386/tcg/decode-new.h     |    1 +
 target/i386/tcg/translate.c      |   23 +-
 target/i386/tcg/decode-new.c.inc |  142 +
 target/i386/tcg/emit.c.inc       | 1014 +-
 4 files changed, 1169 insertions(+), 11 deletions(-)
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index 790ad5e1d00..77bb31eb143 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -89,6 +89,7 @@ typedef enum X86OpSize {
X86_SIZE_x, /* 128/256-bit, based on operand size */
X86_SIZE_y, /* 32/64-bit, based on operand size */
X86_SIZE_z, /* 16-bit for 16-bit operand size, else 32-bit */
+X86_SIZE_z_f64, /* 32-bit for 32-bit operand size or 64-bit mode, else 16-bit */
/* Custom */
X86_SIZE_d64,
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 708fe023224..79b6e2760fe 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -38,6 +38,9 @@
#include "exec/helper-info.c.inc"
#undef HELPER_H
+/* Fixes for Windows namespace pollution. */
+#undef IN
+#undef OUT
#define PREFIX_REPZ 0x01
#define PREFIX_REPNZ 0x02
@@ -2488,14 +2491,24 @@ static inline int insn_const_size(MemOp ot)
}
}
+static void gen_conditional_jump_labels(DisasContext *s, target_long diff,
+TCGLabel *not_taken, TCGLabel *taken)
+{
+if (not_taken) {
+gen_set_label(not_taken);
+}
+gen_jmp_rel_csize(s, 0, 1);
+
+gen_set_label(taken);
+gen_jmp_rel(s, s->dflag, diff, 0);
+}
+
static void gen_jcc(DisasContext *s, int b, int diff)
{
TCGLabel *l1 = gen_new_label();
gen_jcc1(s, b, l1);
-gen_jmp_rel_csize(s, 0, 1);
-gen_set_label(l1);
-gen_jmp_rel(s, s->dflag, diff, 0);
+gen_conditional_jump_labels(s, diff, NULL, l1);
}
static void gen_cmovcc1(DisasContext *s, int b, TCGv dest, TCGv src)
@@ -2752,7 +2765,7 @@ static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
/* an interrupt is different from an exception because of the
privilege checks */
-static void gen_interrupt(DisasContext *s, int intno)
+static void gen_interrupt(DisasContext *s, uint8_t intno)
{
gen_update_cc_op(s);
gen_update_eip_cur(s);
@@ -3183,7 +3196,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
#ifndef CONFIG_USER_ONLY
use_new &= b <= limit;
#endif
-if (use_new && b <= 0xbf) {
+if (use_new && (b < 0xd8 || b >= 0xe0)) {
disas_insn_new(s, cpu, b);
return true;
}
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 55fc0173a41..a47ecab6dd4 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -135,6 +135,8 @@
## __VA_ARGS__\
}
+#define X86_OP_GROUP1(op, op0, s0, ...) \
+X86_OP_GROUP3(op, op0, s0, 2op, s0, None, None, ## __VA_ARGS__)
#define X86_OP_GROUP2(op, op0, s0, op1, s1, ...) \
X86_OP_GROUP3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
#define X86_OP_GROUPw(op, op0, s0, ...) \
@@ -1174,6 +1176,83 @@ static void decode_group1A(DisasContext *s, CPUX86State *env, X86OpEntry *entry,
}
}
+static void decode_group2(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+static const X86GenFunc group2_gen[8] = {
+gen_ROL, gen_ROR, gen_RCL, gen_RCR,
+gen_SHL, gen_SHR, gen_SHL /* SAL, undocumented */, gen_SAR,
+};
+int op = (get_modrm(s, env) >> 3) & 7;
+entry->gen = group2_gen[op];
+if (op == 7) {
+entry->special = X86_SPECIAL_SExtT0;
+} else {
+entry->special = X86_SPECIAL_ZExtT0;
+}
+}
+
+static void decode_group3(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+static const X86OpEntry opcodes_grp3[16] = {
+/* 0xf6 */
+[0x00] = X86_OP_ENTRYrr(AND, E,b, I,b),
+[0x02] = X86_OP_ENTRY1(NOT, E,b, lock),
+[0x03] = X86_OP_ENTRY1(NEG,