This saves 2 insns and 10 bytes from the implementation of
each memory operation.
Signed-off-by: Richard Henderson <r...@twiddle.net>
---
tcg/i386/tcg-target.c | 120 +++---
1 file changed, 56 insertions(+), 64 deletions(-)
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index bbe2963..beffbbe 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1421,16 +1421,25 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
int arch_prctl(int code, unsigned long addr);
+static int32_t guest_base_ofs;
static int guest_base_flags;
-static inline void setup_guest_base_seg(void)
+static int guest_base_reg = -1;
+static inline void setup_guest_base(TCGContext *s)
{
if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) {
guest_base_flags = P_GS;
+} else if (GUEST_BASE == (int32_t)GUEST_BASE) {
+guest_base_ofs = GUEST_BASE;
+} else {
+guest_base_reg = TCG_REG_EBP;
+tcg_regset_set_reg(s->reserved_regs, guest_base_reg);
+tcg_out_movi(s, TCG_TYPE_PTR, guest_base_reg, GUEST_BASE);
}
}
#else
-# define guest_base_flags 0
-static inline void setup_guest_base_seg(void) { }
+# define guest_base_flags 0
+# define guest_base_reg -1
+# define guest_base_ofs GUEST_BASE
#endif /* SOFTMMU */
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
@@ -1571,38 +1580,28 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
s->code_ptr, label_ptr);
#else
{
-int32_t offset = GUEST_BASE;
TCGReg base = addrlo;
-int index = -1;
-int seg = 0;
+int flags = 0;
-if (GUEST_BASE && guest_base_flags) {
-seg = guest_base_flags;
-offset = 0;
+if (GUEST_BASE == 0 || guest_base_flags) {
+flags = guest_base_flags;
if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
-seg |= P_ADDR32;
-}
-} else if (TCG_TARGET_REG_BITS == 64) {
-if (TARGET_LONG_BITS == 32) {
-tcg_out_ext32u(s, TCG_REG_L0, base);
-base = TCG_REG_L0;
-}
-if (offset != GUEST_BASE) {
-tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
-index = TCG_REG_L1;
-offset = 0;
+flags |= P_ADDR32;
}
+} else if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
+tcg_out_ext32u(s, TCG_REG_L1, base);
+base = TCG_REG_L1;
}
-tcg_out_qemu_ld_direct(s, datalo, datahi,
- base, index, offset, seg, opc);
+tcg_out_qemu_ld_direct(s, datalo, datahi, base, guest_base_reg,
+ guest_base_ofs, flags, opc);
}
#endif
}
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
- TCGReg base, intptr_t ofs, int seg,
- TCGMemOp memop)
+ TCGReg base, int index, intptr_t ofs,
+ int seg, TCGMemOp memop)
{
/* ??? Ideally we wouldn't need a scratch register. For user-only,
we could perform the bswap twice to restore the original value
@@ -1626,8 +1625,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
datalo = scratch;
}
-tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
- datalo, base, ofs);
+tcg_out_modrm_sib_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg, datalo,
+ base, index, 0, ofs);
break;
case MO_16:
if (bswap) {
@@ -1635,7 +1634,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
tcg_out_rolw_8(s, scratch);
datalo = scratch;
}
-tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
+tcg_out_modrm_sib_offset(s, movop + P_DATA16 + seg, datalo,
+ base, index, 0, ofs);
break;
case MO_32:
if (bswap) {
@@ -1643,7 +1643,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
tcg_out_bswap32(s, scratch);
datalo = scratch;
}
-tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
+tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
break;
case MO_64:
if (TCG_TARGET_REG_BITS == 64) {
@@ -1652,22 +1652,27 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
tcg_out_bswap64(s, scratch);
datalo = scratch;
}
-tcg_out_modrm_offset(s, movop