Optimize "rep ins" by allowing the emulator to write back more than one
datum at a time. Introduce a new operand type, OP_MEM_STR, which tells
writeback() that dst contains a pointer to an array that should be written
back, as opposed to just one data element.

Signed-off-by: Gleb Natapov <g...@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h |    4 +++-
 arch/x86/kvm/emulate.c             |   33 ++++++++++++++++++++++++++++-----
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/kvm_emulate.h 
b/arch/x86/include/asm/kvm_emulate.h
index c80c091..08d1c64 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -213,8 +213,9 @@ typedef u32 __attribute__((vector_size(16))) sse128_t;
 
 /* Type, address-of, and value of an instruction's operand. */
 struct operand {
-       enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_MM, OP_NONE } type;
+       enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_MM, OP_NONE } 
type;
        unsigned int bytes;
+       unsigned int count;
        union {
                unsigned long orig_val;
                u64 orig_val64;
@@ -234,6 +235,7 @@ struct operand {
                char valptr[sizeof(unsigned long) + 2];
                sse128_t vec_val;
                u64 mm_val;
+               void *data;
        };
 };
 
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 8a1c42e..e87e616 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1301,8 +1301,15 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
                rc->end = n * size;
        }
 
-       memcpy(dest, rc->data + rc->pos, size);
-       rc->pos += size;
+       if (ctxt->rep_prefix && !(ctxt->eflags & EFLG_DF)) {
+               ctxt->dst.data = rc->data + rc->pos;
+               ctxt->dst.type = OP_MEM_STR;
+               ctxt->dst.count = (rc->end - rc->pos) / size;
+               rc->pos = rc->end;
+       } else {
+               memcpy(dest, rc->data + rc->pos, size);
+               rc->pos += size;
+       }
        return 1;
 }
 
@@ -1546,6 +1553,14 @@ static int writeback(struct x86_emulate_ctxt *ctxt)
                if (rc != X86EMUL_CONTINUE)
                        return rc;
                break;
+       case OP_MEM_STR:
+               rc = segmented_write(ctxt,
+                               ctxt->dst.addr.mem,
+                               ctxt->dst.data,
+                               ctxt->dst.bytes * ctxt->dst.count);
+               if (rc != X86EMUL_CONTINUE)
+                       return rc;
+               break;
        case OP_XMM:
                write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm);
                break;
@@ -2793,7 +2808,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
                struct operand *op)
 {
-       int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
+       int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
 
        register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes);
        op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg));
@@ -3733,7 +3748,7 @@ static struct opcode opcode_table[256] = {
        I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
        I(SrcImmByte | Mov | Stack, em_push),
        I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
-       I2bvIP(DstDI | SrcDX | Mov | String, em_in, ins, check_perm_in), /* 
insb, insw/insd */
+       I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, 
check_perm_in), /* insb, insw/insd */
        I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, 
outsw/outsd */
        /* 0x70 - 0x7F */
        X16(D(SrcImmByte)),
@@ -3991,6 +4006,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, 
struct operand *op,
                        register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI));
                op->addr.mem.seg = VCPU_SREG_ES;
                op->val = 0;
+               op->count = 1;
                break;
        case OpDX:
                op->type = OP_REG;
@@ -4034,6 +4050,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, 
struct operand *op,
                        register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI));
                op->addr.mem.seg = seg_override(ctxt);
                op->val = 0;
+               op->count = 1;
                break;
        case OpImmFAddr:
                op->type = OP_IMM;
@@ -4575,8 +4592,14 @@ writeback:
                string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
 
        if (ctxt->rep_prefix && (ctxt->d & String)) {
+               unsigned int count;
                struct read_cache *r = &ctxt->io_read;
-               register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), 
-1);
+               if ((ctxt->d & SrcMask) == SrcSI)
+                       count = ctxt->src.count;
+               else
+                       count = ctxt->dst.count;
+               register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX),
+                               -count);
 
                if (!string_insn_completed(ctxt)) {
                        /*
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to