Optimize the "rep ins" instruction by doing the I/O in large chunks
ahead of time instead of fetching one element at a time during
instruction emulation.

Signed-off-by: Gleb Natapov <g...@redhat.com>
---
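A note on the approach (not part of the commit message): the io_read
cache below amortizes backend calls by filling up to 1024 bytes at once
and then handing elements back one at a time. A minimal sketch of the
same refill-then-drain pattern, with a toy backend_read() standing in
for ops->pio_in_emulated() (both cached_read and backend_read are
illustrative names, not part of the patch), might look like this:

#include <string.h>

struct read_cache {
	unsigned char data[1024];
	unsigned long pos;	/* next unread byte in data[] */
	unsigned long end;	/* one past the last valid byte */
};

/* Toy backend for illustration: pretend the port returns 0xab bytes. */
static int backend_read(unsigned int size, void *buf, unsigned int n)
{
	memset(buf, 0xab, (size_t)size * n);
	return 1;		/* 0 would mean "cannot complete now" */
}

/* Hand back one element per call; refill in bulk only when empty. */
static int cached_read(struct read_cache *mc, unsigned int size,
		       void *dest, unsigned int count)
{
	if (mc->pos == mc->end) {	/* cache drained: refill */
		unsigned int n = sizeof(mc->data) / size;

		if (n > count)		/* don't read past the rep count */
			n = count;
		mc->pos = mc->end = 0;
		if (!backend_read(size, mc->data, n))
			return 0;	/* backend could not complete */
		mc->end = n * size;
	}
	memcpy(dest, mc->data + mc->pos, size);
	mc->pos += size;
	return 1;
}

The point is that a "rep ins" of, say, 512 byte-sized elements costs one
backend call instead of 512, and clamping n to 'count' keeps a refill
from reading past the end of the rep sequence.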
 arch/x86/include/asm/kvm_emulate.h |    7 +++++++
 arch/x86/kvm/emulate.c             |   34 ++++++++++++++++++++++++++++++----
 2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index f74b4ad..da7a711 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -150,6 +150,12 @@ struct fetch_cache {
        unsigned long end;
 };
 
+struct read_cache {
+       u8 data[1024];
+       unsigned long pos; /* next unread byte in data[] */
+       unsigned long end; /* one past the last valid byte */
+};
+
 struct decode_cache {
        u8 twobyte;
        u8 b;
@@ -177,6 +183,7 @@ struct decode_cache {
        void *modrm_ptr;
        unsigned long modrm_val;
        struct fetch_cache fetch;
+       struct read_cache io_read;
 };
 
 struct x86_emulate_ctxt {
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 76ed77d..987be2a 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1222,6 +1222,28 @@ done:
        return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
 }
 
+static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
+                          struct x86_emulate_ops *ops,
+                          unsigned int size, unsigned short port,
+                          void *dest, unsigned int count)
+{
+       struct read_cache *mc = &ctxt->decode.io_read;
+
+       if (mc->pos == mc->end) { /* refill pio read ahead */
+               unsigned int n = sizeof(mc->data) / size;
+               n = min(n, count);
+               mc->pos = mc->end = 0;
+               if (!ops->pio_in_emulated(size, port, mc->data, n,
+                                         ctxt->vcpu))
+                       return 0; /* I/O could not complete; caller skips writeback */
+               mc->end = n * size;
+       }
+
+       memcpy(dest, mc->data + mc->pos, size);
+       mc->pos += size;
+       return 1;
+}
+
 static u32 desc_limit_scaled(struct desc_struct *desc)
 {
        u32 limit = get_desc_limit(desc);
@@ -2601,8 +2623,11 @@ special_insn:
                        kvm_inject_gp(ctxt->vcpu, 0);
                        goto done;
                }
-               if (!ops->pio_in_emulated(c->dst.bytes, c->regs[VCPU_REGS_RDX],
-                                         &c->dst.val, 1, ctxt->vcpu))
+               if (c->rep_prefix)
+                       ctxt->restart = true;
+               if (!pio_in_emulated(ctxt, ops, c->dst.bytes,
+                                    c->regs[VCPU_REGS_RDX], &c->dst.val,
+                                    c->rep_prefix ? c->regs[VCPU_REGS_RCX] : 1))
                        goto done; /* IO is needed, skip writeback */
 
                register_address_increment(c, &c->regs[VCPU_REGS_RDI],
@@ -2908,8 +2933,9 @@ special_insn:
                        goto done;
                }
                if (io_dir_in)
-                       ops->pio_in_emulated((c->d & ByteOp) ? 1 : c->op_bytes,
-                                            port, &c->dst.val, 1, ctxt->vcpu);
+                       pio_in_emulated(ctxt, ops,
+                                       (c->d & ByteOp) ? 1 : c->op_bytes,
+                                       port, &c->dst.val, 1);
                else
                        ops->pio_out_emulated((c->d & ByteOp) ? 1 : c->op_bytes,
                                              port, &c->regs[VCPU_REGS_RAX], 1,
-- 
1.6.5
