In preparation for a threaded execution model, this patch introduces a KVM VCPU
data structure, 'struct kvm_cpu', and moves the per-VCPU state and helpers out
of 'struct kvm' and kvm.c into it.

Cc: Asias He <[email protected]>
Cc: Cyrill Gorcunov <[email protected]>
Cc: Ingo Molnar <[email protected]>
Signed-off-by: Pekka Enberg <[email protected]>
---
 tools/kvm/Makefile              |    1 +
 tools/kvm/cpuid.c               |    6 +-
 tools/kvm/include/kvm/kvm-cpu.h |   33 ++++
 tools/kvm/include/kvm/kvm.h     |   16 --
 tools/kvm/ioport.c              |    2 +
 tools/kvm/kvm-cpu.c             |  370 +++++++++++++++++++++++++++++++++++++++
 tools/kvm/kvm-run.c             |   62 ++++---
 tools/kvm/kvm.c                 |  338 -----------------------------------
 8 files changed, 445 insertions(+), 383 deletions(-)
 create mode 100644 tools/kvm/include/kvm/kvm-cpu.h
 create mode 100644 tools/kvm/kvm-cpu.c

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 4bcfd74..141cdec 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -21,6 +21,7 @@ OBJS  += disk-image.o
 OBJS   += interrupt.o
 OBJS   += ioport.o
 OBJS   += kvm.o
+OBJS   += kvm-cpu.o
 OBJS   += main.o
 OBJS   += mmio.o
 OBJS   += pci.o
diff --git a/tools/kvm/cpuid.c b/tools/kvm/cpuid.c
index f7cc930..0b26eb1 100644
--- a/tools/kvm/cpuid.c
+++ b/tools/kvm/cpuid.c
@@ -1,3 +1,5 @@
+#include "kvm/kvm-cpu.h"
+
 #include "kvm/kvm.h"
 #include "kvm/util.h"
 
@@ -30,14 +32,14 @@ static void filter_cpuid(struct kvm_cpuid2 *kvm_cpuid)
        }
 }
 
-void kvm__setup_cpuid(struct kvm *self)
+void kvm_cpu__setup_cpuid(struct kvm_cpu *self)
 {
        struct kvm_cpuid2 *kvm_cpuid;
 
        kvm_cpuid = calloc(1, sizeof(*kvm_cpuid) + MAX_KVM_CPUID_ENTRIES * 
sizeof(*kvm_cpuid->entries));
 
        kvm_cpuid->nent = MAX_KVM_CPUID_ENTRIES;
-       if (ioctl(self->sys_fd, KVM_GET_SUPPORTED_CPUID, kvm_cpuid) < 0)
+       if (ioctl(self->kvm->sys_fd, KVM_GET_SUPPORTED_CPUID, kvm_cpuid) < 0)
                die_perror("KVM_GET_SUPPORTED_CPUID failed");
 
        filter_cpuid(kvm_cpuid);
diff --git a/tools/kvm/include/kvm/kvm-cpu.h b/tools/kvm/include/kvm/kvm-cpu.h
new file mode 100644
index 0000000..d36dadf
--- /dev/null
+++ b/tools/kvm/include/kvm/kvm-cpu.h
@@ -0,0 +1,33 @@
+#ifndef KVM__KVM_CPU_H
+#define KVM__KVM_CPU_H
+
+#include <linux/kvm.h> /* for struct kvm_regs */
+
+#include <stdint.h>
+
+struct kvm;
+
+struct kvm_cpu {
+       struct kvm              *kvm;           /* parent KVM */
+       int                     vcpu_fd;        /* For VCPU ioctls() */
+       struct kvm_run          *kvm_run;
+
+       struct kvm_regs         regs;
+       struct kvm_sregs        sregs;
+       struct kvm_fpu          fpu;
+
+       struct kvm_msrs         *msrs;          /* dynamically allocated */
+};
+
+struct kvm_cpu *kvm_cpu__init(struct kvm *kvm);
+void kvm_cpu__delete(struct kvm_cpu *self);
+void kvm_cpu__reset_vcpu(struct kvm_cpu *self);
+void kvm_cpu__setup_cpuid(struct kvm_cpu *self);
+void kvm_cpu__enable_singlestep(struct kvm_cpu *self);
+void kvm_cpu__run(struct kvm_cpu *self);
+
+void kvm_cpu__show_code(struct kvm_cpu *self);
+void kvm_cpu__show_registers(struct kvm_cpu *self);
+void kvm_cpu__show_page_tables(struct kvm_cpu *self);
+
+#endif /* KVM__KVM_CPU_H */
diff --git a/tools/kvm/include/kvm/kvm.h b/tools/kvm/include/kvm/kvm.h
index a099307..7af98f9 100644
--- a/tools/kvm/include/kvm/kvm.h
+++ b/tools/kvm/include/kvm/kvm.h
@@ -3,8 +3,6 @@
 
 #include "kvm/interrupt.h"
 
-#include <linux/kvm.h> /* for struct kvm_regs */
-
 #include <stdbool.h>
 #include <stdint.h>
 #include <time.h>
@@ -12,9 +10,7 @@
 struct kvm {
        int                     sys_fd;         /* For system ioctls(), i.e. 
/dev/kvm */
        int                     vm_fd;          /* For VM ioctls() */
-       int                     vcpu_fd;        /* For VCPU ioctls() */
        timer_t                 timerid;        /* Posix timer for interrupts */
-       struct kvm_run          *kvm_run;
 
        struct disk_image       *disk_image;
        uint64_t                ram_size;
@@ -26,25 +22,16 @@ struct kvm {
        uint16_t                boot_ip;
        uint16_t                boot_sp;
 
-       struct kvm_regs         regs;
-       struct kvm_sregs        sregs;
-       struct kvm_fpu          fpu;
-       struct kvm_msrs         *msrs;  /* dynamically allocated */
-
        struct interrupt_table  interrupt_table;
 };
 
 struct kvm *kvm__init(const char *kvm_dev, unsigned long ram_size);
 void kvm__delete(struct kvm *self);
-void kvm__setup_cpuid(struct kvm *self);
-void kvm__enable_singlestep(struct kvm *self);
 bool kvm__load_kernel(struct kvm *kvm, const char *kernel_filename,
                        const char *initrd_filename, const char 
*kernel_cmdline);
-void kvm__reset_vcpu(struct kvm *self);
 void kvm__setup_bios(struct kvm *self);
 void kvm__start_timer(struct kvm *self);
 void kvm__stop_timer(struct kvm *self);
-void kvm__run(struct kvm *self);
 void kvm__irq_line(struct kvm *self, int irq, int level);
 bool kvm__emulate_io(struct kvm *self, uint16_t port, void *data, int 
direction, int size, uint32_t count);
 bool kvm__emulate_mmio(struct kvm *self, uint64_t phys_addr, uint8_t *data, 
uint32_t len, uint8_t is_write);
@@ -52,9 +39,6 @@ bool kvm__emulate_mmio(struct kvm *self, uint64_t phys_addr, 
uint8_t *data, uint
 /*
  * Debugging
  */
-void kvm__show_code(struct kvm *self);
-void kvm__show_registers(struct kvm *self);
-void kvm__show_page_tables(struct kvm *self);
 void kvm__dump_mem(struct kvm *self, unsigned long addr, unsigned long size);
 
 extern const char *kvm_exit_reasons[];
diff --git a/tools/kvm/ioport.c b/tools/kvm/ioport.c
index 4579e89..6303571 100644
--- a/tools/kvm/ioport.c
+++ b/tools/kvm/ioport.c
@@ -2,6 +2,8 @@
 
 #include "kvm/kvm.h"
 
+#include <linux/kvm.h> /* for KVM_EXIT_* */
+
 #include <stdbool.h>
 #include <assert.h>
 #include <limits.h>
diff --git a/tools/kvm/kvm-cpu.c b/tools/kvm/kvm-cpu.c
new file mode 100644
index 0000000..374adb2
--- /dev/null
+++ b/tools/kvm/kvm-cpu.c
@@ -0,0 +1,370 @@
+#include "kvm/kvm-cpu.h"
+
+#include "kvm/util.h"
+#include "kvm/kvm.h"
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <stdio.h>
+
+static inline bool is_in_protected_mode(struct kvm_cpu *self)
+{
+       return self->sregs.cr0 & 0x01;
+}
+
+static inline uint64_t ip_to_flat(struct kvm_cpu *self, uint64_t ip)
+{
+       uint64_t cs;
+
+       /*
+        * NOTE! We should take code segment base address into account here.
+        * Luckily it's usually zero because Linux uses flat memory model.
+        */
+       if (is_in_protected_mode(self))
+               return ip;
+
+       cs = self->sregs.cs.selector;
+
+       return ip + (cs << 4);
+}
+
+static inline uint32_t selector_to_base(uint16_t selector)
+{
+       /*
+        * KVM on Intel requires 'base' to be 'selector * 16' in real mode.
+        */
+       return (uint32_t)selector * 16;
+}
+
+static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm)
+{
+       struct kvm_cpu *self;
+
+       self            = calloc(1, sizeof *self);
+       if (!self)
+               return NULL;
+
+       self->kvm       = kvm;
+
+       return self;
+}
+
+void kvm_cpu__delete(struct kvm_cpu *self)
+{
+       if (self->msrs)
+               free(self->msrs);
+
+       free(self);
+}
+
+struct kvm_cpu *kvm_cpu__init(struct kvm *kvm)
+{
+       struct kvm_cpu *self;
+       int mmap_size;
+
+       self            = kvm_cpu__new(kvm);
+       if (!self)
+               return NULL;
+
+       self->vcpu_fd = ioctl(self->kvm->vm_fd, KVM_CREATE_VCPU, 0);
+       if (self->vcpu_fd < 0)
+               die_perror("KVM_CREATE_VCPU ioctl");
+
+       mmap_size = ioctl(self->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
+       if (mmap_size < 0)
+               die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");
+
+       self->kvm_run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, 
self->vcpu_fd, 0);
+       if (self->kvm_run == MAP_FAILED)
+               die("unable to mmap vcpu fd");
+
+       return self;
+}
+
+void kvm_cpu__enable_singlestep(struct kvm_cpu *self)
+{
+       struct kvm_guest_debug debug = {
+               .control        = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
+       };
+
+       if (ioctl(self->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0)
+               warning("KVM_SET_GUEST_DEBUG failed");
+}
+
+static struct kvm_msrs *kvm_msrs__new(size_t nmsrs)
+{
+       struct kvm_msrs *self = calloc(1, sizeof(*self) + (sizeof(struct 
kvm_msr_entry) * nmsrs));
+
+       if (!self)
+               die("out of memory");
+
+       return self;
+}
+
+#define MSR_IA32_TIME_STAMP_COUNTER    0x10
+
+#define MSR_IA32_SYSENTER_CS           0x174
+#define MSR_IA32_SYSENTER_ESP          0x175
+#define MSR_IA32_SYSENTER_EIP          0x176
+
+#define MSR_IA32_STAR                  0xc0000081
+#define MSR_IA32_LSTAR                 0xc0000082
+#define MSR_IA32_CSTAR                 0xc0000083
+#define MSR_IA32_FMASK                 0xc0000084
+#define MSR_IA32_KERNEL_GS_BASE                0xc0000102
+
+#define KVM_MSR_ENTRY(_index, _data)   \
+       (struct kvm_msr_entry) { .index = _index, .data = _data }
+
+static void kvm_cpu__setup_msrs(struct kvm_cpu *self)
+{
+       unsigned long ndx = 0;
+
+       self->msrs = kvm_msrs__new(100);
+
+       self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS,        
0x0);
+       self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP,       
0x0);
+       self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP,       
0x0);
+#ifdef CONFIG_X86_64
+       self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_STAR,               
0x0);
+       self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_CSTAR,              
0x0);
+       self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_KERNEL_GS_BASE,     
0x0);
+       self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_FMASK,              
0x0);
+       self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_LSTAR,              
0x0);
+#endif
+       self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TIME_STAMP_COUNTER, 
0x0);
+
+       self->msrs->nmsrs       = ndx;
+
+       if (ioctl(self->vcpu_fd, KVM_SET_MSRS, self->msrs) < 0)
+               die_perror("KVM_SET_MSRS failed");
+}
+
+static void kvm_cpu__setup_fpu(struct kvm_cpu *self)
+{
+       self->fpu = (struct kvm_fpu) {
+               .fcw            = 0x37f,
+               .mxcsr          = 0x1f80,
+       };
+
+       if (ioctl(self->vcpu_fd, KVM_SET_FPU, &self->fpu) < 0)
+               die_perror("KVM_SET_FPU failed");
+}
+
+static void kvm_cpu__setup_regs(struct kvm_cpu *self)
+{
+       self->regs = (struct kvm_regs) {
+               /* We start the guest in 16-bit real mode  */
+               .rflags         = 0x0000000000000002ULL,
+
+               .rip            = self->kvm->boot_ip,
+               .rsp            = self->kvm->boot_sp,
+               .rbp            = self->kvm->boot_sp,
+       };
+
+       if (self->regs.rip > USHRT_MAX)
+               die("ip 0x%" PRIx64 " is too high for real mode", (uint64_t) 
self->regs.rip);
+
+       if (ioctl(self->vcpu_fd, KVM_SET_REGS, &self->regs) < 0)
+               die_perror("KVM_SET_REGS failed");
+}
+
+static void kvm_cpu__setup_sregs(struct kvm_cpu *self)
+{
+
+       if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
+               die_perror("KVM_GET_SREGS failed");
+
+       self->sregs.cs.selector = self->kvm->boot_selector;
+       self->sregs.cs.base     = selector_to_base(self->kvm->boot_selector);
+       self->sregs.ss.selector = self->kvm->boot_selector;
+       self->sregs.ss.base     = selector_to_base(self->kvm->boot_selector);
+       self->sregs.ds.selector = self->kvm->boot_selector;
+       self->sregs.ds.base     = selector_to_base(self->kvm->boot_selector);
+       self->sregs.es.selector = self->kvm->boot_selector;
+       self->sregs.es.base     = selector_to_base(self->kvm->boot_selector);
+       self->sregs.fs.selector = self->kvm->boot_selector;
+       self->sregs.fs.base     = selector_to_base(self->kvm->boot_selector);
+       self->sregs.gs.selector = self->kvm->boot_selector;
+       self->sregs.gs.base     = selector_to_base(self->kvm->boot_selector);
+
+       if (ioctl(self->vcpu_fd, KVM_SET_SREGS, &self->sregs) < 0)
+               die_perror("KVM_SET_SREGS failed");
+}
+
+/**
+ * kvm_cpu__reset_vcpu - reset virtual CPU to a known state
+ */
+void kvm_cpu__reset_vcpu(struct kvm_cpu *self)
+{
+       kvm_cpu__setup_sregs(self);
+       kvm_cpu__setup_regs(self);
+       kvm_cpu__setup_fpu(self);
+       kvm_cpu__setup_msrs(self);
+}
+
+static void print_dtable(const char *name, struct kvm_dtable *dtable)
+{
+       printf(" %s                 %016" PRIx64 "  %08" PRIx16 "\n",
+               name, (uint64_t) dtable->base, (uint16_t) dtable->limit);
+}
+
+static void print_segment(const char *name, struct kvm_segment *seg)
+{
+       printf(" %s       %04" PRIx16 "      %016" PRIx64 "  %08" PRIx32 "  
%02" PRIx8 "    %x %x   %x  %x %x %x %x\n",
+               name, (uint16_t) seg->selector, (uint64_t) seg->base, 
(uint32_t) seg->limit,
+               (uint8_t) seg->type, seg->present, seg->dpl, seg->db, seg->s, 
seg->l, seg->g, seg->avl);
+}
+
+void kvm_cpu__show_registers(struct kvm_cpu *self)
+{
+       unsigned long cr0, cr2, cr3;
+       unsigned long cr4, cr8;
+       unsigned long rax, rbx, rcx;
+       unsigned long rdx, rsi, rdi;
+       unsigned long rbp,  r8,  r9;
+       unsigned long r10, r11, r12;
+       unsigned long r13, r14, r15;
+       unsigned long rip, rsp;
+       struct kvm_sregs sregs;
+       unsigned long rflags;
+       struct kvm_regs regs;
+       int i;
+
+       if (ioctl(self->vcpu_fd, KVM_GET_REGS, &regs) < 0)
+               die("KVM_GET_REGS failed");
+
+       rflags = regs.rflags;
+
+       rip = regs.rip; rsp = regs.rsp;
+       rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx;
+       rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi;
+       rbp = regs.rbp; r8  = regs.r8;  r9  = regs.r9;
+       r10 = regs.r10; r11 = regs.r11; r12 = regs.r12;
+       r13 = regs.r13; r14 = regs.r14; r15 = regs.r15;
+       /* Print the values just read into the local 'regs' snapshot. */
+       printf("Registers:\n");
+       printf(" rip: %016lx   rsp: %016lx flags: %016lx\n", rip, rsp, rflags);
+       printf(" rax: %016lx   rbx: %016lx   rcx: %016lx\n", rax, rbx, rcx);
+       printf(" rdx: %016lx   rsi: %016lx   rdi: %016lx\n", rdx, rsi, rdi);
+       printf(" rbp: %016lx   r8:  %016lx   r9:  %016lx\n", rbp, r8,  r9);
+       printf(" r10: %016lx   r11: %016lx   r12: %016lx\n", r10, r11, r12);
+       printf(" r13: %016lx   r14: %016lx   r15: %016lx\n", r13, r14, r15);
+       /* Control/segment state needs a separate ioctl; name it correctly on failure. */
+       if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
+               die("KVM_GET_SREGS failed");
+
+       cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3;
+       cr4 = sregs.cr4; cr8 = sregs.cr8;
+
+       printf(" cr0: %016lx   cr2: %016lx   cr3: %016lx\n", cr0, cr2, cr3);
+       printf(" cr4: %016lx   cr8: %016lx\n", cr4, cr8);
+       printf("Segment registers:\n");
+       printf(" register  selector  base              limit     type  p dpl db 
s l g avl\n");
+       print_segment("cs ", &sregs.cs);
+       print_segment("ss ", &sregs.ss);
+       print_segment("ds ", &sregs.ds);
+       print_segment("es ", &sregs.es);
+       print_segment("fs ", &sregs.fs);
+       print_segment("gs ", &sregs.gs);
+       print_segment("tr ", &sregs.tr);
+       print_segment("ldt", &sregs.ldt);
+       print_dtable("gdt", &sregs.gdt);
+       print_dtable("idt", &sregs.idt);
+       printf(" [ efer: %016" PRIx64 "  apic base: %016" PRIx64 "  nmi: %s 
]\n",
+               (uint64_t) sregs.efer, (uint64_t) sregs.apic_base,
+               (self->kvm->nmi_disabled ? "disabled" : "enabled"));
+       printf("Interrupt bitmap:\n");
+       printf(" ");
+       for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++)
+               printf("%016" PRIx64 " ", (uint64_t) sregs.interrupt_bitmap[i]);
+       printf("\n");
+}
+
+void kvm_cpu__show_code(struct kvm_cpu *self)
+{
+       unsigned int code_bytes = 64;
+       unsigned int code_prologue = code_bytes * 43 / 64;
+       unsigned int code_len = code_bytes;
+       unsigned char c;
+       unsigned int i;
+       uint8_t *ip;
+
+       if (ioctl(self->vcpu_fd, KVM_GET_REGS, &self->regs) < 0)
+               die("KVM_GET_REGS failed");
+
+       if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
+               die("KVM_GET_SREGS failed");
+
+       ip = guest_flat_to_host(self->kvm, ip_to_flat(self, self->regs.rip) - 
code_prologue);
+
+       printf("Code: ");
+
+       for (i = 0; i < code_len; i++, ip++) {
+               if (!host_ptr_in_ram(self->kvm, ip))
+                       break;
+
+               c = *ip;
+
+               if (ip == guest_flat_to_host(self->kvm, ip_to_flat(self, 
self->regs.rip)))
+                       printf("<%02x> ", c);
+               else
+                       printf("%02x ", c);
+       }
+
+       printf("\n");
+
+       printf("Stack:\n");
+       kvm__dump_mem(self->kvm, self->regs.rsp, 32);
+}
+
+void kvm_cpu__show_page_tables(struct kvm_cpu *self)
+{
+       uint64_t *pte1;
+       uint64_t *pte2;
+       uint64_t *pte3;
+       uint64_t *pte4;
+
+       if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
+               die("KVM_GET_SREGS failed");
+       /* is_in_protected_mode() tests self->sregs.cr0, so it must run after the refresh. */
+       if (!is_in_protected_mode(self))
+               return;
+
+       pte4    = guest_flat_to_host(self->kvm, self->sregs.cr3);
+       if (!host_ptr_in_ram(self->kvm, pte4))
+               return;
+
+       pte3    = guest_flat_to_host(self->kvm, (*pte4 & ~0xfff));
+       if (!host_ptr_in_ram(self->kvm, pte3))
+               return;
+
+       pte2    = guest_flat_to_host(self->kvm, (*pte3 & ~0xfff));
+       if (!host_ptr_in_ram(self->kvm, pte2))
+               return;
+
+       pte1    = guest_flat_to_host(self->kvm, (*pte2 & ~0xfff));
+       if (!host_ptr_in_ram(self->kvm, pte1))
+               return;
+
+       printf("Page Tables:\n");
+       if (*pte2 & (1 << 7))
+               printf(" pte4: %016" PRIx64 "   pte3: %016" PRIx64
+                       "   pte2: %016" PRIx64 "\n",
+                       *pte4, *pte3, *pte2);
+       else
+               printf(" pte4: %016" PRIx64 "   pte3: %016" PRIx64 "   pte2: 
%016"
+                       PRIx64 "   pte1: %016" PRIx64 "\n",
+                       *pte4, *pte3, *pte2, *pte1);
+}
+
+void kvm_cpu__run(struct kvm_cpu *self)
+{
+       int err;
+
+       err = ioctl(self->vcpu_fd, KVM_RUN, 0);
+       if (err && (errno != EINTR && errno != EAGAIN))
+               die_perror("KVM_RUN failed");
+}
diff --git a/tools/kvm/kvm-run.c b/tools/kvm/kvm-run.c
index 9b0786a..9392818 100644
--- a/tools/kvm/kvm-run.c
+++ b/tools/kvm/kvm-run.c
@@ -10,6 +10,7 @@
 /* user defined header files */
 #include <linux/types.h>
 #include <kvm/kvm.h>
+#include <kvm/kvm-cpu.h>
 #include <kvm/8250-serial.h>
 #include <kvm/virtio-blk.h>
 #include <kvm/virtio-console.h>
@@ -29,6 +30,7 @@
 #define MIN_RAM_SIZE_BYTE      (MIN_RAM_SIZE_MB << MB_SHIFT)
 
 static struct kvm *kvm;
+static struct kvm_cpu *cpu;
 
 static void handle_sigint(int sig)
 {
@@ -37,10 +39,11 @@ static void handle_sigint(int sig)
 
 static void handle_sigquit(int sig)
 {
-       kvm__show_registers(kvm);
-       kvm__show_code(kvm);
-       kvm__show_page_tables(kvm);
+       kvm_cpu__show_registers(cpu);
+       kvm_cpu__show_code(cpu);
+       kvm_cpu__show_page_tables(cpu);
 
+       kvm_cpu__delete(cpu);
        kvm__delete(kvm);
 
        exit(1);
@@ -130,13 +133,17 @@ int kvm_cmd_run(int argc, const char **argv, const char 
*prefix)
 
        kvm = kvm__init(kvm_dev, ram_size);
 
+       cpu = kvm_cpu__init(kvm);
+       if (!cpu)
+               die("unable to initialize KVM VCPU");
+
        if (image_filename) {
                kvm->disk_image = disk_image__open(image_filename);
                if (!kvm->disk_image)
                        die("unable to load disk image %s", image_filename);
        }
 
-       kvm__setup_cpuid(kvm);
+       kvm_cpu__setup_cpuid(cpu);
 
        strcpy(real_cmdline, "notsc nolapic noacpi pci=conf1 console=ttyS0 ");
        if (!kernel_cmdline || !strstr(kernel_cmdline, "root=")) {
@@ -153,12 +160,12 @@ int kvm_cmd_run(int argc, const char **argv, const char 
*prefix)
                                real_cmdline))
                die("unable to load kernel %s", kernel_filename);
 
-       kvm__reset_vcpu(kvm);
+       kvm_cpu__reset_vcpu(cpu);
 
        kvm__setup_bios(kvm);
 
        if (single_step)
-               kvm__enable_singlestep(kvm);
+               kvm_cpu__enable_singlestep(cpu);
 
        serial8250__init(kvm);
 
@@ -171,23 +178,23 @@ int kvm_cmd_run(int argc, const char **argv, const char 
*prefix)
        kvm__start_timer(kvm);
 
        for (;;) {
-               kvm__run(kvm);
+               kvm_cpu__run(cpu);
 
-               switch (kvm->kvm_run->exit_reason) {
+               switch (cpu->kvm_run->exit_reason) {
                case KVM_EXIT_DEBUG:
-                       kvm__show_registers(kvm);
-                       kvm__show_code(kvm);
+                       kvm_cpu__show_registers(cpu);
+                       kvm_cpu__show_code(cpu);
                        break;
                case KVM_EXIT_IO: {
                        bool ret;
 
                        ret = kvm__emulate_io(kvm,
-                                       kvm->kvm_run->io.port,
-                                       (uint8_t *)kvm->kvm_run +
-                                       kvm->kvm_run->io.data_offset,
-                                       kvm->kvm_run->io.direction,
-                                       kvm->kvm_run->io.size,
-                                       kvm->kvm_run->io.count);
+                                       cpu->kvm_run->io.port,
+                                       (uint8_t *)cpu->kvm_run +
+                                       cpu->kvm_run->io.data_offset,
+                                       cpu->kvm_run->io.direction,
+                                       cpu->kvm_run->io.size,
+                                       cpu->kvm_run->io.count);
 
                        if (!ret)
                                goto panic_kvm;
@@ -197,10 +204,10 @@ int kvm_cmd_run(int argc, const char **argv, const char 
*prefix)
                        bool ret;
 
                        ret = kvm__emulate_mmio(kvm,
-                                       kvm->kvm_run->mmio.phys_addr,
-                                       kvm->kvm_run->mmio.data,
-                                       kvm->kvm_run->mmio.len,
-                                       kvm->kvm_run->mmio.is_write);
+                                       cpu->kvm_run->mmio.phys_addr,
+                                       cpu->kvm_run->mmio.data,
+                                       cpu->kvm_run->mmio.len,
+                                       cpu->kvm_run->mmio.is_write);
 
                        if (!ret)
                                goto panic_kvm;
@@ -227,15 +234,16 @@ exit_kvm:
 
 panic_kvm:
        fprintf(stderr, "KVM exit reason: %" PRIu32 " (\"%s\")\n",
-               kvm->kvm_run->exit_reason,
-               kvm_exit_reasons[kvm->kvm_run->exit_reason]);
-       if (kvm->kvm_run->exit_reason == KVM_EXIT_UNKNOWN)
+               cpu->kvm_run->exit_reason,
+               kvm_exit_reasons[cpu->kvm_run->exit_reason]);
+       if (cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN)
                fprintf(stderr, "KVM exit code: 0x%" PRIu64 "\n",
-                       kvm->kvm_run->hw.hardware_exit_reason);
+                       cpu->kvm_run->hw.hardware_exit_reason);
        disk_image__close(kvm->disk_image);
-       kvm__show_registers(kvm);
-       kvm__show_code(kvm);
-       kvm__show_page_tables(kvm);
+       kvm_cpu__show_registers(cpu);
+       kvm_cpu__show_code(cpu);
+       kvm_cpu__show_page_tables(cpu);
+       kvm_cpu__delete(cpu);
        kvm__delete(kvm);
 
        return 1;
diff --git a/tools/kvm/kvm.c b/tools/kvm/kvm.c
index 2cd206d..af1f65f 100644
--- a/tools/kvm/kvm.c
+++ b/tools/kvm/kvm.c
@@ -107,9 +107,6 @@ void kvm__delete(struct kvm *self)
 {
        kvm__stop_timer(self);
 
-       if (self->msrs)
-               free(self->msrs);
-
        free(self->ram_start);
        free(self);
 }
@@ -162,7 +159,6 @@ struct kvm *kvm__init(const char *kvm_dev, unsigned long 
ram_size)
        struct kvm_pit_config pit_config = { .flags = 0, };
        struct kvm *self;
        long page_size;
-       int mmap_size;
        int ret;
 
        if (!kvm__cpu_supports_vm())
@@ -222,31 +218,9 @@ struct kvm *kvm__init(const char *kvm_dev, unsigned long 
ram_size)
        if (ret < 0)
                die_perror("KVM_CREATE_IRQCHIP ioctl");
 
-       self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0);
-       if (self->vcpu_fd < 0)
-               die_perror("KVM_CREATE_VCPU ioctl");
-
-       mmap_size = ioctl(self->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
-       if (mmap_size < 0)
-               die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");
-
-       self->kvm_run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, 
self->vcpu_fd, 0);
-       if (self->kvm_run == MAP_FAILED)
-               die("unable to mmap vcpu fd");
-
        return self;
 }
 
-void kvm__enable_singlestep(struct kvm *self)
-{
-       struct kvm_guest_debug debug = {
-               .control        = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
-       };
-
-       if (ioctl(self->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0)
-               warning("KVM_SET_GUEST_DEBUG failed");
-}
-
 #define BOOT_LOADER_SELECTOR   0x1000
 #define BOOT_LOADER_IP         0x0000
 #define BOOT_LOADER_SP         0x8000
@@ -417,154 +391,6 @@ found_kernel:
        return ret;
 }
 
-static inline uint64_t ip_flat_to_real(struct kvm *self, uint64_t ip)
-{
-       uint64_t cs = self->sregs.cs.selector;
-
-       return ip - (cs << 4);
-}
-
-static inline bool is_in_protected_mode(struct kvm *self)
-{
-       return self->sregs.cr0 & 0x01;
-}
-
-static inline uint64_t ip_to_flat(struct kvm *self, uint64_t ip)
-{
-       uint64_t cs;
-
-       /*
-        * NOTE! We should take code segment base address into account here.
-        * Luckily it's usually zero because Linux uses flat memory model.
-        */
-       if (is_in_protected_mode(self))
-               return ip;
-
-       cs = self->sregs.cs.selector;
-
-       return ip + (cs << 4);
-}
-
-static inline uint32_t selector_to_base(uint16_t selector)
-{
-       /*
-        * KVM on Intel requires 'base' to be 'selector * 16' in real mode.
-        */
-       return (uint32_t)selector * 16;
-}
-
-static struct kvm_msrs *kvm_msrs__new(size_t nmsrs)
-{
-       struct kvm_msrs *self = calloc(1, sizeof(*self) + (sizeof(struct 
kvm_msr_entry) * nmsrs));
-
-       if (!self)
-               die("out of memory");
-
-       return self;
-}
-
-#define MSR_IA32_TIME_STAMP_COUNTER    0x10
-
-#define MSR_IA32_SYSENTER_CS           0x174
-#define MSR_IA32_SYSENTER_ESP          0x175
-#define MSR_IA32_SYSENTER_EIP          0x176
-
-#define MSR_IA32_STAR                  0xc0000081
-#define MSR_IA32_LSTAR                 0xc0000082
-#define MSR_IA32_CSTAR                 0xc0000083
-#define MSR_IA32_FMASK                 0xc0000084
-#define MSR_IA32_KERNEL_GS_BASE                0xc0000102
-
-#define KVM_MSR_ENTRY(_index, _data)   \
-       (struct kvm_msr_entry) { .index = _index, .data = _data }
-
-static void kvm__setup_msrs(struct kvm *self)
-{
-       unsigned long ndx = 0;
-
-       self->msrs = kvm_msrs__new(100);
-
-       self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS,        
0x0);
-       self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP,       
0x0);
-       self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP,       
0x0);
-#ifdef CONFIG_X86_64
-       self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_STAR,               
0x0);
-       self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_CSTAR,              
0x0);
-       self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_KERNEL_GS_BASE,     
0x0);
-       self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_FMASK,              
0x0);
-       self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_LSTAR,              
0x0);
-#endif
-       self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TIME_STAMP_COUNTER, 
0x0);
-
-       self->msrs->nmsrs       = ndx;
-
-       if (ioctl(self->vcpu_fd, KVM_SET_MSRS, self->msrs) < 0)
-               die_perror("KVM_SET_MSRS failed");
-}
-
-static void kvm__setup_fpu(struct kvm *self)
-{
-       self->fpu = (struct kvm_fpu) {
-               .fcw            = 0x37f,
-               .mxcsr          = 0x1f80,
-       };
-
-       if (ioctl(self->vcpu_fd, KVM_SET_FPU, &self->fpu) < 0)
-               die_perror("KVM_SET_FPU failed");
-}
-
-static void kvm__setup_regs(struct kvm *self)
-{
-       self->regs = (struct kvm_regs) {
-               /* We start the guest in 16-bit real mode  */
-               .rflags         = 0x0000000000000002ULL,
-
-               .rip            = self->boot_ip,
-               .rsp            = self->boot_sp,
-               .rbp            = self->boot_sp,
-       };
-
-       if (self->regs.rip > USHRT_MAX)
-               die("ip 0x%" PRIx64 " is too high for real mode", (uint64_t) 
self->regs.rip);
-
-       if (ioctl(self->vcpu_fd, KVM_SET_REGS, &self->regs) < 0)
-               die_perror("KVM_SET_REGS failed");
-}
-
-static void kvm__setup_sregs(struct kvm *self)
-{
-
-       if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
-               die_perror("KVM_GET_SREGS failed");
-
-       self->sregs.cs.selector = self->boot_selector;
-       self->sregs.cs.base     = selector_to_base(self->boot_selector);
-       self->sregs.ss.selector = self->boot_selector;
-       self->sregs.ss.base     = selector_to_base(self->boot_selector);
-       self->sregs.ds.selector = self->boot_selector;
-       self->sregs.ds.base     = selector_to_base(self->boot_selector);
-       self->sregs.es.selector = self->boot_selector;
-       self->sregs.es.base     = selector_to_base(self->boot_selector);
-       self->sregs.fs.selector = self->boot_selector;
-       self->sregs.fs.base     = selector_to_base(self->boot_selector);
-       self->sregs.gs.selector = self->boot_selector;
-       self->sregs.gs.base     = selector_to_base(self->boot_selector);
-
-       if (ioctl(self->vcpu_fd, KVM_SET_SREGS, &self->sregs) < 0)
-               die_perror("KVM_SET_SREGS failed");
-}
-
-/**
- * kvm__reset_vcpu - reset virtual CPU to a known state
- */
-void kvm__reset_vcpu(struct kvm *self)
-{
-       kvm__setup_sregs(self);
-       kvm__setup_regs(self);
-       kvm__setup_fpu(self);
-       kvm__setup_msrs(self);
-}
-
 /**
  * kvm__setup_bios - inject BIOS into guest system memory
  * @self - guest system descriptor
@@ -629,15 +455,6 @@ void kvm__stop_timer(struct kvm *self)
        self->timerid = 0;
 }
 
-void kvm__run(struct kvm *self)
-{
-       int err;
-
-       err = ioctl(self->vcpu_fd, KVM_RUN, 0);
-       if (err && (errno != EINTR && errno != EAGAIN))
-               die_perror("KVM_RUN failed");
-}
-
 void kvm__irq_line(struct kvm *self, int irq, int level)
 {
        struct kvm_irq_level irq_level;
@@ -653,161 +470,6 @@ void kvm__irq_line(struct kvm *self, int irq, int level)
                die_perror("KVM_IRQ_LINE failed");
 }
 
-static void print_dtable(const char *name, struct kvm_dtable *dtable)
-{
-       printf(" %s                 %016" PRIx64 "  %08" PRIx16 "\n",
-               name, (uint64_t) dtable->base, (uint16_t) dtable->limit);
-}
-
-static void print_segment(const char *name, struct kvm_segment *seg)
-{
-       printf(" %s       %04" PRIx16 "      %016" PRIx64 "  %08" PRIx32 "  %02" PRIx8 "    %x %x   %x  %x %x %x %x\n",
-               name, (uint16_t) seg->selector, (uint64_t) seg->base, (uint32_t) seg->limit,
-               (uint8_t) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl);
-}
-
-void kvm__show_registers(struct kvm *self)
-{
-       unsigned long cr0, cr2, cr3;
-       unsigned long cr4, cr8;
-       unsigned long rax, rbx, rcx;
-       unsigned long rdx, rsi, rdi;
-       unsigned long rbp,  r8,  r9;
-       unsigned long r10, r11, r12;
-       unsigned long r13, r14, r15;
-       unsigned long rip, rsp;
-       struct kvm_sregs sregs;
-       unsigned long rflags;
-       struct kvm_regs regs;
-       int i;
-
-       if (ioctl(self->vcpu_fd, KVM_GET_REGS, &regs) < 0)
-               die("KVM_GET_REGS failed");
-
-       rflags = regs.rflags;
-
-       rip = regs.rip; rsp = regs.rsp;
-       rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx;
-       rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi;
-       rbp = regs.rbp; r8  = regs.r8;  r9  = regs.r9;
-       r10 = regs.r10; r11 = regs.r11; r12 = regs.r12;
-       r13 = regs.r13; r14 = regs.r14; r15 = regs.r15;
-
-       printf("Registers:\n");
-       printf(" rip: %016lx   rsp: %016lx flags: %016lx\n", rip, rsp, rflags);
-       printf(" rax: %016lx   rbx: %016lx   rcx: %016lx\n", rax, rbx, rcx);
-       printf(" rdx: %016lx   rsi: %016lx   rdi: %016lx\n", rdx, rsi, rdi);
-       printf(" rbp: %016lx   r8:  %016lx   r9:  %016lx\n", rbp, r8,  r9);
-       printf(" r10: %016lx   r11: %016lx   r12: %016lx\n", r10, r11, r12);
-       printf(" r13: %016lx   r14: %016lx   r15: %016lx\n", r13, r14, r15);
-
-       if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
-               die("KVM_GET_REGS failed");
-
-       cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3;
-       cr4 = sregs.cr4; cr8 = sregs.cr8;
-
-       printf(" cr0: %016lx   cr2: %016lx   cr3: %016lx\n", cr0, cr2, cr3);
-       printf(" cr4: %016lx   cr8: %016lx\n", cr4, cr8);
-       printf("Segment registers:\n");
-       printf(" register  selector  base              limit     type  p dpl db s l g avl\n");
-       print_segment("cs ", &sregs.cs);
-       print_segment("ss ", &sregs.ss);
-       print_segment("ds ", &sregs.ds);
-       print_segment("es ", &sregs.es);
-       print_segment("fs ", &sregs.fs);
-       print_segment("gs ", &sregs.gs);
-       print_segment("tr ", &sregs.tr);
-       print_segment("ldt", &sregs.ldt);
-       print_dtable("gdt", &sregs.gdt);
-       print_dtable("idt", &sregs.idt);
-       printf(" [ efer: %016" PRIx64 "  apic base: %016" PRIx64 "  nmi: %s ]\n",
-               (uint64_t) sregs.efer, (uint64_t) sregs.apic_base,
-               (self->nmi_disabled ? "disabled" : "enabled"));
-       printf("Interrupt bitmap:\n");
-       printf(" ");
-       for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++)
-               printf("%016" PRIx64 " ", (uint64_t) sregs.interrupt_bitmap[i]);
-       printf("\n");
-}
-
-void kvm__show_code(struct kvm *self)
-{
-       unsigned int code_bytes = 64;
-       unsigned int code_prologue = code_bytes * 43 / 64;
-       unsigned int code_len = code_bytes;
-       unsigned char c;
-       unsigned int i;
-       uint8_t *ip;
-
-       if (ioctl(self->vcpu_fd, KVM_GET_REGS, &self->regs) < 0)
-               die("KVM_GET_REGS failed");
-
-       if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
-               die("KVM_GET_SREGS failed");
-
-       ip = guest_flat_to_host(self, ip_to_flat(self, self->regs.rip) - code_prologue);
-
-       printf("Code: ");
-
-       for (i = 0; i < code_len; i++, ip++) {
-               if (!host_ptr_in_ram(self, ip))
-                       break;
-
-               c = *ip;
-
-               if (ip == guest_flat_to_host(self, ip_to_flat(self, self->regs.rip)))
-                       printf("<%02x> ", c);
-               else
-                       printf("%02x ", c);
-       }
-
-       printf("\n");
-
-       printf("Stack:\n");
-       kvm__dump_mem(self, self->regs.rsp, 32);
-}
-
-void kvm__show_page_tables(struct kvm *self)
-{
-       uint64_t *pte1;
-       uint64_t *pte2;
-       uint64_t *pte3;
-       uint64_t *pte4;
-
-       if (!is_in_protected_mode(self))
-               return;
-
-       if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
-               die("KVM_GET_SREGS failed");
-
-       pte4    = guest_flat_to_host(self, self->sregs.cr3);
-       if (!host_ptr_in_ram(self, pte4))
-               return;
-
-       pte3    = guest_flat_to_host(self, (*pte4 & ~0xfff));
-       if (!host_ptr_in_ram(self, pte3))
-               return;
-
-       pte2    = guest_flat_to_host(self, (*pte3 & ~0xfff));
-       if (!host_ptr_in_ram(self, pte2))
-               return;
-
-       pte1    = guest_flat_to_host(self, (*pte2 & ~0xfff));
-       if (!host_ptr_in_ram(self, pte1))
-               return;
-
-       printf("Page Tables:\n");
-       if (*pte2 & (1 << 7))
-               printf(" pte4: %016" PRIx64 "   pte3: %016" PRIx64
-                       "   pte2: %016" PRIx64 "\n",
-                       *pte4, *pte3, *pte2);
-       else
-               printf(" pte4: %016" PRIx64 "   pte3: %016" PRIx64 "   pte2: %016"
-                       PRIx64 "   pte1: %016" PRIx64 "\n",
-                       *pte4, *pte3, *pte2, *pte1);
-}
-
 void kvm__dump_mem(struct kvm *self, unsigned long addr, unsigned long size)
 {
        unsigned char *p;
-- 
1.7.0.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to