From 0d7f1ee470fe907e00ac6246bfa11e5322bc64fb Mon Sep 17 00:00:00 2001
From: Feng (Eric) Liu <eric.e.liu@intel.com>
Date: Sat, 15 Mar 2008 06:07:33 -0400
Subject: [PATCH] KVM: Add some trace entries in current code, when the KVM_TRACE
compilation option is enabled, it outputs the data into the trace
buffer. Define some interfaces for userspace tools to use the
buffer and analyze the trace data.

Signed-off-by: Feng (Eric) Liu <eric.e.liu@intel.com>
---
 arch/x86/kvm/Kconfig       |    7 ++++++
 arch/x86/kvm/Makefile      |    2 +-
 arch/x86/kvm/vmx.c         |   21 +++++++++++++++++-
 arch/x86/kvm/x86.c         |   21 +++++++++++++++++++
 include/asm-x86/kvm.h      |   20 ++++++++++++++++++
 include/asm-x86/kvm_host.h |   47 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/kvm.h        |   46 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/kvm_host.h   |    6 +++++
 virt/kvm/kvm_main.c        |   22 ++++++++++++++++++++
 9 files changed, 189 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 41962e7..9fa53be 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -36,6 +36,13 @@ config KVM
 
 	  If unsure, say N.
 
+config KVM_TRACE
+	bool "KVM trace support"
+	depends on KVM
+	default n
+	---help---
+	  Say yes here to enable KVM trace support
+
 config KVM_INTEL
 	tristate "KVM for Intel processors support"
 	depends on KVM
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 4d0c22e..db5dfa9 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
+common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o kvm_trace.o ioapic.o)
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9951ec9..55f22ad 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1793,6 +1793,8 @@ out:
 static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	KVMTRACE_1D(INJ_VIRQ, vcpu, 0,
+		    irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK);
 
 	if (vcpu->arch.rmode.active) {
 		vmx->rmode.irq.pending = true;
@@ -1944,6 +1946,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
 	if (is_page_fault(intr_info)) {
 		cr2 = vmcs_readl(EXIT_QUALIFICATION);
+		KVMTRACE_2D(PAGE_FAULT, vcpu, 0, error_code, cr2);
 		return kvm_mmu_page_fault(vcpu, cr2, error_code);
 	}
 
@@ -1972,6 +1975,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu,
 				     struct kvm_run *kvm_run)
 {
 	++vcpu->stat.irq_exits;
+	KVMTRACE_1D(INTR, vcpu, 0, vmcs_read32(VM_EXIT_INTR_INFO));
 	return 1;
 }
 
@@ -2029,6 +2033,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	reg = (exit_qualification >> 8) & 15;
 	switch ((exit_qualification >> 4) & 3) {
 	case 0: /* mov to cr */
+		KVMTRACE_2D(CR_WRITE, vcpu, 0, cr, vcpu->arch.regs[reg]);
 		switch (cr) {
 		case 0:
 			vcpu_load_rsp_rip(vcpu);
@@ -2061,6 +2066,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		vcpu->arch.cr0 &= ~X86_CR0_TS;
 		vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
 		vmx_fpu_activate(vcpu);
+		KVMTRACE_0D(CLTS, vcpu, 0);
 		skip_emulated_instruction(vcpu);
 		return 1;
 	case 1: /*mov from cr*/
@@ -2069,12 +2075,14 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 			vcpu_load_rsp_rip(vcpu);
 			vcpu->arch.regs[reg] = vcpu->arch.cr3;
 			vcpu_put_rsp_rip(vcpu);
+			KVMTRACE_2D(CR_READ, vcpu, 0, cr, vcpu->arch.regs[reg]);
 			skip_emulated_instruction(vcpu);
 			return 1;
 		case 8:
 			vcpu_load_rsp_rip(vcpu);
 			vcpu->arch.regs[reg] = kvm_get_cr8(vcpu);
 			vcpu_put_rsp_rip(vcpu);
+			KVMTRACE_2D(CR_READ, vcpu, 0, cr, vcpu->arch.regs[reg]);
 			skip_emulated_instruction(vcpu);
 			return 1;
 		}
@@ -2120,6 +2128,7 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 			val = 0;
 		}
 		vcpu->arch.regs[reg] = val;
+		KVMTRACE_2D(DR_READ, vcpu, 0, dr, val);
 	} else {
 		/* mov to dr */
 	}
@@ -2144,6 +2153,8 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		return 1;
 	}
 
+	/* Trace the MSR index and the low/high halves of its value. */
+	KVMTRACE_3D(MSR_READ, vcpu, 0, ecx, (u32)data, data >> 32);
 	/* FIXME: handling of bits 32:63 of rax, rdx */
 	vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u;
 	vcpu->arch.regs[VCPU_REGS_RDX] = (data >> 32) & -1u;
@@ -2157,6 +2168,8 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
 		| ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
 
+	/* Trace the MSR index and the low/high halves of its value. */
+	KVMTRACE_3D(MSR_WRITE, vcpu, 0, ecx, (u32)data, data >> 32);
 	if (vmx_set_msr(vcpu, ecx, data) != 0) {
 		kvm_inject_gp(vcpu, 0);
 		return 1;
@@ -2271,6 +2284,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 vectoring_info = vmx->idt_vectoring_info;
 
+	KVMTRACE_2D(VMEXIT, vcpu, 1, exit_reason, vmcs_readl(GUEST_RIP));
+
 	if (unlikely(vmx->fail)) {
 		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
 		kvm_run->fail_entry.hardware_entry_failure_reason
@@ -2351,7 +2366,7 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 				enable_irq_window(vcpu);
 			return;
 		}
-
+		KVMTRACE_1D(INJ_VIRQ, vcpu, 0, idtv_info_field);
 		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
 		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
 				vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
@@ -2537,8 +2552,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 
 	/* We need to handle NMIs before interrupts are enabled */
-	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
+	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
+		KVMTRACE_0D(NMI, vcpu, 0);
 		asm("int $2");
+	}
 }
 
 static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5339ab1..1d8a15f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -306,6 +306,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0);
 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
 {
 	kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
+	KVMTRACE_1D(LMSW, vcpu, 0, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
 }
 EXPORT_SYMBOL_GPL(kvm_lmsw);
 
@@ -2281,6 +2282,12 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	vcpu->arch.pio.guest_page_offset = 0;
 	vcpu->arch.pio.rep = 0;
 
+	if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
+		KVMTRACE_2D(IO_READ, vcpu, 0, vcpu->run->io.port, size);
+	else
+		KVMTRACE_3D(IO_WRITE, vcpu, 0, vcpu->run->io.port, size,
+			    vcpu->arch.regs[VCPU_REGS_RAX]);
+
 	kvm_x86_ops->cache_regs(vcpu);
 	memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4);
 	kvm_x86_ops->decache_regs(vcpu);
@@ -2319,6 +2326,11 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	vcpu->arch.pio.guest_page_offset = offset_in_page(address);
 	vcpu->arch.pio.rep = rep;
 
+	if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
+		KVMTRACE_2D(IO_READ, vcpu, 0, vcpu->run->io.port, size);
+	else
+		KVMTRACE_2D(IO_WRITE, vcpu, 0, vcpu->run->io.port, size);
+
 	if (!count) {
 		kvm_x86_ops->skip_emulated_instruction(vcpu);
 		return 1;
@@ -2428,6 +2440,7 @@ void kvm_arch_exit(void)
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 {
 	++vcpu->stat.halt_exits;
+	KVMTRACE_0D(HLT, vcpu, 0);
 	if (irqchip_in_kernel(vcpu->kvm)) {
 		vcpu->arch.mp_state = VCPU_MP_STATE_HALTED;
 		kvm_vcpu_block(vcpu);
@@ -2453,6 +2466,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	a2 = vcpu->arch.regs[VCPU_REGS_RDX];
 	a3 = vcpu->arch.regs[VCPU_REGS_RSI];
 
+	KVMTRACE_1D(VMMCALL, vcpu, 0, nr);
+
 	if (!is_long_mode(vcpu)) {
 		nr &= 0xFFFFFFFF;
 		a0 &= 0xFFFFFFFF;
@@ -2638,6 +2653,11 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 	}
 	kvm_x86_ops->decache_regs(vcpu);
 	kvm_x86_ops->skip_emulated_instruction(vcpu);
+	KVMTRACE_3D(CPUID, vcpu, 0, function,
+		   (u64)vcpu->arch.regs[VCPU_REGS_RAX] << 32
+		   | vcpu->arch.regs[VCPU_REGS_RBX],
+		   (u64)vcpu->arch.regs[VCPU_REGS_RCX] << 32
+		   | vcpu->arch.regs[VCPU_REGS_RDX]);
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
 
@@ -2790,6 +2810,7 @@ again:
 		if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
 			kvm_x86_ops->tlb_flush(vcpu);
 
+	KVMTRACE_0D(VMENTRY, vcpu, 1);
 	kvm_x86_ops->run(vcpu, kvm_run);
 
 	vcpu->guest_mode = 0;
diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h
index 12b4b25..7271f14 100644
--- a/include/asm-x86/kvm.h
+++ b/include/asm-x86/kvm.h
@@ -209,4 +209,24 @@ struct kvm_pit_state {
 	struct kvm_pit_channel_state channels[3];
 };
 
+#define KVM_TRC_INJ_VIRQ        (KVM_TRC_HANDLER + 0x02)
+#define KVM_TRC_REINJ_VIRQ      (KVM_TRC_HANDLER + 0x03)
+#define KVM_TRC_IO_READ         (KVM_TRC_HANDLER + 0x04)
+#define KVM_TRC_IO_WRITE        (KVM_TRC_HANDLER + 0x05)
+#define KVM_TRC_CR_READ         (KVM_TRC_HANDLER + 0x06)
+#define KVM_TRC_CR_WRITE        (KVM_TRC_HANDLER + 0x07)
+#define KVM_TRC_DR_READ         (KVM_TRC_HANDLER + 0x08)
+#define KVM_TRC_DR_WRITE        (KVM_TRC_HANDLER + 0x09)
+#define KVM_TRC_MSR_READ        (KVM_TRC_HANDLER + 0x0A)
+#define KVM_TRC_MSR_WRITE       (KVM_TRC_HANDLER + 0x0B)
+#define KVM_TRC_CPUID           (KVM_TRC_HANDLER + 0x0C)
+#define KVM_TRC_INTR            (KVM_TRC_HANDLER + 0x0D)
+#define KVM_TRC_NMI             (KVM_TRC_HANDLER + 0x0E)
+#define KVM_TRC_SMI             (KVM_TRC_HANDLER + 0x0F)
+#define KVM_TRC_VMMCALL         (KVM_TRC_HANDLER + 0x10)
+#define KVM_TRC_HLT             (KVM_TRC_HANDLER + 0x11)
+#define KVM_TRC_INVLPG          (KVM_TRC_HANDLER + 0x12)
+#define KVM_TRC_CLTS            (KVM_TRC_HANDLER + 0x13)
+#define KVM_TRC_LMSW            (KVM_TRC_HANDLER + 0x14)
+
 #endif
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 12932bb..5012ef1 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -634,4 +634,51 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
 #define TSS_REDIRECTION_SIZE (256 / 8)
 #define RMODE_TSS_SIZE (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1)
 
+#ifdef CONFIG_KVM_TRACE /* record = packed pid/vid word + event payload */
+#define KVMTRACE_ND(evt, vcpu, cycles, count, d1, d2, d3, d4)		\
+ do {									\
+	if (unlikely(kvm_trace_enable_flag)) {				\
+		if (KVM_TRC_##evt == KVM_TRC_VMEXIT ||			\
+			KVM_TRC_##evt == KVM_TRC_PAGE_FAULT) {		\
+			struct { /* d2 is kept at native word width */	\
+				u32 pid:16, vid:16;			\
+				u32 data1;				\
+				unsigned long data2;			\
+			} _d;						\
+			_d.pid  = (u16)current->tgid;			\
+			_d.vid  = (vcpu)->vcpu_id;			\
+			_d.data1 = (d1);				\
+			_d.data2 = (d2);				\
+			kvm_trace_var(KVM_TRC_##evt, (cycles),		\
+				sizeof(_d), (unsigned char *)&_d);	\
+		} else {						\
+			struct { /* up to four 32-bit data words */	\
+				u32 pid:16, vid:16;			\
+				u32 data[4];				\
+			} _d;						\
+			_d.pid  = (u16)current->tgid;			\
+			_d.vid  = (vcpu)->vcpu_id;			\
+			_d.data[0] = (d1);				\
+			_d.data[1] = (d2);				\
+			_d.data[2] = (d3);				\
+			_d.data[3] = (d4); /* last valid slot */	\
+			kvm_trace_var(KVM_TRC_##evt, (cycles),		\
+			sizeof(u32) * ((count) + 1), (unsigned char *)&_d); \
+		}							\
+	}								\
+ } while (0)
+#else /* !CONFIG_KVM_TRACE: trace points expand to an empty statement */
+#define KVMTRACE_ND(evt, vcpu, cycles, count, d1, d2, d3, d4) do { } while (0)
+#endif /* CONFIG_KVM_TRACE */
+
+#define KVMTRACE_4D(evt, vcpu, cycles, d1, d2, d3, d4) \
+		KVMTRACE_ND(evt, vcpu, cycles, 4, d1, d2, d3, d4)
+#define KVMTRACE_3D(evt, vcpu, cycles, d1, d2, d3) \
+		KVMTRACE_ND(evt, vcpu, cycles, 3, d1, d2, d3, 0)
+#define KVMTRACE_2D(evt, vcpu, cycles, d1, d2) \
+		KVMTRACE_ND(evt, vcpu, cycles, 2, d1, d2, 0, 0)
+#define KVMTRACE_1D(evt, vcpu, cycles, d1) \
+		KVMTRACE_ND(evt, vcpu, cycles, 1, d1, 0, 0, 0)
+#define KVMTRACE_0D(evt, vcpu, cycles) \
+		KVMTRACE_ND(evt, vcpu, cycles, 0, 0, 0, 0, 0)
 #endif
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index a2f3274..8734e1a 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -13,6 +13,13 @@
 
 #define KVM_API_VERSION 12
 
+/* for KVM_ENABLE_TRACE */
+struct kvm_trace_info {
+	__u32 pcpu_pages;	/* per-cpu trace buffer size, in pages */
+	__u32 ncpus;
+	__u64 userspace_addr;
+};
+
 /* for KVM_CREATE_MEMORY_REGION */
 struct kvm_memory_region {
 	__u32 slot;
@@ -204,6 +211,42 @@ struct kvm_vapic_addr {
 	__u64 vapic_addr;
 };
 
+#define KVM_TRC_GEN             0x0001f000    /* General trace */
+#define KVM_TRC_LOST_REC        (KVM_TRC_GEN + 1)
+#define KVM_TRC_WRAP_BUF        (KVM_TRC_GEN + 2)
+
+#define KVM_TRC_KVM             0x0002f000    /* KVM trace */
+#define KVM_TRC_ENTRYEXIT       0x00021000
+#define KVM_TRC_HANDLER         0x00022000
+
+#define KVM_TRC_VMENTRY         (KVM_TRC_ENTRYEXIT + 0x01)
+#define KVM_TRC_VMEXIT          (KVM_TRC_ENTRYEXIT + 0x02)
+#define KVM_TRC_PAGE_FAULT      (KVM_TRC_HANDLER + 0x01)
+
+#define KVM_TRC_EXTRA_MAX       7
+
+/* This structure represents a single trace buffer record. */
+struct kvm_trace_rec {
+	__u32 event:28;
+	__u32 extra_u32:3;
+	__u32 cycles_in:1;
+	union {
+		struct {
+			__u32 cycles_lo, cycles_hi;
+			__u32 extra_u32[KVM_TRC_EXTRA_MAX];
+		} cycles;
+		struct {
+			__u32 extra_u32[KVM_TRC_EXTRA_MAX];
+		} nocycles;
+	} u;
+};
+
+struct kvm_trace_buf {
+	__u32 cons;
+	__u32 prod;
+	char data[0];
+};
+
 #define KVMIO 0xAE
 
 /*
@@ -223,6 +266,9 @@ struct kvm_vapic_addr {
 #define KVM_GET_VCPU_MMAP_SIZE    _IO(KVMIO,   0x04) /* in bytes */
 #define KVM_GET_SUPPORTED_CPUID   _IOWR(KVMIO, 0x05, struct kvm_cpuid2)
 
+#define KVM_ENABLE_TRACE    _IOWR(KVMIO, 0x06, struct kvm_trace_info)
+#define KVM_DISABLE_TRACE   _IO(KVMIO,   0x07)
+
 /*
  * Extension capability list.
  */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 958e003..97cee93 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -307,4 +307,10 @@ struct kvm_stats_debugfs_item {
 };
 extern struct kvm_stats_debugfs_item debugfs_entries[];
 
+extern int kvm_trace_enable_flag;
+void kvm_trace_var(u32 event, int cycles, int extra, unsigned char *extra_data);
+int kvm_dev_ioctl_enable_trace(struct kvm_trace_info *t_info);
+int  kvm_dev_ioctl_disable_trace(void);
+void kvm_trace_exit(void);
+
 #endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 30bf832..8ef7f4d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1122,6 +1122,27 @@ static long kvm_dev_ioctl(struct file *filp,
 		r += PAGE_SIZE;    /* pio data page */
 #endif
 		break;
+	case KVM_ENABLE_TRACE: {
+		struct kvm_trace_info trace_info;
+
+		r = -EFAULT;
+		if (copy_from_user(&trace_info, argp, sizeof trace_info))
+			goto out;
+		r = kvm_dev_ioctl_enable_trace(&trace_info);
+		if (r)
+			goto out;
+		r = -EFAULT;
+		if (copy_to_user(argp, &trace_info, sizeof trace_info))
+			goto out;
+		r = 0;
+		break;
+	}
+	case KVM_DISABLE_TRACE:
+		r = -EINVAL;
+		if (arg)
+			goto out;
+		r = kvm_dev_ioctl_disable_trace();
+		break;
 	default:
 		return kvm_arch_dev_ioctl(filp, ioctl, arg);
 	}
@@ -1447,6 +1468,7 @@ EXPORT_SYMBOL_GPL(kvm_init);
 
 void kvm_exit(void)
 {
+	kvm_trace_exit();
 	misc_deregister(&kvm_dev);
 	kmem_cache_destroy(kvm_vcpu_cache);
 	sysdev_unregister(&kvm_sysdev);
-- 
1.5.1

