Below is the kernel patch that enables perf to collect guest OS statistics.

Joerg,

Would you like to add support for SVM? I don't know the exact point at which
to trigger the NMI to the host with SVM.

See the code below for VMX:
 
+               kvm_before_handle_nmi(&vmx->vcpu);
                asm("int $2");
+               kvm_after_handle_nmi(&vmx->vcpu);

Signed-off-by: Zhang Yanmin <[email protected]>

---

diff -Nraup --exclude=tools linux-2.6_tip0413/arch/x86/include/asm/perf_event.h linux-2.6_tip0413_perfkvm/arch/x86/include/asm/perf_event.h
--- linux-2.6_tip0413/arch/x86/include/asm/perf_event.h 2010-04-14 11:11:03.992966568 +0800
+++ linux-2.6_tip0413_perfkvm/arch/x86/include/asm/perf_event.h 2010-04-14 11:13:17.261881591 +0800
@@ -135,17 +135,10 @@ extern void perf_events_lapic_init(void)
  */
 #define PERF_EFLAGS_EXACT      (1UL << 3)
 
-#define perf_misc_flags(regs)                          \
-({     int misc = 0;                                   \
-       if (user_mode(regs))                            \
-               misc |= PERF_RECORD_MISC_USER;          \
-       else                                            \
-               misc |= PERF_RECORD_MISC_KERNEL;        \
-       if (regs->flags & PERF_EFLAGS_EXACT)            \
-               misc |= PERF_RECORD_MISC_EXACT;         \
-       misc; })
-
-#define perf_instruction_pointer(regs) ((regs)->ip)
+struct pt_regs;
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_misc_flags(struct pt_regs *regs);
+#define perf_misc_flags(regs)  perf_misc_flags(regs)
 
 #else
 static inline void init_hw_perf_events(void)           { }
diff -Nraup --exclude=tools linux-2.6_tip0413/arch/x86/kernel/cpu/perf_event.c linux-2.6_tip0413_perfkvm/arch/x86/kernel/cpu/perf_event.c
--- linux-2.6_tip0413/arch/x86/kernel/cpu/perf_event.c  2010-04-14 11:11:04.825028810 +0800
+++ linux-2.6_tip0413_perfkvm/arch/x86/kernel/cpu/perf_event.c  2010-04-14 17:02:12.198063684 +0800
@@ -1720,6 +1720,11 @@ struct perf_callchain_entry *perf_callch
 {
        struct perf_callchain_entry *entry;
 
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+               /* TODO: We don't support guest os callchain now */
+               return NULL;
+       }
+
        if (in_nmi())
                entry = &__get_cpu_var(pmc_nmi_entry);
        else
@@ -1743,3 +1748,30 @@ void perf_arch_fetch_caller_regs(struct 
        regs->cs = __KERNEL_CS;
        local_save_flags(regs->flags);
 }
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+       unsigned long ip;  /* guest IP if a guest is active, else regs' IP */
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+               ip = perf_guest_cbs->get_guest_ip();  /* regs unused here */
+       else
+               ip = instruction_pointer(regs);
+       return ip;
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+       int misc = 0;  /* accumulates PERF_RECORD_MISC_* bits */
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+               misc |= perf_guest_cbs->is_user_mode() ?
+                       PERF_RECORD_MISC_GUEST_USER :
+                       PERF_RECORD_MISC_GUEST_KERNEL;
+       } else
+               misc |= user_mode(regs) ? PERF_RECORD_MISC_USER :
+                       PERF_RECORD_MISC_KERNEL;
+       if (regs->flags & PERF_EFLAGS_EXACT)  /* tested on guest path too */
+               misc |= PERF_RECORD_MISC_EXACT;
+
+       return misc;
+}
+
diff -Nraup --exclude=tools linux-2.6_tip0413/arch/x86/kvm/vmx.c linux-2.6_tip0413_perfkvm/arch/x86/kvm/vmx.c
--- linux-2.6_tip0413/arch/x86/kvm/vmx.c        2010-04-14 11:11:04.353024541 +0800
+++ linux-2.6_tip0413_perfkvm/arch/x86/kvm/vmx.c        2010-04-15 10:28:39.516891050 +0800
@@ -3654,8 +3654,11 @@ static void vmx_complete_interrupts(stru
 
        /* We need to handle NMIs before interrupts are enabled */
        if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
-           (exit_intr_info & INTR_INFO_VALID_MASK))
+           (exit_intr_info & INTR_INFO_VALID_MASK)) {
+               kvm_before_handle_nmi(&vmx->vcpu);
                asm("int $2");
+               kvm_after_handle_nmi(&vmx->vcpu);
+       }
 
        idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
 
diff -Nraup --exclude=tools linux-2.6_tip0413/arch/x86/kvm/x86.c linux-2.6_tip0413_perfkvm/arch/x86/kvm/x86.c
--- linux-2.6_tip0413/arch/x86/kvm/x86.c        2010-04-14 11:11:04.341042024 +0800
+++ linux-2.6_tip0413_perfkvm/arch/x86/kvm/x86.c        2010-04-15 17:16:41.340064784 +0800
@@ -40,6 +40,7 @@
 #include <linux/user-return-notifier.h>
 #include <linux/srcu.h>
 #include <linux/slab.h>
+#include <linux/perf_event.h>
 #include <trace/events/kvm.h>
 #undef TRACE_INCLUDE_FILE
 #define CREATE_TRACE_POINTS
@@ -3765,6 +3766,47 @@ static void kvm_timer_init(void)
        }
 }
 
+static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
+
+static int kvm_is_in_guest(void)
+{
+       return percpu_read(current_vcpu) != NULL;  /* vcpu recorded on this CPU? */
+}
+
+static int kvm_is_user_mode(void)
+{
+       int user_mode = 3;  /* reported as user mode when no vcpu is recorded */
+       if (percpu_read(current_vcpu))
+               user_mode = kvm_x86_ops->get_cpl(percpu_read(current_vcpu));
+       return user_mode != 0;  /* any non-zero CPL counts as user mode */
+}
+
+static unsigned long kvm_get_guest_ip(void)
+{
+       unsigned long ip = 0;  /* returned when no vcpu is recorded */
+       if (percpu_read(current_vcpu))
+               ip = kvm_rip_read(percpu_read(current_vcpu));
+       return ip;
+}
+
+static struct perf_guest_info_callbacks kvm_guest_cbs = {
+       .is_in_guest            = kvm_is_in_guest,
+       .is_user_mode           = kvm_is_user_mode,
+       .get_guest_ip           = kvm_get_guest_ip,
+};
+
+void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
+{
+       percpu_write(current_vcpu, vcpu);  /* kvm_is_in_guest() now true */
+}
+EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
+
+void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
+{
+       percpu_write(current_vcpu, NULL);  /* vcpu argument is not used */
+}
+EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
+
 int kvm_arch_init(void *opaque)
 {
        int r;
@@ -3801,6 +3843,8 @@ int kvm_arch_init(void *opaque)
 
        kvm_timer_init();
 
+       perf_register_guest_info_callbacks(&kvm_guest_cbs);
+
        return 0;
 
 out:
@@ -3809,6 +3853,8 @@ out:
 
 void kvm_arch_exit(void)
 {
+       perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
+
        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
                cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
                                            CPUFREQ_TRANSITION_NOTIFIER);
diff -Nraup --exclude=tools linux-2.6_tip0413/arch/x86/kvm/x86.h linux-2.6_tip0413_perfkvm/arch/x86/kvm/x86.h
--- linux-2.6_tip0413/arch/x86/kvm/x86.h        2010-04-14 11:11:04.328996790 +0800
+++ linux-2.6_tip0413_perfkvm/arch/x86/kvm/x86.h        2010-04-15 10:27:57.116972433 +0800
@@ -65,4 +65,7 @@ static inline int is_paging(struct kvm_v
        return kvm_read_cr0_bits(vcpu, X86_CR0_PG);
 }
 
+void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
+void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
+
 #endif
diff -Nraup --exclude=tools linux-2.6_tip0413/include/linux/perf_event.h linux-2.6_tip0413_perfkvm/include/linux/perf_event.h
--- linux-2.6_tip0413/include/linux/perf_event.h        2010-04-14 11:11:16.922212684 +0800
+++ linux-2.6_tip0413_perfkvm/include/linux/perf_event.h        2010-04-14 11:34:33.478072738 +0800
@@ -288,11 +288,13 @@ struct perf_event_mmap_page {
        __u64   data_tail;              /* user-space written tail */
 };
 
-#define PERF_RECORD_MISC_CPUMODE_MASK          (3 << 0)
+#define PERF_RECORD_MISC_CPUMODE_MASK          (7 << 0)
 #define PERF_RECORD_MISC_CPUMODE_UNKNOWN       (0 << 0)
 #define PERF_RECORD_MISC_KERNEL                        (1 << 0)
 #define PERF_RECORD_MISC_USER                  (2 << 0)
 #define PERF_RECORD_MISC_HYPERVISOR            (3 << 0)
+#define PERF_RECORD_MISC_GUEST_KERNEL          (4 << 0)
+#define PERF_RECORD_MISC_GUEST_USER            (5 << 0)
 
 #define PERF_RECORD_MISC_EXACT                 (1 << 14)
 /*
@@ -446,6 +448,12 @@ enum perf_callchain_context {
 # include <asm/perf_event.h>
 #endif
 
+struct perf_guest_info_callbacks {
+       int (*is_in_guest) (void);  /* non-zero: sample belongs to a guest */
+       int (*is_user_mode) (void);  /* non-zero: guest user mode */
+       unsigned long (*get_guest_ip) (void);  /* guest instruction pointer */
+};
+
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 #include <asm/hw_breakpoint.h>
 #endif
@@ -920,6 +928,12 @@ static inline void perf_event_mmap(struc
                __perf_event_mmap(vma);
 }
 
+extern struct perf_guest_info_callbacks *perf_guest_cbs;
+extern int perf_register_guest_info_callbacks(
+               struct perf_guest_info_callbacks *);
+extern int perf_unregister_guest_info_callbacks(
+               struct perf_guest_info_callbacks *);
+
 extern void perf_event_comm(struct task_struct *tsk);
 extern void perf_event_fork(struct task_struct *tsk);
 
@@ -989,6 +1003,11 @@ perf_sw_event(u32 event_id, u64 nr, int 
 static inline void
 perf_bp_event(struct perf_event *event, void *data)                    { }
 
+static inline int perf_register_guest_info_callbacks
+(struct perf_guest_info_callbacks *callbacks) { return 0; } /* no-op stub */
+static inline int perf_unregister_guest_info_callbacks
+(struct perf_guest_info_callbacks *callbacks) { return 0; } /* no-op stub */
+
 static inline void perf_event_mmap(struct vm_area_struct *vma)         { }
 static inline void perf_event_comm(struct task_struct *tsk)            { }
 static inline void perf_event_fork(struct task_struct *tsk)            { }
diff -Nraup --exclude=tools linux-2.6_tip0413/kernel/perf_event.c linux-2.6_tip0413_perfkvm/kernel/perf_event.c
--- linux-2.6_tip0413/kernel/perf_event.c       2010-04-14 11:12:04.090770764 +0800
+++ linux-2.6_tip0413_perfkvm/kernel/perf_event.c       2010-04-14 11:13:17.265859229 +0800
@@ -2797,6 +2797,27 @@ void perf_arch_fetch_caller_regs(struct 
 
 
 /*
+ * We assume there is only KVM supporting the callbacks.
+ * Later on, we might change it to a list if there is
+ * another virtualization implementation supporting the callbacks.
+ */
+struct perf_guest_info_callbacks *perf_guest_cbs;
+
+int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
+{
+       perf_guest_cbs = cbs;  /* overwrites any earlier registration */
+       return 0;
+}
+EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
+
+int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
+{
+       perf_guest_cbs = NULL;  /* cbs is not compared; always clears */
+       return 0;
+}
+EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
+
+/*
  * Output
  */
 static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
@@ -3748,7 +3769,7 @@ void __perf_event_mmap(struct vm_area_st
                .event_id  = {
                        .header = {
                                .type = PERF_RECORD_MMAP,
-                               .misc = 0,
+                               .misc = PERF_RECORD_MISC_USER,
                                /* .size */
                        },
                        /* .pid */


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to