From df6e45936dd43be14441f5e9ec19a130325d4341 Mon Sep 17 00:00:00 2001
From: Feng (Eric) Liu <eric.e.liu@intel.com>
Date: Sat, 15 Mar 2008 04:53:53 -0400
Subject: [PATCH] kvm: Add a tool kvmtrace for collecting binary data from
the trace buffer. It obtains the buffer's virtual address
and the per-cpu size in pages through the argument of the
KVM_ENABLE_TRACE ioctl. It can also set the per-cpu trace
buffer size, but only once per module cycle.

Signed-off-by: Feng (Eric) Liu <eric.e.liu@intel.com>
---
 kernel/Kbuild              |    2 +-
 user/Makefile              |    5 +-
 user/config-x86-common.mak |    4 +-
 user/kvmtrace.c            |  397 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 404 insertions(+), 4 deletions(-)
 create mode 100644 user/kvmtrace.c

diff --git a/kernel/Kbuild b/kernel/Kbuild
index 014cc17..5fc3101 100644
--- a/kernel/Kbuild
+++ b/kernel/Kbuild
@@ -1,6 +1,6 @@
 EXTRA_CFLAGS := -I$(src)/include -include $(src)/external-module-compat.h
 obj-m := kvm.o kvm-intel.o kvm-amd.o
-kvm-objs := kvm_main.o x86.o mmu.o x86_emulate.o anon_inodes.o irq.o i8259.o \
+kvm-objs := kvm_main.o kvm_trace.o x86.o mmu.o x86_emulate.o anon_inodes.o irq.o i8259.o \
 	 lapic.o ioapic.o preempt.o i8254.o
 kvm-intel-objs := vmx.o vmx-debug.o
 kvm-amd-objs := svm.o
diff --git a/user/Makefile b/user/Makefile
index 225a435..ff2192d 100644
--- a/user/Makefile
+++ b/user/Makefile
@@ -31,6 +31,9 @@ CXXFLAGS = $(autodepend-flags)
 
 autodepend-flags = -MMD -MF $(dir $*).$(notdir $*).d
 
+kvmtrace: $(kvmtrace_objs)
+	$(CC) $(CFLAGS) $^ -o $@
+
 kvmctl: LDFLAGS += -pthread -lrt
 
 kvmctl: $(kvmctl_objs)
@@ -42,4 +45,4 @@ kvmctl: $(kvmctl_objs)
 -include .*.d
 
 clean: arch_clean
-	$(RM) kvmctl *.o *.a .*.d
+	$(RM) kvmctl kvmtrace *.o *.a .*.d
diff --git a/user/config-x86-common.mak b/user/config-x86-common.mak
index 8cfdd45..4c90fe6 100644
--- a/user/config-x86-common.mak
+++ b/user/config-x86-common.mak
@@ -1,9 +1,9 @@
 #This is a make file with common rules for both x86 & x86-64
 
-all: kvmctl test_cases
+all: kvmctl kvmtrace test_cases
 
 kvmctl_objs= main.o iotable.o ../libkvm/libkvm.a
-
+kvmtrace_objs= kvmtrace.o
 balloon_ctl: balloon_ctl.o
 
 FLATLIBS = $(TEST_DIR)/libcflat.a $(libgcc)
diff --git a/user/kvmtrace.c b/user/kvmtrace.c
new file mode 100644
index 0000000..1a05205
--- /dev/null
+++ b/user/kvmtrace.c
@@ -0,0 +1,397 @@
+/*
+ * Tool for collecting trace buffer data from kvm-trace.
+ *
+ * Copyright (C) 2004 by Intel Research Cambridge
+ *
+ * Author: Mark Williamson, mark.a.williamson@intel.com
+ * Author: Eric Liu, eric.e.liu@intel.com
+ * Date:   February 2008
+ */
+
+#include <time.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <signal.h>
+#include <inttypes.h>
+#include <string.h>
+#include <getopt.h>
+#include <assert.h>
+#include <sys/ioctl.h>
+
+#ifndef __user
+#define __user /* temporary, until installed via make headers_install */
+#endif
+#include <linux/kvm.h>
+
+#if defined(__i386__) /* memory-barrier macros, selected per architecture */
+#define mb()  __asm__ __volatile__ ( "lock; addl $0,0(%%esp)" : : : "memory" )
+#define rmb() __asm__ __volatile__ ( "lock; addl $0,0(%%esp)" : : : "memory" )
+#define wmb() __asm__ __volatile__ ( "" : : : "memory")
+#elif defined(__x86_64__)
+#define mb()  __asm__ __volatile__ ( "mfence" : : : "memory")
+#define rmb() __asm__ __volatile__ ( "lfence" : : : "memory")
+#define wmb() __asm__ __volatile__ ( "" : : : "memory")
+#elif defined(__ia64__)
+#define mb()   __asm__ __volatile__ ("mf" ::: "memory")
+#define rmb()  __asm__ __volatile__ ("mf" ::: "memory")
+#define wmb()  __asm__ __volatile__ ("mf" ::: "memory")
+#endif /* no definitions are provided here for any other architecture */
+
+#define PERROR(_m, _a...)   /* "ERROR: <msg> (<errno> = <text>)" to stderr */ \
+do {                                                            \
+    int __saved_errno = errno;                                  \
+    fprintf(stderr, "ERROR: " _m " (%d = %s)\n" , ## _a ,       \
+            __saved_errno, strerror(__saved_errno));            \
+    errno = __saved_errno;                                      \
+} while (0)
+/* Defaults shown by usage(): poll interval in milliseconds, buffer size in pages. */
+#define POLL_SLEEP_MILLIS 100
+#define DEFAULT_TBUF_SIZE 20
+/* Modulus used on the uint32_t cpu mask, and fields of the cpu-change record header. */
+#define BITS_PER_LONG 32
+#define KVM_TRC_EXTRA_SHIFT 28
+#define KVM_TRC_CPU_CHG (KVM_TRC_GEN + 3)
+
+typedef struct settings_st {
+    char *outfile;              /* output file path (the positional argument) */
+    struct timespec poll_sleep; /* sleep between two polls of the trace buffers */
+    uint32_t cpu_mask;          /* bit (cpu % BITS_PER_LONG) set => that cpu is read */
+    unsigned long tbuf_pages;   /* per-cpu size in pages handed to KVM_ENABLE_TRACE */
+    uint8_t discard:1;          /* if set, already-buffered data is skipped at start;
+} settings_t;                    * no option in this file sets it */
+
+static settings_t opts;         /* program-wide settings, filled by main()/parse_args() */
+static volatile sig_atomic_t interrupted = 0; /* set by close_handler(), polled by the main loop */
+
+static void close_handler(int signal)
+{
+    interrupted = 1; /* only flag the request; the polling loop does the shutdown */
+}
+
+static void sigalrm_handler(int signal)
+{   /* No-op handler; main() installs it for SIGALRM. */
+}
+
+static int cpu_in_mask(unsigned cpu, uint32_t cpu_mask)
+{   /* 1 when bit (cpu mod BITS_PER_LONG) of cpu_mask is set, otherwise 0 */
+    return (int)((cpu_mask >> (cpu % BITS_PER_LONG)) & 1U);
+}
+
+static struct timespec millis_to_timespec(unsigned long millis)
+{
+    struct timespec spec;
+
+    /* Whole seconds, plus the sub-second remainder expressed in nanoseconds. */
+    spec.tv_sec = millis / 1000;
+    spec.tv_nsec = (millis % 1000) * 1000000L; /* 1 ms = 1,000,000 ns */
+    return spec;
+}
+
+static void write_buffer(unsigned int cpu, unsigned char *start, int size,
+                         int total_size, int outfd)
+{
+    /* Write `size` bytes from `start` to outfd; any failed or short write
+     * terminates the program.  When total_size is non-zero, a cpu-change
+     * record carrying `cpu` and total_size is written first. */
+    ssize_t written; /* write() returns ssize_t, -1 on error */
+
+    if ( total_size != 0 )
+    {
+        struct {
+            uint32_t header;
+            struct {
+                unsigned cpu;
+                unsigned byte_count;
+            } extra;
+        } rec;
+        /* Header: KVM_TRC_CPU_CHG plus the count of extra 32-bit words. */
+        rec.header = KVM_TRC_CPU_CHG
+            | ((sizeof(rec.extra)/sizeof(uint32_t)) << KVM_TRC_EXTRA_SHIFT);
+        rec.extra.cpu = cpu;
+        rec.extra.byte_count = total_size;
+        written = write(outfd, &rec, sizeof(rec));
+        if ( written != (ssize_t)sizeof(rec) )
+        {
+            fprintf(stderr, "Cannot write cpu change (write returned %zd)\n",
+                    written);
+            goto fail;
+        }
+    }
+
+    written = write(outfd, start, size);
+    if ( written != size )
+    {
+        fprintf(stderr, "Write failed! (size %d, returned %zd)\n",
+                size, written);
+        goto fail;
+    }
+    return;
+
+ fail:
+    PERROR("Failed to write trace data");
+    exit(EXIT_FAILURE);
+}
+
+static struct kvm_trace_buf **init_bufs_ptrs(unsigned long bufs_mapped, unsigned int num,
+                              unsigned long size)
+{
+    /* Return a calloc'ed table of `num` pointers, entry i being the address
+     * bufs_mapped + i * size.  Exits the program if allocation fails. */
+    unsigned int i;
+    struct kvm_trace_buf **user_ptrs = calloc(num, sizeof(*user_ptrs));
+
+    if ( user_ptrs == NULL )
+    {
+        /* No "\n" in the message: PERROR appends the errno text and newline. */
+        PERROR("Failed to allocate memory for buffer pointers");
+        exit(EXIT_FAILURE);
+    }
+    for ( i = 0; i < num; i++ )
+        user_ptrs[i] = (struct kvm_trace_buf *)(bufs_mapped + size * i);
+    return user_ptrs;
+}
+
+static int monitor_tbufs(int kvm_fd, int outfd)
+{
+    /* Enable kvm tracing, then copy new per-cpu trace data to outfd until a
+     * signal sets `interrupted`; finally close outfd, disable tracing, return 0. */
+    int ret;
+    unsigned int i, ncpus;
+    unsigned long tbufs_mapped, pcpu_size, data_size;
+    struct kvm_trace_buf **meta;
+    struct kvm_trace_info t_info;
+
+    t_info.pcpu_pages = opts.tbuf_pages;
+    ret = ioctl(kvm_fd, KVM_ENABLE_TRACE, &t_info);
+    if (ret == -1) {
+        PERROR("Failure to enable kvm trace");
+        exit(EXIT_FAILURE);
+    }
+
+    /* data_size is the per-cpu size minus sizeof(struct kvm_trace_buf). */
+    tbufs_mapped = t_info.userspace_addr;
+    ncpus = t_info.ncpus;
+    pcpu_size = t_info.pcpu_pages * sysconf(_SC_PAGESIZE);
+    data_size = pcpu_size - sizeof(struct kvm_trace_buf);
+    meta  = init_bufs_ptrs(tbufs_mapped, ncpus, pcpu_size);
+
+    if ( opts.discard )
+        for ( i = 0; i < ncpus; i++ )
+            meta[i]->cons = meta[i]->prod;
+
+    while ( !interrupted )
+    {
+        for ( i = 0; (i < ncpus) && !interrupted; i++ )
+        {
+            unsigned long start_offset, end_offset, window_size, cons, prod;
+            unsigned char *data;
+
+            /* Skip cpus outside the mask without ending the scan of the rest. */
+            if ( !cpu_in_mask(i, opts.cpu_mask) )
+                continue;
+
+            cons = meta[i]->cons;
+            prod = meta[i]->prod;
+            data = meta[i]->data;
+            rmb();
+
+            if ( cons == prod )
+                continue;
+            assert(prod > cons);
+
+            window_size = prod - cons;
+            start_offset = cons % data_size;
+            end_offset = prod % data_size;
+
+            if ( end_offset > start_offset )
+            {
+                write_buffer(i, data + start_offset,
+                             window_size,
+                             window_size,
+                             outfd);
+            }
+            else
+            {
+                /* Wrapped: write up to the end of the data area, then its start. */
+                write_buffer(i, data + start_offset,
+                             data_size - start_offset,
+                             window_size,
+                             outfd);
+                write_buffer(i, data,
+                             end_offset,
+                             0, outfd);
+            }
+            mb(); /* barrier between the reads above and the index update below */
+            meta[i]->cons = prod;
+        }
+        nanosleep(&opts.poll_sleep, NULL);
+    }
+
+    free(meta);
+    close(outfd);
+    /* it is necessary to disable trace */
+    ioctl(kvm_fd, KVM_DISABLE_TRACE, 0);
+    return 0;
+}
+
+#define xstr(x) str(x) /* expands x first, then stringifies the result */
+#define str(x) #x
+
+const char *program_version     = "kvmtrace v0.1"; /* printed for -V */
+const char *program_bug_address = "<kvm-devel@lists.sourceforge.net>"; /* printed by usage() */
+
+void usage(void)
+{
+    /* Print the help text and the bug-report address to stdout, then exit with failure. */
+    static const char usage_text[] =
+"Usage: kvmtrace [OPTION...] [output file]\n"
+"Tool to capture kvm trace buffer data\n"
+"\n"
+"  -c, --cpu-mask=c        Set cpu-mask\n"
+"  -s, --poll-sleep=p      Set sleep time, p, in milliseconds between\n"
+"                          polling the trace buffer for new data\n"
+"                          (default " xstr(POLL_SLEEP_MILLIS) ").\n"
+"  -S, --trace-buf-size=N  Set trace buffer size in pages (default "
+                           xstr(DEFAULT_TBUF_SIZE) ").\n"
+"                          N.B. that the trace buffer cannot be resized.\n"
+"                          if it has already been set this module cycle,\n"
+"                          this argument will be ignored.\n"
+"  -?, --help              Show this message\n"
+"  -V, --version           Print program version\n"
+"\n"
+"This tool is used to capture trace buffer data from kvm. The\n"
+"data is output in a binary format, in the following order:\n"
+"\n"
+"  CPU(uint) TSC(uint64_t) EVENT(uint32_t) D1 D2 D3 D4 D5 (all uint32_t)\n"
+"\n"
+"The output should be parsed using the tool kvmtrace_format,\n"
+"which can produce human-readable output in ASCII format.\n";
+
+    fputs(usage_text, stdout);
+    printf("\nReport bugs to %s\n", program_bug_address);
+    exit(EXIT_FAILURE);
+}
+
+long argtol(const char * arg, int base)
+{
+    /* strtol() wrapper: a bad or out-of-range argument is reported, then usage() exits. */
+    char *tail;
+    long parsed;
+
+    errno = 0;
+    parsed = strtol(arg, &tail, base);
+    if (errno == 0 && tail != arg && *tail == '\0')
+        return parsed;
+    if (errno != 0) {
+        fprintf(stderr, "Invalid option argument: %s\n", arg);
+        fprintf(stderr, "Error: %s\n\n", strerror(errno));
+    } else {
+        fprintf(stderr, "Invalid option argument: %s\n\n", arg);
+    }
+    usage();
+    return parsed;
+}
+
+void parse_args(int argc, char **argv)
+{
+    /* Fill the global `opts` from the command line.  Exactly one positional
+     * argument (the output file) must follow the options; otherwise, and for
+     * -?/--help or an unrecognised option, usage() is called and exits. */
+    static struct option long_options[] = {
+        { "poll-sleep",     required_argument, 0, 's' },
+        { "cpu-mask",       required_argument, 0, 'c' },
+        { "trace-buf-size", required_argument, 0, 'S' },
+        { "help",           no_argument,       0, '?' },
+        { "version",        no_argument,       0, 'V' },
+        { 0, 0, 0, 0 }
+    };
+
+    for ( ;; )
+    {
+        int opt = getopt_long(argc, argv, "c:e:s:S:?V", long_options, NULL);
+
+        if ( opt == -1 )
+            break;
+
+        if ( opt == 's' )       /* value is in milliseconds */
+            opts.poll_sleep = millis_to_timespec(argtol(optarg, 0));
+        else if ( opt == 'c' )
+            opts.cpu_mask = argtol(optarg, 0);
+        else if ( opt == 'S' )
+            opts.tbuf_pages = argtol(optarg, 0);
+        else if ( opt == 'V' )
+        {
+            printf("%s\n", program_version);
+            exit(EXIT_SUCCESS);
+        }
+        else
+        {
+            /* '?' (help/unknown) and the otherwise unhandled -e end up here. */
+            usage();
+        }
+    }
+
+    if ( optind != argc - 1 )
+        usage();
+
+    opts.outfile = argv[optind];
+}
+
+#ifndef O_LARGEFILE
+#define O_LARGEFILE     0 /* fallback: the flag contributes nothing to open() */
+#endif
+
+int main(int argc, char **argv)
+{
+    /* Parse options, open /dev/kvm and the output file, install the signal
+     * handlers that end the capture, then run monitor_tbufs(). */
+    int fd, outfd = 1, ret;
+    struct sigaction act;
+
+    opts.outfile = 0;
+    opts.poll_sleep = millis_to_timespec(POLL_SLEEP_MILLIS);
+    opts.cpu_mask = ~0U;
+
+    parse_args(argc, argv);
+
+    if((fd = open("/dev/kvm", O_RDWR)) == -1)
+    {
+        perror("can't open /dev/kvm"); /* stderr, with the errno text */
+        exit(EXIT_FAILURE);            /* a failed start must not exit 0 */
+    }
+
+    if ( opts.outfile )
+        outfd = open(opts.outfile,
+                     O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE,
+                     0644);
+    if ( outfd < 0 )
+    {
+        perror("Could not open output file");
+        exit(EXIT_FAILURE);
+    }
+
+    if ( isatty(outfd) )
+    {
+        fprintf(stderr, "Cannot output to a TTY, specify a log file.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    act.sa_handler = close_handler;
+    act.sa_flags = 0;
+    sigemptyset(&act.sa_mask);
+    sigaction(SIGHUP,  &act, NULL);
+    sigaction(SIGTERM, &act, NULL);
+    sigaction(SIGINT,  &act, NULL);
+    if (signal(SIGALRM, sigalrm_handler) == SIG_ERR)
+        perror("can't catch SIGALRM");
+
+    ret = monitor_tbufs(fd, outfd);
+    close(fd);
+    return ret;
+}
-- 
1.5.1

