date:20120917

This adds a method to call init/exit functions similar to the kernel's init 
functions.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/include/kvm/util-init.h | 51 +
 tools/kvm/util/init.c | 69 +++
 2 files changed, 120 insertions(+)
 create mode 100644 tools/kvm/include/kvm/util-init.h
 create mode 100644 tools/kvm/util/init.c

diff --git a/tools/kvm/include/kvm/util-init.h 
b/tools/kvm/include/kvm/util-init.h
new file mode 100644
index 000..beccb4e
--- /dev/null
+++ b/tools/kvm/include/kvm/util-init.h
@@ -0,0 +1,51 @@
+#ifndef KVM__UTIL_INIT_H
+#define KVM__UTIL_INIT_H
+
+struct kvm;
+
+struct init_item {
+   struct hlist_node n;
+   const char *fn_name;
+   int (*init)(struct kvm *);
+};
+
+int init_list__init(struct kvm *kvm);
+int init_list__exit(struct kvm *kvm);
+
+int init_list_add(struct init_item *t, int (*init)(struct kvm *),
+   int priority, const char *name);
+int exit_list_add(struct init_item *t, int (*init)(struct kvm *),
+   int priority, const char *name);
+
+#define __init_list_add(cb, l) \
+static void __attribute__ ((constructor)) __init__##cb(void)   \
+{  \
+   static char name[] = #cb;   \
+   static struct init_item t;  \
+   init_list_add(t, cb, l, name); \
+}
+
+#define __exit_list_add(cb, l) \
+static void __attribute__ ((constructor)) __init__##cb(void)   \
+{  \
+   static char name[] = #cb;   \
+   static struct init_item t;  \
+   exit_list_add(t, cb, l, name); \
+}
+
+#define core_init(cb) __init_list_add(cb, 0)
+#define base_init(cb) __init_list_add(cb, 2)
+#define dev_base_init(cb)  __init_list_add(cb, 4)
+#define dev_init(cb) __init_list_add(cb, 5)
+#define virtio_dev_init(cb) __init_list_add(cb, 6)
+#define firmware_init(cb) __init_list_add(cb, 7)
+#define late_init(cb) __init_list_add(cb, 9)
+
+#define core_exit(cb) __exit_list_add(cb, 0)
+#define base_exit(cb) __exit_list_add(cb, 2)
+#define dev_base_exit(cb) __exit_list_add(cb, 4)
+#define dev_exit(cb) __exit_list_add(cb, 5)
+#define virtio_dev_exit(cb) __exit_list_add(cb, 6)
+#define firmware_exit(cb) __exit_list_add(cb, 7)
+#define late_exit(cb) __exit_list_add(cb, 9)
+#endif
diff --git a/tools/kvm/util/init.c b/tools/kvm/util/init.c
new file mode 100644
index 000..b3face1
--- /dev/null
+++ b/tools/kvm/util/init.c
@@ -0,0 +1,69 @@
+#include linux/list.h
+#include linux/kernel.h
+
+#include kvm/kvm.h
+#include kvm/util-init.h
+
+#define PRIORITY_LISTS 10
+
+static struct hlist_head init_lists[PRIORITY_LISTS];
+static struct hlist_head exit_lists[PRIORITY_LISTS];
+
+int init_list_add(struct init_item *t, int (*init)(struct kvm *),
+   int priority, const char *name)
+{
+   t-init = init;
+   t-fn_name = name;
+   hlist_add_head(t-n, init_lists[priority]);
+
+   return 0;
+}
+
+int exit_list_add(struct init_item *t, int (*init)(struct kvm *),
+   int priority, const char *name)
+{
+   t-init = init;
+   t-fn_name = name;
+   hlist_add_head(t-n, exit_lists[priority]);
+
+   return 0;
+}
+
+int init_list__init(struct kvm *kvm)
+{
+   unsigned int i;
+   int r = 0;
+   struct hlist_node *n;
+   struct init_item *t;
+
+   for (i = 0; i  ARRAY_SIZE(init_lists); i++)
+   hlist_for_each_entry(t, n, init_lists[i], n) {
+   r = t-init(kvm);
+   if (r  0) {
+   pr_warning(Failed init: %s\n, t-fn_name);
+   goto fail;
+   }
+   }
+
+fail:
+   return r;
+}
+
+int init_list__exit(struct kvm *kvm)
+{
+   int i;
+   int r = 0;
+   struct hlist_node *n;
+   struct init_item *t;
+
+   for (i = ARRAY_SIZE(exit_lists) - 1; i = 0; i--)
+   hlist_for_each_entry(t, n, exit_lists[i], n) {
+   r = t-init(kvm);
+   if (r  0) {
+   pr_warning(%s failed.\n, t-fn_name);
+   goto fail;
+   }
+   }
+fail:
+   return r;
+}
-- 
1.7.12

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/8] kvm tools: add init/exit automatic calls

This adds a method to call init/exit functions similar to the kernel's init 
functions.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/include/kvm/util-init.h | 51 +
 tools/kvm/util/init.c | 69 +++
 2 files changed, 120 insertions(+)
 create mode 100644 tools/kvm/include/kvm/util-init.h
 create mode 100644 tools/kvm/util/init.c

diff --git a/tools/kvm/include/kvm/util-init.h 
b/tools/kvm/include/kvm/util-init.h
new file mode 100644
index 000..beccb4e
--- /dev/null
+++ b/tools/kvm/include/kvm/util-init.h
@@ -0,0 +1,51 @@
+#ifndef KVM__UTIL_INIT_H
+#define KVM__UTIL_INIT_H
+
+struct kvm;
+
+struct init_item {
+   struct hlist_node n;
+   const char *fn_name;
+   int (*init)(struct kvm *);
+};
+
+int init_list__init(struct kvm *kvm);
+int init_list__exit(struct kvm *kvm);
+
+int init_list_add(struct init_item *t, int (*init)(struct kvm *),
+   int priority, const char *name);
+int exit_list_add(struct init_item *t, int (*init)(struct kvm *),
+   int priority, const char *name);
+
+#define __init_list_add(cb, l) \
+static void __attribute__ ((constructor)) __init__##cb(void)   \
+{  \
+   static char name[] = #cb;   \
+   static struct init_item t;  \
+   init_list_add(t, cb, l, name); \
+}
+
+#define __exit_list_add(cb, l) \
+static void __attribute__ ((constructor)) __init__##cb(void)   \
+{  \
+   static char name[] = #cb;   \
+   static struct init_item t;  \
+   exit_list_add(t, cb, l, name); \
+}
+
+#define core_init(cb) __init_list_add(cb, 0)
+#define base_init(cb) __init_list_add(cb, 2)
+#define dev_base_init(cb)  __init_list_add(cb, 4)
+#define dev_init(cb) __init_list_add(cb, 5)
+#define virtio_dev_init(cb) __init_list_add(cb, 6)
+#define firmware_init(cb) __init_list_add(cb, 7)
+#define late_init(cb) __init_list_add(cb, 9)
+
+#define core_exit(cb) __exit_list_add(cb, 0)
+#define base_exit(cb) __exit_list_add(cb, 2)
+#define dev_base_exit(cb) __exit_list_add(cb, 4)
+#define dev_exit(cb) __exit_list_add(cb, 5)
+#define virtio_dev_exit(cb) __exit_list_add(cb, 6)
+#define firmware_exit(cb) __exit_list_add(cb, 7)
+#define late_exit(cb) __exit_list_add(cb, 9)
+#endif
diff --git a/tools/kvm/util/init.c b/tools/kvm/util/init.c
new file mode 100644
index 000..b3face1
--- /dev/null
+++ b/tools/kvm/util/init.c
@@ -0,0 +1,69 @@
+#include linux/list.h
+#include linux/kernel.h
+
+#include kvm/kvm.h
+#include kvm/util-init.h
+
+#define PRIORITY_LISTS 10
+
+static struct hlist_head init_lists[PRIORITY_LISTS];
+static struct hlist_head exit_lists[PRIORITY_LISTS];
+
+int init_list_add(struct init_item *t, int (*init)(struct kvm *),
+   int priority, const char *name)
+{
+   t-init = init;
+   t-fn_name = name;
+   hlist_add_head(t-n, init_lists[priority]);
+
+   return 0;
+}
+
+int exit_list_add(struct init_item *t, int (*init)(struct kvm *),
+   int priority, const char *name)
+{
+   t-init = init;
+   t-fn_name = name;
+   hlist_add_head(t-n, exit_lists[priority]);
+
+   return 0;
+}
+
+int init_list__init(struct kvm *kvm)
+{
+   unsigned int i;
+   int r = 0;
+   struct hlist_node *n;
+   struct init_item *t;
+
+   for (i = 0; i  ARRAY_SIZE(init_lists); i++)
+   hlist_for_each_entry(t, n, init_lists[i], n) {
+   r = t-init(kvm);
+   if (r  0) {
+   pr_warning(Failed init: %s\n, t-fn_name);
+   goto fail;
+   }
+   }
+
+fail:
+   return r;
+}
+
+int init_list__exit(struct kvm *kvm)
+{
+   int i;
+   int r = 0;
+   struct hlist_node *n;
+   struct init_item *t;
+
+   for (i = ARRAY_SIZE(exit_lists) - 1; i = 0; i--)
+   hlist_for_each_entry(t, n, exit_lists[i], n) {
+   r = t-init(kvm);
+   if (r  0) {
+   pr_warning(%s failed.\n, t-fn_name);
+   goto fail;
+   }
+   }
+fail:
+   return r;
+}
-- 
1.7.12

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 3/8] kvm tools: pass kvm ptr directly to timer injection

This will help us get rid of the global kvm object.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/builtin-run.c | 10 --
 tools/kvm/kvm.c |  1 +
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c
index bca9122..ee59d1c 100644
--- a/tools/kvm/builtin-run.c
+++ b/tools/kvm/builtin-run.c
@@ -163,8 +163,10 @@ void kvm_run_set_wrapper_sandbox(void)
OPT_END()   \
};
 
-static void handle_sigalrm(int sig)
+static void handle_sigalrm(int sig, siginfo_t *si, void *uc)
 {
+   struct kvm *kvm = si-si_value.sival_ptr;
+
kvm__arch_periodic_poll(kvm);
 }
 
@@ -476,12 +478,16 @@ static int kvm_cmd_run_init(int argc, const char **argv)
 {
static char real_cmdline[2048], default_name[20];
unsigned int nr_online_cpus;
+   struct sigaction sa;
 
kvm = kvm__new();
if (IS_ERR(kvm))
return PTR_ERR(kvm);
 
-   signal(SIGALRM, handle_sigalrm);
+   sa.sa_flags = SA_SIGINFO;
+   sa.sa_sigaction = handle_sigalrm;
+   sigemptyset(sa.sa_mask);
+   sigaction(SIGALRM, sa, NULL);
 
nr_online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
kvm-cfg.custom_rootfs_name = default;
diff --git a/tools/kvm/kvm.c b/tools/kvm/kvm.c
index 20322a3..d107931 100644
--- a/tools/kvm/kvm.c
+++ b/tools/kvm/kvm.c
@@ -358,6 +358,7 @@ int kvm_timer__init(struct kvm *kvm)
sev.sigev_value.sival_int   = 0;
sev.sigev_notify= SIGEV_THREAD_ID;
sev.sigev_signo = SIGALRM;
+   sev.sigev_value.sival_ptr   = kvm;
sev._sigev_un._tid  = syscall(__NR_gettid);
 
r = timer_create(CLOCK_REALTIME, sev, kvm-timerid);
-- 
1.7.12

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 2/8] kvm tools: use init/exit where possible

Switch to using init/exit calls instead of the repeating call blocks in 
builtin-run.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/Makefile  |   1 +
 tools/kvm/builtin-run.c | 246 +---
 tools/kvm/disk/core.c   |   2 +
 tools/kvm/framebuffer.c |   3 +
 tools/kvm/hw/i8042.c|   1 +
 tools/kvm/hw/pci-shmem.c|   2 +
 tools/kvm/hw/rtc.c  |   2 +
 tools/kvm/hw/serial.c   |   2 +
 tools/kvm/include/kvm/kvm.h |   2 +
 tools/kvm/ioeventfd.c   |   2 +
 tools/kvm/ioport.c  |   2 +
 tools/kvm/kvm-cpu.c |   2 +
 tools/kvm/kvm-ipc.c |   2 +
 tools/kvm/kvm.c |   4 +
 tools/kvm/pci.c |   2 +
 tools/kvm/symbol.c  |   4 +-
 tools/kvm/term.c|   2 +
 tools/kvm/ui/sdl.c  |   7 +-
 tools/kvm/ui/vnc.c  |   7 +-
 tools/kvm/util/threadpool.c |   3 +
 tools/kvm/virtio/9p.c   |   1 +
 tools/kvm/virtio/balloon.c  |   2 +
 tools/kvm/virtio/blk.c  |   2 +
 tools/kvm/virtio/console.c  |   2 +
 tools/kvm/virtio/net.c  |   2 +
 tools/kvm/virtio/rng.c  |   2 +
 tools/kvm/virtio/scsi.c |   2 +
 tools/kvm/x86/irq.c |   2 +
 28 files changed, 66 insertions(+), 247 deletions(-)

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index efa3d4f..862e76b 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -84,6 +84,7 @@ OBJS  += net/uip/buf.o
 OBJS   += net/uip/csum.o
 OBJS   += net/uip/dhcp.o
 OBJS   += kvm-cmd.o
+OBJS   += util/init.o
 OBJS   += util/rbtree.o
 OBJS   += util/threadpool.o
 OBJS   += util/parse-options.o
diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c
index 5ddffaa..bca9122 100644
--- a/tools/kvm/builtin-run.c
+++ b/tools/kvm/builtin-run.c
@@ -476,7 +476,6 @@ static int kvm_cmd_run_init(int argc, const char **argv)
 {
static char real_cmdline[2048], default_name[20];
unsigned int nr_online_cpus;
-   int r;
 
kvm = kvm__new();
if (IS_ERR(kvm))
@@ -646,160 +645,7 @@ static int kvm_cmd_run_init(int argc, const char **argv)
printf(  # %s run -k %s -m %Lu -c %d --name %s\n, KVM_BINARY_NAME,
kvm-cfg.kernel_filename, kvm-cfg.ram_size / 1024 / 1024, 
kvm-cfg.nrcpus, kvm-cfg.guest_name);
 
-   r = kvm__init(kvm);
-   if (r)
-   goto fail;
-
-   r = term_init(kvm);
-   if (r  0) {
-   pr_err(term_init() failed with error %d\n, r);
-   goto fail;
-   }
-
-
-   r = ioeventfd__init(kvm);
-   if (r  0) {
-   pr_err(ioeventfd__init() failed with error %d\n, r);
-   goto fail;
-   }
-
-   r = kvm_cpu__init(kvm);
-   if (r  0) {
-   pr_err(kvm_cpu__init() failed with error %d\n, r);
-   goto fail;
-   }
-
-   r = irq__init(kvm);
-   if (r  0) {
-   pr_err(irq__init() failed with error %d\n, r);
-   goto fail;
-   }
-
-   r = pci__init(kvm);
-   if (r  0) {
-   pr_err(pci__init() failed with error %d\n, r);
-   goto fail;
-   }
-
-   r = ioport__init(kvm);
-   if (r  0) {
-   pr_err(ioport__init() failed with error %d\n, r);
-   goto fail;
-   }
-
-   r = disk_image__init(kvm);
-   if (r  0) {
-   pr_err(disk_image__init() failed with error %d\n, r);
-   goto fail;
-   }
-
-   r = symbol_init(kvm);
-   if (r  0)
-   pr_debug(symbol_init() failed with error %d\n, r);
-
-   r = rtc__init(kvm);
-   if (r  0) {
-   pr_err(rtc__init() failed with error %d\n, r);
-   goto fail;
-   }
-
-   r = serial8250__init(kvm);
-   if (r  0) {
-   pr_err(serial__init() failed with error %d\n, r);
-   goto fail;
-   }
-
-   r = virtio_blk__init(kvm);
-   if (r  0) {
-   pr_err(virtio_blk__init() failed with error %d\n, r);
-   goto fail;
-   }
-
-   r = virtio_scsi_init(kvm);
-   if (r  0) {
-   pr_err(virtio_scsi_init() failed with error %d\n, r);
-   goto fail;
-   }
-
-   r = virtio_console__init(kvm);
-   if (r  0) {
-   pr_err(virtio_console__init() failed with error %d\n, r);
-   goto fail;
-   }
-
-   r = virtio_rng__init(kvm);
-   if (r  0) {
-   pr_err(virtio_rng__init() failed with error %d\n, r);
-   goto fail;
-   }
-
-   r = virtio_bln__init(kvm);
-   if (r  0) {
-   pr_err(virtio_rng__init() failed with error %d\n, r);
-   goto fail;
-   }
-
-   r = virtio_9p__init(kvm);
-   if (r  0) {
-   pr_err(virtio_9p__init() failed with error %d\n, r);
-   goto fail;
-   }
-
-   r = virtio_net__init(kvm);
-   if (r  0) {
-   pr_err(virtio_net__init() failed with error

[PATCH 4/8] kvm tools: remove global kvm object

This was ugly, and now we get rid of it.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/builtin-run.c| 48 +++---
 tools/kvm/hw/i8042.c   |  4 ++--
 tools/kvm/hw/pci-shmem.c   |  2 +-
 tools/kvm/hw/rtc.c | 10 -
 tools/kvm/hw/serial.c  |  8 +++
 tools/kvm/hw/vesa.c|  2 +-
 tools/kvm/include/kvm/brlock.h | 16 +++---
 tools/kvm/include/kvm/ioport.h |  7 +++---
 tools/kvm/include/kvm/kvm.h|  4 ++--
 tools/kvm/include/kvm/term.h   |  4 ++--
 tools/kvm/ioport.c | 16 +++---
 tools/kvm/kvm-cpu.c|  2 +-
 tools/kvm/kvm-ipc.c|  4 ++--
 tools/kvm/kvm.c|  6 ++
 tools/kvm/mmio.c   | 10 -
 tools/kvm/pci.c| 10 -
 tools/kvm/term.c   |  7 +++---
 tools/kvm/virtio/balloon.c |  5 ++---
 tools/kvm/virtio/console.c |  2 +-
 tools/kvm/virtio/net.c | 12 +--
 tools/kvm/virtio/pci.c |  6 +++---
 tools/kvm/x86/ioport.c | 20 +-
 22 files changed, 102 insertions(+), 103 deletions(-)

diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c
index ee59d1c..1b046dc 100644
--- a/tools/kvm/builtin-run.c
+++ b/tools/kvm/builtin-run.c
@@ -53,7 +53,6 @@
 #define KB_SHIFT   (10)
 #define GB_SHIFT   (30)
 
-struct kvm *kvm;
 __thread struct kvm_cpu *current_kvm_cpu;
 
 static int  kvm_run_wrapper;
@@ -339,11 +338,13 @@ static const char *find_vmlinux(void)
 
 void kvm_run_help(void)
 {
+   struct kvm *kvm = NULL;
+
BUILD_OPTIONS(options, kvm-cfg, kvm);
usage_with_options(run_usage, options);
 }
 
-static int kvm_setup_guest_init(void)
+static int kvm_setup_guest_init(struct kvm *kvm)
 {
const char *rootfs = kvm-cfg.custom_rootfs_name;
char tmp[PATH_MAX];
@@ -367,7 +368,7 @@ static int kvm_setup_guest_init(void)
return 0;
 }
 
-static int kvm_run_set_sandbox(void)
+static int kvm_run_set_sandbox(struct kvm *kvm)
 {
const char *guestfs_name = kvm-cfg.custom_rootfs_name;
char path[PATH_MAX], script[PATH_MAX], *tmp;
@@ -439,7 +440,7 @@ static void resolve_program(const char *src, char *dst, 
size_t len)
strncpy(dst, src, len);
 }
 
-static void kvm_run_write_sandbox_cmd(const char **argv, int argc)
+static void kvm_run_write_sandbox_cmd(struct kvm *kvm, const char **argv, int 
argc)
 {
const char script_hdr[] = #! /bin/bash\n\n;
char program[PATH_MAX];
@@ -474,15 +475,15 @@ static void kvm_run_write_sandbox_cmd(const char **argv, 
int argc)
close(fd);
 }
 
-static int kvm_cmd_run_init(int argc, const char **argv)
+static struct kvm *kvm_cmd_run_init(int argc, const char **argv)
 {
static char real_cmdline[2048], default_name[20];
unsigned int nr_online_cpus;
struct sigaction sa;
+   struct kvm *kvm = kvm__new();
 
-   kvm = kvm__new();
if (IS_ERR(kvm))
-   return PTR_ERR(kvm);
+   return kvm;
 
sa.sa_flags = SA_SIGINFO;
sa.sa_sigaction = handle_sigalrm;
@@ -502,7 +503,7 @@ static int kvm_cmd_run_init(int argc, const char **argv)
if (strcmp(argv[0], --) == 0) {
if (kvm_run_wrapper == KVM_RUN_SANDBOX) {
kvm-cfg.sandbox = 
DEFAULT_SANDBOX_FILENAME;
-   kvm_run_write_sandbox_cmd(argv+1, 
argc-1);
+   kvm_run_write_sandbox_cmd(kvm, argv+1, 
argc-1);
break;
}
}
@@ -513,7 +514,7 @@ static int kvm_cmd_run_init(int argc, const char **argv)
%s\n, argv[0]);
usage_with_options(run_usage, options);
free(kvm);
-   return -EINVAL;
+   return ERR_PTR(-EINVAL);
}
if (kvm_run_wrapper == KVM_RUN_SANDBOX) {
/*
@@ -521,7 +522,7 @@ static int kvm_cmd_run_init(int argc, const char **argv)
 * sandbox command
 */
kvm-cfg.sandbox = DEFAULT_SANDBOX_FILENAME;
-   kvm_run_write_sandbox_cmd(argv, argc);
+   kvm_run_write_sandbox_cmd(kvm, argv, argc);
} else {
/*
 * first unhandled parameter is treated as a 
kernel
@@ -542,7 +543,7 @@ static int kvm_cmd_run_init(int argc, const char **argv)
 
if (!kvm-cfg.kernel_filename) {
kernel_usage_with_options();
-   return -EINVAL;
+   return

[PATCH 5/8] kvm tools: initialize the threadpool job iterator before using

This would fix a bug where the exit function of the threadpool would hang
if no jobs were processed yet and a request to exit was received.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/util/threadpool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/kvm/util/threadpool.c b/tools/kvm/util/threadpool.c
index 24e2344..85ac7e7 100644
--- a/tools/kvm/util/threadpool.c
+++ b/tools/kvm/util/threadpool.c
@@ -79,7 +79,7 @@ static void *thread_pool__threadfunc(void *param)
pthread_cleanup_push(thread_pool__threadfunc_cleanup, NULL);
 
while (running) {
-   struct thread_pool__job *curjob;
+   struct thread_pool__job *curjob = NULL;
 
mutex_lock(job_mutex);
while (running  (curjob = thread_pool__job_pop_locked()) == 
NULL)
-- 
1.7.12

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 6/8] kvm tools: don't exit on debug ioport write

While it shouldn't happen on regular guests, we sometimes hit it when fuzzing
within the guest, which would cause the lkvm process to exit - which is
undesired.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/x86/ioport.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/kvm/x86/ioport.c b/tools/kvm/x86/ioport.c
index 4993f9d..e35d0ee 100644
--- a/tools/kvm/x86/ioport.c
+++ b/tools/kvm/x86/ioport.c
@@ -5,7 +5,7 @@
 
 static bool debug_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, 
void *data, int size)
 {
-   exit(EXIT_SUCCESS);
+   return 0;
 }
 
 static struct ioport_operations debug_ops = {
-- 
1.7.12

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 7/8] kvm tools: fix SMP

We accidentally broke SMP when we moved mptable init to before we initialize the
vcpu count. That means that we always built smptable which was not properly
initialized for the given configuration.

Instead of initializing mptable as part of the kvm arch initialization, let it
be initialized on its own in the firmware initialization level.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/x86/kvm.c | 14 ++
 tools/kvm/x86/mptable.c |  2 ++
 2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/tools/kvm/x86/kvm.c b/tools/kvm/x86/kvm.c
index e636d43..ecada45 100644
--- a/tools/kvm/x86/kvm.c
+++ b/tools/kvm/x86/kvm.c
@@ -342,27 +342,17 @@ bool load_bzimage(struct kvm *kvm, int fd_kernel,
  */
 int kvm__arch_setup_firmware(struct kvm *kvm)
 {
-   int r;
-
/* standart minimal configuration */
setup_bios(kvm);
 
/* FIXME: SMP, ACPI and friends here */
 
-   /* MP table */
-   r = mptable__init(kvm);
-
-   return r;
+   return 0;
 }
 
 int kvm__arch_free_firmware(struct kvm *kvm)
 {
-   int r;
-
-   /* MP table */
-   r = mptable__exit(kvm);
-
-   return r;
+   return 0;
 }
 
 void kvm__arch_periodic_poll(struct kvm *kvm)
diff --git a/tools/kvm/x86/mptable.c b/tools/kvm/x86/mptable.c
index 12bdcf8..ea8c6e8 100644
--- a/tools/kvm/x86/mptable.c
+++ b/tools/kvm/x86/mptable.c
@@ -280,8 +280,10 @@ int mptable__init(struct kvm *kvm)
 
return 0;
 }
+firmware_init(mptable__init);
 
 int mptable__exit(struct kvm *kvm)
 {
return 0;
 }
+firmware_exit(mptable__exit);
-- 
1.7.12

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 8/8] kvm tools: fix build optimization

I've accidentally changed optimization level to -O0 when testing one of the
patches and committed that. Revert it back to -O2.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 862e76b..d4b5eb3 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -244,7 +244,7 @@ DEFINES += -DKVMTOOLS_VERSION='$(KVMTOOLS_VERSION)'
 DEFINES+= -DBUILD_ARCH='$(ARCH)'
 
 KVM_INCLUDE := include
-CFLAGS += $(CPPFLAGS) $(DEFINES) -I$(KVM_INCLUDE) -I$(ARCH_INCLUDE) 
-I$(KINCL_PATH)/include -I$(KINCL_PATH)/arch/$(ARCH)/include/ -O0 
-fno-strict-aliasing -g -flto
+CFLAGS += $(CPPFLAGS) $(DEFINES) -I$(KVM_INCLUDE) -I$(ARCH_INCLUDE) 
-I$(KINCL_PATH)/include -I$(KINCL_PATH)/arch/$(ARCH)/include/ -O2 
-fno-strict-aliasing -g -flto
 
 WARNINGS += -Wall
 WARNINGS += -Wcast-align
-- 
1.7.12

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC][PATCH] Improving directed yield scalability for PLE handler

2012-09-17 Thread Andrew Jones

On Fri, Sep 14, 2012 at 04:34:24PM -0400, Konrad Rzeszutek Wilk wrote:
  The concern I have is that even though we have gone through changes to
  help reduce the candidate vcpus we yield to, we still have a very poor
  idea of which vcpu really needs to run.  The result is high cpu usage in
  the get_pid_task and still some contention in the double runqueue lock.
  To make this scalable, we either need to significantly reduce the
  occurrence of the lock-holder preemption, or do a much better job of
  knowing which vcpu needs to run (and not unnecessarily yielding to vcpus
  which do not need to run).
 
 The patches that Raghavendra  has been posting do accomplish that.
 
  On reducing the occurrence:  The worst case for lock-holder preemption
  is having vcpus of same VM on the same runqueue.  This guarantees the
  situation of 1 vcpu running while another [of the same VM] is not.  To
  prove the point, I ran the same test, but with vcpus restricted to a
  range of host cpus, such that any single VM's vcpus can never be on the
  same runqueue.  In this case, all 10 VMs' vcpu-0's are on host cpus 0-4,
  vcpu-1's are on host cpus 5-9, and so on.  Here is the result:
 
  kvm_cpu_spin, and all
  yield_to changes, plus
  restricted vcpu placement:  8823 +/- 3.20%   much, much better
 
  On picking a better vcpu to yield to:  I really hesitate to rely on
  paravirt hint [telling us which vcpu is holding a lock], but I am not
  sure how else to reduce the candidate vcpus to yield to.  I suspect we
  are yielding to way more vcpus than are preempted lock-holders, and that
  IMO is just work accomplishing nothing.  Trying to think of way to
  further reduce candidate vcpus
 
 ... the patches are posted -  you could try them out?

Radim and I have done some testing with the pvticketlock series. While we
saw a gain over PLE alone, it wasn't huge, and without PLE also enabled it
could hardly support 2.0x overcommit. spinlocks aren't the only place
where cpu_relax() is called within a relatively tight loop, so it's likely
that PLE yielding just generally helps by getting schedule() called more
frequently.

Drew
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC][PATCH] Improving directed yield scalability for PLE handler

2012-09-17 Thread Andrew Jones

On Sun, Sep 16, 2012 at 11:55:28AM +0300, Avi Kivity wrote:
 On 09/14/2012 12:30 AM, Andrew Theurer wrote:
 
  The concern I have is that even though we have gone through changes to
  help reduce the candidate vcpus we yield to, we still have a very poor
  idea of which vcpu really needs to run.  The result is high cpu usage in
  the get_pid_task and still some contention in the double runqueue lock.
  To make this scalable, we either need to significantly reduce the
  occurrence of the lock-holder preemption, or do a much better job of
  knowing which vcpu needs to run (and not unnecessarily yielding to vcpus
  which do not need to run).
  
  On reducing the occurrence:  The worst case for lock-holder preemption
  is having vcpus of same VM on the same runqueue.  This guarantees the
  situation of 1 vcpu running while another [of the same VM] is not.  To
  prove the point, I ran the same test, but with vcpus restricted to a
  range of host cpus, such that any single VM's vcpus can never be on the
  same runqueue.  In this case, all 10 VMs' vcpu-0's are on host cpus 0-4,
  vcpu-1's are on host cpus 5-9, and so on.  Here is the result:
  
  kvm_cpu_spin, and all
  yield_to changes, plus
  restricted vcpu placement:  8823 +/- 3.20%   much, much better
  
  On picking a better vcpu to yield to:  I really hesitate to rely on
  paravirt hint [telling us which vcpu is holding a lock], but I am not
  sure how else to reduce the candidate vcpus to yield to.  I suspect we
  are yielding to way more vcpus than are preempted lock-holders, and that
  IMO is just work accomplishing nothing.  Trying to think of way to
  further reduce candidate vcpus
 
 I wouldn't say that yielding to the wrong vcpu accomplishes nothing.
 That other vcpu gets work done (unless it is in pause loop itself) and
 the yielding vcpu gets put to sleep for a while, so it doesn't spend
 cycles spinning.  While we haven't fixed the problem at least the guest
 is accomplishing work, and meanwhile the real lock holder may get
 naturally scheduled and clear the lock.
 
 The main problem with this theory is that the experiments don't seem to
 bear it out.  So maybe one of the assumptions is wrong - the yielding
 vcpu gets scheduled early.  That could be the case if the two vcpus are
 on different runqueues - you could be changing the relative priority of
 vcpus on the target runqueue, but still remain on top yourself.  Is this
 possible with the current code?
 
 Maybe we should prefer vcpus on the same runqueue as yield_to targets,
 and only fall back to remote vcpus when we see it didn't help.

I thought about this a bit recently too, but didn't pursue it, because I
figured it would actually increase the get_pid_task and double_rq_lock
contention time if we have to hunt too long for a vcpu that matches a more
strict criteria. But, I guess if we can implement a special reschedule
to run on the current cpu which prioritizes runnable/non-running vcpus,
then it should be just as fast or faster for it to look through the
runqueue first, than it is to look through all the vcpus first.

Drew

 
 Let's examine a few cases:
 
 1. spinner on cpu 0, lock holder on cpu 0
 
 win!
 
 2. spinner on cpu 0, random vcpu(s) (or normal processes) on cpu 0
 
 Spinner gets put to sleep, random vcpus get to work, low lock contention
 (no double_rq_lock), by the time spinner gets scheduled we might have won
 
 3. spinner on cpu 0, another spinner on cpu 0
 
 Worst case, we'll just spin some more.  Need to detect this case and
 migrate something in.
 
 4. spinner on cpu 0, alone
 
 Similar
 
 
 It seems we need to tie in to the load balancer.
 
 Would changing the priority of the task while it is spinning help the
 load balancer?
 
 -- 
 error compiling committee.c: too many arguments to function
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v8 0/3] KVM: perf: kvm events analysis tool

From: Xiao Guangrong xiaoguangr...@linux.vnet.ibm.com

Changelog:
the changes from Avi's comments:
- move the definition of x86 fault vector to asm/kvm.h
- drop the second patch which introduced new tracepoints

the changes from David's comments:
- use scnprintf instead of snprintf
- drop exclusive check for '-a' and '-p'; after this, we should
  append -a to track all guests, -p to track the specified guest
- improve the help usage
- fix a possible memory leak
- some cleanups

the changes from Andrew Jones's comments:
- move stat related code to util/stat.c

Thank you very much for your patience!

This patchset introduces a perf-based tool (perf kvm stat record/report)
which can analyze kvm events more smartly. Below are the presentation slides
from 2012 Japan LinuxCon:
http://events.linuxfoundation.org/images/stories/pdf/lcjp2012_guangrong.pdf
You can get more details from them. If you have any questions or comments,
please feel free to let us know.

This patchset is based on Arnaldo's git tree perf/core branch.

Usage:
- kvm stat
  run a command and gather performance counter statistics, it is the alias of
  perf stat

- trace kvm events:
  perf kvm stat record, or, if other tracepoints are interesting as well, we
  can append the events like this:
  perf kvm stat record -e timer:* -a

  If many guests are running, we can track the specified guest by using -p or
  --pid. -a is used to track events generated by all guests.

- show the result:
  perf kvm stat report

An example of the output follows:
# pgrep qemu
13005
13059

A total of 2 guests are running on the host.

Then, track the guest whose pid is 13059:
# ./perf kvm stat record -p 13059
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.253 MB perf.data.guest (~11065 samples) ]

See the vmexit events:
# ./perf kvm stat report --event=vmexit


Analyze events for all VCPUs:

 VM-EXITSamples  Samples% Time% Avg time

 APIC_ACCESS46070.55% 0.01% 22.44us ( +-   1.75% )
 HLT 9314.26%99.98% 832077.26us ( +-  10.42% )
  EXTERNAL_INTERRUPT 64 9.82% 0.00% 35.35us ( +-  14.21% )
   PENDING_INTERRUPT 24 3.68% 0.00%  9.29us ( +-  31.39% )
   CR_ACCESS  7 1.07% 0.00%  8.12us ( +-   5.76% )
  IO_INSTRUCTION  3 0.46% 0.00% 18.00us ( +-  11.79% )
   EXCEPTION_NMI  1 0.15% 0.00%  5.83us ( +-   -nan% )

Total Samples:652, Total events handled time:77396109.80us.

See the mmio events:
# ./perf kvm stat report --event=mmio


Analyze events for all VCPUs:

 MMIO AccessSamples  Samples% Time% Avg time

0xfee00380:W38784.31%79.28%  8.29us ( +-   3.32% )
0xfee00300:W 24 5.23% 9.96% 16.79us ( +-   1.97% )
0xfee00300:R 24 5.23% 7.83% 13.20us ( +-   3.00% )
0xfee00310:W 24 5.23% 2.93%  4.94us ( +-   3.84% )

Total Samples:459, Total events handled time:4044.59us.

See the ioport event:
# ./perf kvm stat report --event=ioport


Analyze events for all VCPUs:

  IO Port AccessSamples  Samples% Time% Avg time

 0xc050:POUT  3   100.00%   100.00% 13.75us ( +-  10.83% )

Total Samples:3, Total events handled time:41.26us.

And, --vcpu is used to track the specified vcpu and --key is used to sort the
result:
# ./perf kvm stat report --event=vmexit --vcpu=0 --key=time


Analyze events for VCPU 0:

 VM-EXITSamples  Samples% Time% Avg time

 HLT 2713.85%99.97% 405790.24us ( +-  12.70% )
  EXTERNAL_INTERRUPT 13 6.67% 0.00% 27.94us ( +-  22.26% )
 APIC_ACCESS14674.87% 0.03% 21.69us ( +-   2.91% )
  IO_INSTRUCTION  2 1.03% 0.00% 17.77us ( +-  20.56% )
   CR_ACCESS  2 1.03% 0.00%  8.55us ( +-   6.47% )
   PENDING_INTERRUPT  5 2.56% 0.00%  6.27us ( +-   3.94% )

Total Samples:195, Total events handled time:10959950.90us.


Dong Hao (3):
  KVM: x86: export svm/vmx exit code and vector code to userspace
  perf: move stat related code to util/stat.c
  KVM: perf: kvm events analysis tool

 arch/x86/include/asm/kvm.h|   16 +
 arch/x86/include/asm/kvm_host.h   |   16 -
 arch/x86/include/asm/svm.h|  205 +---
 arch/x86/include/asm/vmx.h|  127 --
 arch/x86/kvm/trace.h  |   89 
 tools/perf/Documentation/perf-kvm.txt |   30 ++-
 tools/perf/MANIFEST   |3 +
 tools/perf/Makefile   |1 +
 tools/perf/builtin-kvm.c  |  840 -
 tools/perf/builtin-stat.c |   56 +---
 tools/perf/util/header.c  |   59 +++-
 tools/perf/util/header.h  |1 +
 tools/perf/util/stat.c

[PATCH v8 1/3] KVM: x86: export svm/vmx exit code and vector code to userspace

From: Xiao Guangrong xiaoguangr...@linux.vnet.ibm.com

Exporting KVM exit information to userspace to be consumed by perf.

[ Dong Hao haod...@linux.vnet.ibm.com: rebase it on acme's git tree ]
Signed-off-by: Dong Hao haod...@linux.vnet.ibm.com
Signed-off-by: Xiao Guangrong xiaoguangr...@linux.vnet.ibm.com
---
 arch/x86/include/asm/kvm.h  |   16 +++
 arch/x86/include/asm/kvm_host.h |   16 ---
 arch/x86/include/asm/svm.h  |  205 +--
 arch/x86/include/asm/vmx.h  |  127 
 arch/x86/kvm/trace.h|   89 -
 5 files changed, 230 insertions(+), 223 deletions(-)

diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 246617e..41e08cb 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -9,6 +9,22 @@
 #include linux/types.h
 #include linux/ioctl.h
 
+#define DE_VECTOR 0
+#define DB_VECTOR 1
+#define BP_VECTOR 3
+#define OF_VECTOR 4
+#define BR_VECTOR 5
+#define UD_VECTOR 6
+#define NM_VECTOR 7
+#define DF_VECTOR 8
+#define TS_VECTOR 10
+#define NP_VECTOR 11
+#define SS_VECTOR 12
+#define GP_VECTOR 13
+#define PF_VECTOR 14
+#define MF_VECTOR 16
+#define MC_VECTOR 18
+
 /* Select x86 specific features in linux/kvm.h */
 #define __KVM_HAVE_PIT
 #define __KVM_HAVE_IOAPIC
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 09155d6..1eaa6b0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -75,22 +75,6 @@
 #define KVM_HPAGE_MASK(x)  (~(KVM_HPAGE_SIZE(x) - 1))
 #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
 
-#define DE_VECTOR 0
-#define DB_VECTOR 1
-#define BP_VECTOR 3
-#define OF_VECTOR 4
-#define BR_VECTOR 5
-#define UD_VECTOR 6
-#define NM_VECTOR 7
-#define DF_VECTOR 8
-#define TS_VECTOR 10
-#define NP_VECTOR 11
-#define SS_VECTOR 12
-#define GP_VECTOR 13
-#define PF_VECTOR 14
-#define MF_VECTOR 16
-#define MC_VECTOR 18
-
 #define SELECTOR_TI_MASK (1  2)
 #define SELECTOR_RPL_MASK 0x03
 
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index f2b83bc..cdf5674 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -1,6 +1,135 @@
 #ifndef __SVM_H
 #define __SVM_H
 
+#define SVM_EXIT_READ_CR0  0x000
+#define SVM_EXIT_READ_CR3  0x003
+#define SVM_EXIT_READ_CR4  0x004
+#define SVM_EXIT_READ_CR8  0x008
+#define SVM_EXIT_WRITE_CR0 0x010
+#define SVM_EXIT_WRITE_CR3 0x013
+#define SVM_EXIT_WRITE_CR4 0x014
+#define SVM_EXIT_WRITE_CR8 0x018
+#define SVM_EXIT_READ_DR0  0x020
+#define SVM_EXIT_READ_DR1  0x021
+#define SVM_EXIT_READ_DR2  0x022
+#define SVM_EXIT_READ_DR3  0x023
+#define SVM_EXIT_READ_DR4  0x024
+#define SVM_EXIT_READ_DR5  0x025
+#define SVM_EXIT_READ_DR6  0x026
+#define SVM_EXIT_READ_DR7  0x027
+#define SVM_EXIT_WRITE_DR0 0x030
+#define SVM_EXIT_WRITE_DR1 0x031
+#define SVM_EXIT_WRITE_DR2 0x032
+#define SVM_EXIT_WRITE_DR3 0x033
+#define SVM_EXIT_WRITE_DR4 0x034
+#define SVM_EXIT_WRITE_DR5 0x035
+#define SVM_EXIT_WRITE_DR6 0x036
+#define SVM_EXIT_WRITE_DR7 0x037
+#define SVM_EXIT_EXCP_BASE 0x040
+#define SVM_EXIT_INTR  0x060
+#define SVM_EXIT_NMI   0x061
+#define SVM_EXIT_SMI   0x062
+#define SVM_EXIT_INIT  0x063
+#define SVM_EXIT_VINTR 0x064
+#define SVM_EXIT_CR0_SEL_WRITE 0x065
+#define SVM_EXIT_IDTR_READ 0x066
+#define SVM_EXIT_GDTR_READ 0x067
+#define SVM_EXIT_LDTR_READ 0x068
+#define SVM_EXIT_TR_READ   0x069
+#define SVM_EXIT_IDTR_WRITE0x06a
+#define SVM_EXIT_GDTR_WRITE0x06b
+#define SVM_EXIT_LDTR_WRITE0x06c
+#define SVM_EXIT_TR_WRITE  0x06d
+#define SVM_EXIT_RDTSC 0x06e
+#define SVM_EXIT_RDPMC 0x06f
+#define SVM_EXIT_PUSHF 0x070
+#define SVM_EXIT_POPF  0x071
+#define SVM_EXIT_CPUID 0x072
+#define SVM_EXIT_RSM   0x073
+#define SVM_EXIT_IRET  0x074
+#define SVM_EXIT_SWINT 0x075
+#define SVM_EXIT_INVD  0x076
+#define SVM_EXIT_PAUSE 0x077
+#define SVM_EXIT_HLT   0x078
+#define SVM_EXIT_INVLPG0x079
+#define SVM_EXIT_INVLPGA   0x07a
+#define SVM_EXIT_IOIO  0x07b
+#define SVM_EXIT_MSR   0x07c
+#define SVM_EXIT_TASK_SWITCH   0x07d
+#define SVM_EXIT_FERR_FREEZE   0x07e
+#define SVM_EXIT_SHUTDOWN  0x07f
+#define SVM_EXIT_VMRUN 0x080
+#define SVM_EXIT_VMMCALL   0x081
+#define SVM_EXIT_VMLOAD0x082
+#define SVM_EXIT_VMSAVE0x083
+#define SVM_EXIT_STGI  0x084
+#define SVM_EXIT_CLGI  0x085
+#define SVM_EXIT_SKINIT0x086
+#define SVM_EXIT_RDTSCP0x087
+#define SVM_EXIT_ICEBP 0x088
+#define SVM_EXIT_WBINVD0x089
+#define SVM_EXIT_MONITOR   0x08a
+#define SVM_EXIT_MWAIT 0x08b
+#define SVM_EXIT_MWAIT_COND0x08c
+#define SVM_EXIT_XSETBV0x08d
+#define SVM_EXIT_NPF   0x400
+

[PATCH v8 3/3] KVM: perf: kvm events analysis tool

From: Xiao Guangrong xiaoguangr...@linux.vnet.ibm.com

Add 'perf kvm stat' support to analyze kvm vmexit/mmio/ioport smartly

Usage:
- kvm stat
  run a command and gather performance counter statistics; it is an alias of
  perf stat

- trace kvm events:
  perf kvm stat record, or, if other tracepoints are interesting as well, we
  can append the events like this:
  perf kvm stat record -e timer:* -a

  If many guests are running, we can track the specified guest by using -p or
  --pid, -a is used to track events generated by all guests.

- show the result:
  perf kvm stat report

An example of the output follows:
# pgrep qemu
13005
13059

total 2 guests are running on the host

Then, track the guest whose pid is 13059:
# ./perf kvm stat record -p 13059
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.253 MB perf.data.guest (~11065 samples) ]

See the vmexit events:
# ./perf kvm stat report --event=vmexit


Analyze events for all VCPUs:

 VM-EXITSamples  Samples% Time% Avg time

 APIC_ACCESS46070.55% 0.01% 22.44us ( +-   1.75% )
 HLT 9314.26%99.98% 832077.26us ( +-  10.42% )
  EXTERNAL_INTERRUPT 64 9.82% 0.00% 35.35us ( +-  14.21% )
   PENDING_INTERRUPT 24 3.68% 0.00%  9.29us ( +-  31.39% )
   CR_ACCESS  7 1.07% 0.00%  8.12us ( +-   5.76% )
  IO_INSTRUCTION  3 0.46% 0.00% 18.00us ( +-  11.79% )
   EXCEPTION_NMI  1 0.15% 0.00%  5.83us ( +-   -nan% )

Total Samples:652, Total events handled time:77396109.80us.

See the mmio events:
# ./perf kvm stat report --event=mmio


Analyze events for all VCPUs:

 MMIO AccessSamples  Samples% Time% Avg time

0xfee00380:W38784.31%79.28%  8.29us ( +-   3.32% )
0xfee00300:W 24 5.23% 9.96% 16.79us ( +-   1.97% )
0xfee00300:R 24 5.23% 7.83% 13.20us ( +-   3.00% )
0xfee00310:W 24 5.23% 2.93%  4.94us ( +-   3.84% )

Total Samples:459, Total events handled time:4044.59us.

See the ioport event:
# ./perf kvm stat report --event=ioport


Analyze events for all VCPUs:

  IO Port AccessSamples  Samples% Time% Avg time

 0xc050:POUT  3   100.00%   100.00% 13.75us ( +-  10.83% )

Total Samples:3, Total events handled time:41.26us.

And, --vcpu is used to track the specified vcpu and --key is used to sort the
result:
# ./perf kvm stat report --event=vmexit --vcpu=0 --key=time


Analyze events for VCPU 0:

 VM-EXITSamples  Samples% Time% Avg time

 HLT 2713.85%99.97% 405790.24us ( +-  12.70% )
  EXTERNAL_INTERRUPT 13 6.67% 0.00% 27.94us ( +-  22.26% )
 APIC_ACCESS14674.87% 0.03% 21.69us ( +-   2.91% )
  IO_INSTRUCTION  2 1.03% 0.00% 17.77us ( +-  20.56% )
   CR_ACCESS  2 1.03% 0.00%  8.55us ( +-   6.47% )
   PENDING_INTERRUPT  5 2.56% 0.00%  6.27us ( +-   3.94% )

Total Samples:195, Total events handled time:10959950.90us.

[ Dong Hao haod...@linux.vnet.ibm.com
  Runzhen Wang runz...@linux.vnet.ibm.com:

 - rebase it on current acme's tree

 - fix the compiling-error on i386

]

Signed-off-by: Xiao Guangrong xiaoguangr...@linux.vnet.ibm.com
Signed-off-by: Dong Hao haod...@linux.vnet.ibm.com
Signed-off-by: Runzhen Wang runz...@linux.vnet.ibm.com
---
 tools/perf/Documentation/perf-kvm.txt |   30 ++-
 tools/perf/MANIFEST   |3 +
 tools/perf/builtin-kvm.c  |  840 -
 tools/perf/util/header.c  |   59 +++-
 tools/perf/util/header.h  |1 +
 tools/perf/util/thread.h  |2 +
 6 files changed, 929 insertions(+), 6 deletions(-)

diff --git a/tools/perf/Documentation/perf-kvm.txt 
b/tools/perf/Documentation/perf-kvm.txt
index dd84cb2..326f2cb 100644
--- a/tools/perf/Documentation/perf-kvm.txt
+++ b/tools/perf/Documentation/perf-kvm.txt
@@ -12,7 +12,7 @@ SYNOPSIS
[--guestkallsyms=path --guestmodules=path | --guestvmlinux=path]]
{top|record|report|diff|buildid-list}
 'perf kvm' [--host] [--guest] [--guestkallsyms=path --guestmodules=path
-   | --guestvmlinux=path] {top|record|report|diff|buildid-list}
+   | --guestvmlinux=path] {top|record|report|diff|buildid-list|stat}
 
 DESCRIPTION
 ---
@@ -38,6 +38,18 @@ There are a couple of variants of perf kvm:
   so that other tools can be used to fetch packages with matching symbol tables
   for use by perf report.
 
+  'perf kvm stat command' to run a command and gather performance counter
+  statistics.
+  Especially, perf 'kvm stat record/report' generates a statistical analysis
+  of KVM events. Currently, vmexit, mmio and

[PATCH v8 2/3] perf: move stat related code to util/stat.c

From: Xiao Guangrong xiaoguangr...@linux.vnet.ibm.com

Then, the code can be shared between kvm events and perf stat

[ Dong Hao haod...@linux.vnet.ibm.com: rebase it on acme's git tree ]
Signed-off-by: Xiao Guangrong xiaoguangr...@linux.vnet.ibm.com
Signed-off-by: Dong Hao haod...@linux.vnet.ibm.com
---
 tools/perf/Makefile   |1 +
 tools/perf/builtin-stat.c |   56 +--
 tools/perf/util/stat.c|   57 +
 tools/perf/util/stat.h|   16 
 4 files changed, 76 insertions(+), 54 deletions(-)
 create mode 100644 tools/perf/util/stat.c
 create mode 100644 tools/perf/util/stat.h

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 209774b..5077f8e 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -406,6 +406,7 @@ LIB_OBJS += $(OUTPUT)util/target.o
 LIB_OBJS += $(OUTPUT)util/rblist.o
 LIB_OBJS += $(OUTPUT)util/intlist.o
 LIB_OBJS += $(OUTPUT)util/vdso.o
+LIB_OBJS += $(OUTPUT)util/stat.o
 
 LIB_OBJS += $(OUTPUT)ui/helpline.o
 LIB_OBJS += $(OUTPUT)ui/hist.o
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index dab347d..3c43a35 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -51,13 +51,13 @@
 #include util/evsel.h
 #include util/debug.h
 #include util/color.h
+#include util/stat.h
 #include util/header.h
 #include util/cpumap.h
 #include util/thread.h
 #include util/thread_map.h
 
 #include sys/prctl.h
-#include math.h
 #include locale.h
 
 #define DEFAULT_SEPARATOR   
@@ -199,11 +199,6 @@ static int output_fd;
 
 static volatile int done = 0;
 
-struct stats
-{
-   double n, mean, M2;
-};
-
 struct perf_stat {
struct stats  res_stats[3];
 };
@@ -220,50 +215,6 @@ static void perf_evsel__free_stat_priv(struct perf_evsel 
*evsel)
evsel-priv = NULL;
 }
 
-static void update_stats(struct stats *stats, u64 val)
-{
-   double delta;
-
-   stats-n++;
-   delta = val - stats-mean;
-   stats-mean += delta / stats-n;
-   stats-M2 += delta*(val - stats-mean);
-}
-
-static double avg_stats(struct stats *stats)
-{
-   return stats-mean;
-}
-
-/*
- * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
- *
- *   (\Sum n_i^2) - ((\Sum n_i)^2)/n
- * s^2 = ---
- *  n - 1
- *
- * http://en.wikipedia.org/wiki/Stddev
- *
- * The std dev of the mean is related to the std dev by:
- *
- * s
- * s_mean = ---
- *  sqrt(n)
- *
- */
-static double stddev_stats(struct stats *stats)
-{
-   double variance, variance_mean;
-
-   if (!stats-n)
-   return 0.0;
-
-   variance = stats-M2 / (stats-n - 1);
-   variance_mean = variance / stats-n;
-
-   return sqrt(variance_mean);
-}
-
 static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
 static struct stats runtime_cycles_stats[MAX_NR_CPUS];
 static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
@@ -559,10 +510,7 @@ static int run_perf_stat(int argc __maybe_unused, const 
char **argv)
 
 static void print_noise_pct(double total, double avg)
 {
-   double pct = 0.0;
-
-   if (avg)
-   pct = 100.0*total/avg;
+   double pct = rel_stddev_stats(total, avg);
 
if (csv_output)
fprintf(output, %s%.2f%%, csv_sep, pct);
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
new file mode 100644
index 000..2374212
--- /dev/null
+++ b/tools/perf/util/stat.c
@@ -0,0 +1,57 @@
+#include math.h
+
+#include stat.h
+
+void update_stats(struct stats *stats, u64 val)
+{
+   double delta;
+
+   stats-n++;
+   delta = val - stats-mean;
+   stats-mean += delta / stats-n;
+   stats-M2 += delta*(val - stats-mean);
+}
+
+double avg_stats(struct stats *stats)
+{
+   return stats-mean;
+}
+
+/*
+ * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
+ *
+ *   (\Sum n_i^2) - ((\Sum n_i)^2)/n
+ * s^2 = ---
+ *  n - 1
+ *
+ * http://en.wikipedia.org/wiki/Stddev
+ *
+ * The std dev of the mean is related to the std dev by:
+ *
+ * s
+ * s_mean = ---
+ *  sqrt(n)
+ *
+ */
+double stddev_stats(struct stats *stats)
+{
+   double variance, variance_mean;
+
+   if (!stats-n)
+   return 0.0;
+
+   variance = stats-M2 / (stats-n - 1);
+   variance_mean = variance / stats-n;
+
+   return sqrt(variance_mean);
+}
+
+double rel_stddev_stats(double stddev, double avg)
+{
+   double pct = 0.0;
+
+   if (avg)
+   pct = 100.0 * stddev/avg;
+
+   return pct;
+}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
new file mode 100644
index 000..588367c
--- /dev/null
+++ b/tools/perf/util/stat.h
@@ -0,0 +1,16 @@
+#ifndef __PERF_STATS_H
+#define __PERF_STATS_H
+
+#include types.h
+
+struct stats
+{
+   double n, mean, M2;
+};
+
+void

Re: [Qemu-devel] Block Migration Assertion in qemu-kvm 1.2.0

2012-09-17 Thread Kevin Wolf

Am 16.09.2012 12:13, schrieb Peter Lieven:
 Hi,
 
 when trying to block migrate a VM from one node to another, the source 
 VM crashed with the following assertion:
 block.c:3829: bdrv_set_in_use: Assertion `bs-in_use != in_use' failed.
 
 Is this something already addressed/known?

Not that I'm aware of, at least.

Block migration doesn't seem to check whether the device is already in
use, maybe this is the problem. Not sure why it would be in use, though,
and in my quick test it didn't crash.

So we need some more information: What's your command line, did you do
anything specific in the monitor with block devices, what does the
stacktrace look like, etc.?

Kevin
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] kvm tools: fix SDL build error when libsdl isn't installed

We used wrong prototypes for sdl init/exit when libsdl wasn't installed when
building. This would cause build errors.

Reported-by: Kashyap Chamarthy kashyap...@gmail.com
Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/include/kvm/sdl.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/kvm/include/kvm/sdl.h b/tools/kvm/include/kvm/sdl.h
index b444333..19e1d74 100644
--- a/tools/kvm/include/kvm/sdl.h
+++ b/tools/kvm/include/kvm/sdl.h
@@ -9,11 +9,11 @@ struct framebuffer;
 int sdl__init(struct kvm *kvm);
 int sdl__exit(struct kvm *kvm);
 #else
-static inline void sdl__init(struct kvm *kvm)
+static inline int sdl__init(struct kvm *kvm)
 {
die(SDL support not compiled in. (install the SDL-dev[el] package));
 }
-static inline void sdl__exit(struct kvm *kvm)
+static inline int sdl__exit(struct kvm *kvm)
 {
die(SDL support not compiled in. (install the SDL-dev[el] package));
 }
-- 
1.7.12

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] kvm tools: don't die if sdl wasn't compiled in and we don't try using it

If SDL isn't compiled in we shouldn't die unless we actually try using it.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/include/kvm/sdl.h | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tools/kvm/include/kvm/sdl.h b/tools/kvm/include/kvm/sdl.h
index 19e1d74..2f0c213 100644
--- a/tools/kvm/include/kvm/sdl.h
+++ b/tools/kvm/include/kvm/sdl.h
@@ -11,11 +11,17 @@ int sdl__exit(struct kvm *kvm);
 #else
 static inline int sdl__init(struct kvm *kvm)
 {
-   die(SDL support not compiled in. (install the SDL-dev[el] package));
+   if (kvm-cfg.sdl)
+   die(SDL support not compiled in. (install the SDL-dev[el] 
package));
+
+   return 0;
 }
 static inline int sdl__exit(struct kvm *kvm)
 {
-   die(SDL support not compiled in. (install the SDL-dev[el] package));
+   if (kvm-cfg.sdl)
+   die(SDL support not compiled in. (install the SDL-dev[el] 
package));
+
+   return 0;
 }
 #endif
 
-- 
1.7.12

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] kvm tools: fix SDL build error when libsdl isn't installed

2012-09-17 Thread Kashyap Chamarthy

On Mon, Sep 17, 2012 at 4:03 PM, Sasha Levin levinsasha...@gmail.com wrote:
 We used wrong prototypes for sdl init/exit when libsdl wasn't installed when
 building. This would cause build errors.

 Reported-by: Kashyap Chamarthy kashyap...@gmail.com
 Signed-off-by: Sasha Levin levinsasha...@gmail.com
 ---
  tools/kvm/include/kvm/sdl.h | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

 diff --git a/tools/kvm/include/kvm/sdl.h b/tools/kvm/include/kvm/sdl.h
 index b444333..19e1d74 100644
 --- a/tools/kvm/include/kvm/sdl.h
 +++ b/tools/kvm/include/kvm/sdl.h
 @@ -9,11 +9,11 @@ struct framebuffer;
  int sdl__init(struct kvm *kvm);
  int sdl__exit(struct kvm *kvm);
  #else
 -static inline void sdl__init(struct kvm *kvm)
 +static inline int sdl__init(struct kvm *kvm)
  {
 die(SDL support not compiled in. (install the SDL-dev[el] package));
  }
 -static inline void sdl__exit(struct kvm *kvm)
 +static inline int sdl__exit(struct kvm *kvm)
  {
 die(SDL support not compiled in. (install the SDL-dev[el] package));
  }
 --
 1.7.12


Just as information for the list: this patch does not apply.

I'm testing the newer patch sent by Sasha.

Thanks.
/kashyap
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH 2/5]KVM:x86, apicv: adjust for virtual interrupt delivery

2012-09-17 Thread Li, Jiongxi

 -Original Message-
 From: Li, Jiongxi
 Sent: Friday, September 14, 2012 10:16 PM
 To: 'Avi Kivity'
 Cc: kvm@vger.kernel.org
 Subject: RE: [PATCH 2/5]KVM:x86, apicv: adjust for virtual interrupt delivery

 Sorry for the late response

  -Original Message-
  From: Avi Kivity [mailto:a...@redhat.com]
  Sent: Friday, September 07, 2012 12:22 AM
  To: Li, Jiongxi
  Cc: kvm@vger.kernel.org
  Subject: Re: [PATCH 2/5]KVM:x86, apicv: adjust for virtual interrupt
  delivery

  On 09/05/2012 08:41 AM, Li, Jiongxi wrote:
   Virtual interrupt delivery avoids KVM to inject vAPIC interrupts
   manually, which is fully taken care of by the hardware. This needs
   some special awareness into existing interrupt injection path:

 - for pending interrupt, instead of direct injection, we may need
   update architecture specific indicators before resuming to guest.

 - A pending interrupt, which is masked by ISR, should be also
   considered in above update action, since hardware will decide
   when to inject it at right time. Current has_interrupt and
   get_interrupt only returns a valid vector from injection p.o.v.

   --- a/arch/x86/kvm/x86.c
   +++ b/arch/x86/kvm/x86.c
   @@ -5194,6 +5194,13 @@ static void inject_pending_event(struct
  kvm_vcpu *vcpu)
 vcpu-arch.nmi_injected = true;
 kvm_x86_ops-set_nmi(vcpu);
 }
   + } else if (kvm_apic_vid_enabled(vcpu)) {
   + if (kvm_cpu_has_interrupt_apic_vid(vcpu) 
   + kvm_x86_ops-interrupt_allowed(vcpu)) {
   + kvm_queue_interrupt(vcpu,
   + kvm_cpu_get_interrupt_apic_vid(vcpu), false);
   + kvm_x86_ops-set_irq(vcpu);
   + }

  It may be simpler to change kvm_cpu_{has,get}_interrupt to ignore the
  apic if virtual interrupt delivery is enabled.
 OKs

kvm_cpu_has_interrupt is also called in other places, not just to judge
whether to inject an interrupt manually. For instance, it is called in
kvm_arch_vcpu_runnable. In that case, the apic can't be ignored. So for safety, I
think it is better to use another function here rather than change the original
kvm_cpu_has_interrupt function.

   @@ -5293,16 +5300,27 @@ static int vcpu_enter_guest(struct kvm_vcpu
  *vcpu)
 }

 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
   + /* update archtecture specific hints for APIC virtual interrupt
   +delivery
  */
   + if (kvm_apic_vid_enabled(vcpu))
   + kvm_x86_ops-update_irq(vcpu);
   +

  Not defined.
 This function is defined in patch 3/5. Because virtual interrupt delivery is
 not enabled in this patch, this function is not called. Since we will enable
 this feature by default, maybe we can merge PATCH 2,3,4 together into one
 patch.

 inject_pending_event(vcpu);

 /* enable NMI/IRQ window open exits if needed */
 if (vcpu-arch.nmi_pending)
 kvm_x86_ops-enable_nmi_window(vcpu);
   - else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
   + else if (kvm_apic_vid_enabled(vcpu)) {
   + if (kvm_cpu_has_interrupt_apic_vid(vcpu))
   + kvm_x86_ops-enable_irq_window(vcpu);
   + } else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
 kvm_x86_ops-enable_irq_window(vcpu);

 if (kvm_lapic_enabled(vcpu)) {
   - update_cr8_intercept(vcpu);
   + /* no need for tpr_threshold update if APIC virtual
   +  * interrupt delivery is enabled
   +  */
   + if (!kvm_apic_vid_enabled(vcpu))
   + update_cr8_intercept(vcpu);

  Perhaps the arch function should do the ignoring.
 You mean putting the 'vid_enabled' check in
 'kvm_x86_ops->update_cr8_intercept'? Is that just to reduce
 the code change in common code?

 kvm_lapic_sync_to_vapic(vcpu);
 }
 }

  --
  error compiling committee.c: too many arguments to function
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH 4/5]KVM:x86, apicv: add interface for poking EOI exit bitmap

2012-09-17 Thread Li, Jiongxi

 -Original Message-
 From: Avi Kivity [mailto:a...@redhat.com]
 Sent: Friday, September 07, 2012 12:38 AM
 To: Li, Jiongxi
 Cc: kvm@vger.kernel.org
 Subject: Re: [PATCH 4/5]KVM:x86, apicv: add interface for poking EOI exit
 bitmap

 On 09/05/2012 08:41 AM, Li, Jiongxi wrote:
  With APICv virtual interrupt delivery feature, EOI write from non root
  mode doesn't cause VM-Exit unless set in EOI exit bitmap VMCS field.
  Basically there're two methods to manipulate EOI exit bitmap:

 Should be folded into the previous patch, otherwise the previous patch breaks
 level interrupts.

  [Option 1]
  Ideally only level triggered irq requires a hook in vLAPIC EOI write,
  so that vIOAPIC EOI is triggered and emulated. So the simplest
  approach is to manipulate EOI exit bitmap when vLAPIC acks a new
  interrupt, based on value of TMR. There're several corner cases worthy
  of note though:

- KVM has specific notifier hooks on vIOAPIC EOI path. So far two
  sources use it: INT-based device passthrough and PIT pending
  timers. For the former, it's virtually wired to vIOAPIC and
  thus TMR already covers it. PIT is special here, which is an
  edge triggered source. But since other timer sources like
  vLAPIC timer don't require this notifier hook, possibly PIT
  can be relaxed in the future too.

 I would like to switch to changing the timer frequency when we need to catch
 up.  But that can be done later.

- posted interrupt will update TMR directly, w/o chance for KVM
  to update EOI exit bitmap accordingly. This becomes a gap

 Why not? we know what vector the PIT is wired to.

  [Option 2]
  Indicate EOI exit bitmap requirement ('need_eoi') directly from every
  interrupt source device, and then check this requirement when vLAPIC
  acks a new pending interrupt. This requires more intrusive changes to
  current vLAPIC/vIOAPIC logic, so that the irq_source_id indicating
  source of interrupt is passed through from origination point to vLAPIC
  ack point. For natural requirements like vIOAPIC level triggered
  entries, it can be implicitly deduced.
  On the other hand for non-natural requirements like aforementioned PIT
  or posted interrupt, this approach can handle it efficiently.

  For simplicity reason, now option 1 is used which should be enough to
  test MSI-based device passthrough.

 You can change kvm_register_irq_ack_notifier() to call the ioapic and pic to 
 find
 out what vectors need EOI exits.

 (alternatively, if we fix the PIT, then we only need ack notifiers for level
 interrupts).

  Signed-off-by: Kevin Tian kevin.t...@intel.com
  Signed-off-by: Jiongxi Li jiongxi...@intel.com
  ---
   arch/x86/include/asm/kvm_host.h |1 +
   arch/x86/kvm/lapic.c|7 ++-
   arch/x86/kvm/vmx.c  |   37
 +
   3 files changed, 44 insertions(+), 1 deletions(-)

  diff --git a/arch/x86/include/asm/kvm_host.h
  b/arch/x86/include/asm/kvm_host.h index ef74df5..4e06a82 100644
  --- a/arch/x86/include/asm/kvm_host.h
  +++ b/arch/x86/include/asm/kvm_host.h
  @@ -671,6 +671,7 @@ struct kvm_x86_ops {
  void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
  int (*has_virtual_interrupt_delivery)(struct kvm_vcpu *vcpu);
  void (*update_irq)(struct kvm_vcpu *vcpu);
  +   void (*set_eoi_exitmap)(struct kvm_vcpu *vcpu, int vector, int
  +need_eoi);
  int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
  int (*get_tdp_level)(void);
  u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
  diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index
  d203501..4058384 100644
  --- a/arch/x86/kvm/lapic.c
  +++ b/arch/x86/kvm/lapic.c
  @@ -499,8 +499,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic,
 int delivery_mode,
  if (trig_mode) {
  apic_debug(level trig mode for vector %d, vector);
  apic_set_vector(vector, apic-regs + APIC_TMR);
  -   } else
  +   if (kvm_apic_vid_enabled(vcpu))
  +   kvm_x86_ops-set_eoi_exitmap(vcpu, vector, 1);
  +   } else {
  apic_clear_vector(vector, apic-regs + APIC_TMR);
  +   if (kvm_apic_vid_enabled(vcpu))
  +   kvm_x86_ops-set_eoi_exitmap(vcpu, vector, 0);
  +   }

 This is way too late.  The flow should come from the IOAPIC and PIC, when
 setting up an irq, to the local APIC.

I just wonder why you think it is too late to do that. Is it because, if
posted interrupts are enabled, this code won't be called? Or because we can't set
the EOI bitmap for the PIT here based on TMR alone, and need to set it in a place
that can recognize the PIT vector?

 --
 error compiling committee.c: too many arguments to function
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to

Re: [PATCH] kvm tools: don't die if sdl wasn't compiled in and we don't try using it

2012-09-17 Thread Kashyap Chamarthy

On Mon, Sep 17, 2012 at 4:46 PM, Sasha Levin levinsasha...@gmail.com wrote:
 If SDL isn't compiled in we shouldn't die unless we actually try using it.

 Signed-off-by: Sasha Levin levinsasha...@gmail.com
 ---
  tools/kvm/include/kvm/sdl.h | 10 --
  1 file changed, 8 insertions(+), 2 deletions(-)

 diff --git a/tools/kvm/include/kvm/sdl.h b/tools/kvm/include/kvm/sdl.h
 index 19e1d74..2f0c213 100644
 --- a/tools/kvm/include/kvm/sdl.h
 +++ b/tools/kvm/include/kvm/sdl.h
 @@ -11,11 +11,17 @@ int sdl__exit(struct kvm *kvm);
  #else
  static inline int sdl__init(struct kvm *kvm)
  {
 -   die(SDL support not compiled in. (install the SDL-dev[el] package));
 +   if (kvm-cfg.sdl)
 +   die(SDL support not compiled in. (install the SDL-dev[el] 
 package));
 +
 +   return 0;
  }
  static inline int sdl__exit(struct kvm *kvm)
  {
 -   die(SDL support not compiled in. (install the SDL-dev[el] package));
 +   if (kvm-cfg.sdl)
 +   die(SDL support not compiled in. (install the SDL-dev[el] 
 package));
 +
 +   return 0;
  }
  #endif

 --
 1.7.12


Patch applies.

But when I run the  'lkvm' binary  (after recompiling the kernel and
kvm tool) , I see this:

#---#
[kashyap@moon kvm]$ ./lkvm  run  -k ../../arch/x86/boot/bzImage
  # lkvm run -k ../../arch/x86/boot/bzImage -m 448 -c 4 --name guest-17105
PPrroobbiinngg  EE  ((ee==oo  ttoo  ddiissaabbllee))..  ookk
.
.
.


[1.492530]  host=192.168.33.15, domain=, nis-domain=(none)
[1.493443]  bootserver=192.168.33.1, rootserver=0.0.0.0, rootpath=
[1.494458] ALSA device list:
[1.494901]   No soundcards found.
[1.497106] VFS: Mounted root (9p filesystem) on device 0:13.
[1.498617] devtmpfs: mounted
[1.500555] Freeing unused kernel memory: 512k freed
[1.501981] Write protecting the kernel read-only data: 12288k
[1.511605] Freeing unused kernel memory: 1820k freed
[1.520470] Freeing unused kernel memory: 1576k freed
Mounting...
[1.533811] KGDB: BP remove failed: 8105212f
#---#

Any hints?
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH -v2 2/2] make the compaction skip ahead logic robust

2012-09-17 Thread Mel Gorman

On Sat, Sep 15, 2012 at 04:55:24PM +0100, Richard Davies wrote:
 Hi Rik, Mel and Shaohua,
 
 Thank you for your latest patches. I attach my latest perf report for a slow
 boot with all of these applied.
 

Thanks for testing.

 Mel asked for timings of the slow boots. It's very hard to give anything
 useful here! A normal boot would be a minute or so, and many are like that,
 but the slowest that I have seen (on 3.5.x) was several hours. Basically, I
 just test many times until I get one which is noticeably slower than normal
 and then run perf record on that one.
 

Ok.

 The latest perf report for a slow boot is below. For the fast boots, most of
 the time is in clean_page_c in do_huge_pmd_anonymous_page, but for this slow
 one there is a lot of lock contention above that.
 
 SNIP
 58.49% qemu-kvm  [kernel.kallsyms] [k] _raw_spin_lock_irqsave 

|
--- _raw_spin_lock_irqsave
   |  
   |--95.07%-- compact_checklock_irqsave
   |  |  
   |  |--70.03%-- isolate_migratepages_range
 SNIP
   |   --29.97%-- compaction_alloc
   |  
   |--4.53%-- isolate_migratepages_range

 SNIP

This is going in the right direction, but usage due to contention is still
obviously stupidly high.  Compaction features throughout the profile but
staying focused on the lock contention for the moment. Can you try the
following patch? So far I'm not having much luck reproducing this locally.

---8---
mm: compaction: Only release lru_lock every SWAP_CLUSTER_MAX pages if necessary

Commit b2eef8c0 (mm: compaction: minimise the time IRQs are disabled while
isolating pages for migration) releases the lru_lock every SWAP_CLUSTER_MAX
pages that are scanned as it was found at the time that compaction could
contend badly with page reclaim. This can lead to a situation where
compaction contends heavily with itself as it releases and reacquires
the LRU lock.

This patch makes two changes to how the migrate scanner acquires the LRU
lock. First, it only releases the LRU lock every SWAP_CLUSTER_MAX pages if
the lock is contended. This reduces the number of times it unnecessarily
disables and reenables IRQs. The second is that it defers acquiring the
LRU lock for as long as possible. In cases where transparent hugepages
are encountered the LRU lock will not be acquired at all.

Signed-off-by: Mel Gorman mgor...@suse.de
---
 mm/compaction.c |   65 +--
 1 file changed, 44 insertions(+), 21 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index 39342ee..1874f23 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -50,6 +50,11 @@ static inline bool migrate_async_suitable(int migratetype)
return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
 }
 
+static inline bool should_release_lock(spinlock_t *lock)
+{
+   return need_resched() || spin_is_contended(lock);
+}
+
 /*
  * Compaction requires the taking of some coarse locks that are potentially
  * very heavily contended. Check if the process needs to be scheduled or
@@ -62,7 +67,7 @@ static inline bool migrate_async_suitable(int migratetype)
 static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
  bool locked, struct compact_control *cc)
 {
-   if (need_resched() || spin_is_contended(lock)) {
+   if (should_release_lock(lock)) {
if (locked) {
spin_unlock_irqrestore(lock, *flags);
locked = false;
@@ -275,7 +280,7 @@ isolate_migratepages_range(struct zone *zone, struct 
compact_control *cc,
isolate_mode_t mode = 0;
struct lruvec *lruvec;
unsigned long flags;
-   bool locked;
+   bool locked = false;
 
/*
 * Ensure that there are not too many pages isolated from the LRU
@@ -295,24 +300,17 @@ isolate_migratepages_range(struct zone *zone, struct 
compact_control *cc,
 
/* Time to isolate some pages for migration */
cond_resched();
-   locked = compact_trylock_irqsave(zone-lru_lock, flags, cc);
-   if (!locked)
-   return 0;
for (; low_pfn  end_pfn; low_pfn++) {
struct page *page;
 
/* give a chance to irqs before checking need_resched() */
-   if (!((low_pfn+1) % SWAP_CLUSTER_MAX)) {
-   spin_unlock_irqrestore(zone-lru_lock, flags);
-   locked = false;
+   if (locked  !((low_pfn+1) % SWAP_CLUSTER_MAX)) {
+   if (should_release_lock(zone-lru_lock)) {
+   spin_unlock_irqrestore(zone-lru_lock, flags);
+   locked = false;
+   }
}
 
-

Re: [PATCH] kvm tools: don't die if sdl wasn't compiled in and we don't try using it

2012-09-17 Thread Kashyap Chamarthy

On Mon, Sep 17, 2012 at 5:14 PM, Kashyap Chamarthy kashyap...@gmail.com wrote:
 On Mon, Sep 17, 2012 at 4:46 PM, Sasha Levin levinsasha...@gmail.com wrote:
 If SDL isn't compiled in we shouldn't die unless we actually try using it.

 Signed-off-by: Sasha Levin levinsasha...@gmail.com
 ---
  tools/kvm/include/kvm/sdl.h | 10 --
  1 file changed, 8 insertions(+), 2 deletions(-)

 diff --git a/tools/kvm/include/kvm/sdl.h b/tools/kvm/include/kvm/sdl.h
 index 19e1d74..2f0c213 100644
 --- a/tools/kvm/include/kvm/sdl.h
 +++ b/tools/kvm/include/kvm/sdl.h
 @@ -11,11 +11,17 @@ int sdl__exit(struct kvm *kvm);
  #else
  static inline int sdl__init(struct kvm *kvm)
  {
 -   die(SDL support not compiled in. (install the SDL-dev[el] 
 package));
 +   if (kvm-cfg.sdl)
 +   die(SDL support not compiled in. (install the SDL-dev[el] 
 package));
 +
 +   return 0;
  }
  static inline int sdl__exit(struct kvm *kvm)
  {
 -   die(SDL support not compiled in. (install the SDL-dev[el] 
 package));
 +   if (kvm-cfg.sdl)
 +   die(SDL support not compiled in. (install the SDL-dev[el] 
 package));
 +
 +   return 0;
  }
  #endif

 --
 1.7.12


 Patch applies.

 But when I run the  'lkvm' binary  (after recompiling the kernel and
 kvm tool) , I see this:

 #---#
 [kashyap@moon kvm]$ ./lkvm  run  -k ../../arch/x86/boot/bzImage
   # lkvm run -k ../../arch/x86/boot/bzImage -m 448 -c 4 --name guest-17105
 PPrroobbiinngg  EE  ((ee==oo  ttoo  ddiissaabbllee))..  ookk
 .
 .
 .


 [1.492530]  host=192.168.33.15, domain=, nis-domain=(none)
 [1.493443]  bootserver=192.168.33.1, rootserver=0.0.0.0, rootpath=
 [1.494458] ALSA device list:
 [1.494901]   No soundcards found.
 [1.497106] VFS: Mounted root (9p filesystem) on device 0:13.
 [1.498617] devtmpfs: mounted
 [1.500555] Freeing unused kernel memory: 512k freed
 [1.501981] Write protecting the kernel read-only data: 12288k
 [1.511605] Freeing unused kernel memory: 1820k freed
 [1.520470] Freeing unused kernel memory: 1576k freed
 Mounting...
 [1.533811] KGDB: BP remove failed: 8105212f
 #---#

 Any hints?

Discussed w/ Sasha on #pv. He pointed out that I had the directive
CONFIG_KGDB_TESTS_ON_BOOT set. (Silly me!)

Builds fine, once booted, guest shell access over a serial console is
presented successfully.
 
#---#
[kashyap@moon kvm]$ ./kvm run --balloon -k ../../arch/x86/boot/bzImage
  # kvm run -k ../../arch/x86/boot/bzImage -m 448 -c 4 --name guest-4418
PPrroobbiinngg  EE  ((ee==oo  ttoo  ddiissaabbllee))..  ookk


Decompressing Linux... Parsing ELF... done.
Booting the kernel.
.
.
.

[1.466336]  device=eth0, addr=192.168.33.15,
mask=255.255.255.0, gw=192.168.33.1
[1.467964]  host=192.168.33.15, domain=, nis-domain=(none)
[1.469212]  bootserver=192.168.33.1, rootserver=0.0.0.0, rootpath=
[1.470599] ALSA device list:
[1.471257]   No soundcards found.
[1.473453] VFS: Mounted root (9p filesystem) on device 0:13.
[1.474961] devtmpfs: mounted
[1.477103] Freeing unused kernel memory: 512k freed
[1.478598] Write protecting the kernel read-only data: 12288k
[1.488213] Freeing unused kernel memory: 1820k freed
[1.497327] Freeing unused kernel memory: 1576k freed
Mounting...
sh-4.2#
 
#---#

ACK.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v3] kvm/fpu: Enable fully eager restore kvm FPU

On Mon, Sep 17, 2012 at 02:07:43AM +, Hao, Xudong wrote:
  -Original Message-
  From: Avi Kivity [mailto:a...@redhat.com]
  Sent: Friday, September 14, 2012 12:40 AM
  To: Marcelo Tosatti
  Cc: Hao, Xudong; kvm@vger.kernel.org; Zhang, Xiantao
  Subject: Re: [PATCH v3] kvm/fpu: Enable fully eager restore kvm FPU
  
  On 09/13/2012 07:29 PM, Marcelo Tosatti wrote:
   On Thu, Sep 13, 2012 at 01:26:36PM -0300, Marcelo Tosatti wrote:
   On Wed, Sep 12, 2012 at 04:10:24PM +0800, Xudong Hao wrote:
Enable KVM FPU fully eager restore, if there is other FPU state which 
isn't
tracked by CR0.TS bit.
   
v3 changes from v2:
- Make fpu active explicitly while guest xsave is enabling and non-lazy
  xstate bit
exist.
  
   How about a guest_xcr0_can_lazy_saverestore bool to control this?
   It only needs to be updated when guest xcr0 is updated.
  
   That seems cleaner. Avi?
  
   Reasoning below.
  
v2 changes from v1:
- Expand KVM_XSTATE_LAZY to 64 bits before negating it.
   
Signed-off-by: Xudong Hao xudong@intel.com
---
 arch/x86/include/asm/kvm.h |4 
 arch/x86/kvm/vmx.c |2 ++
 arch/x86/kvm/x86.c |   15 ++-
 3 files changed, 20 insertions(+), 1 deletions(-)
   
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 521bf25..4c27056 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -8,6 +8,8 @@
   
 #include linux/types.h
 #include linux/ioctl.h
+#include asm/user.h
+#include asm/xsave.h
   
 /* Select x86 specific features in linux/kvm.h */
 #define __KVM_HAVE_PIT
@@ -30,6 +32,8 @@
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
   
+#define KVM_XSTATE_LAZY   (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
+
 struct kvm_memory_alias {
   __u32 slot;  /* this has a different namespace than memory 
slots */
   __u32 flags;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 248c2b4..853e875 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3028,6 +3028,8 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu,
  unsigned long cr0)
   
   if (!vcpu-fpu_active)
   hw_cr0 |= X86_CR0_TS | X86_CR0_MP;
+  else
+  hw_cr0 = ~(X86_CR0_TS | X86_CR0_MP);
   
   vmcs_writel(CR0_READ_SHADOW, cr0);
   vmcs_writel(GUEST_CR0, hw_cr0);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 20f2266..183cf60 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -560,6 +560,8 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32
  index, u64 xcr)
   return 1;
   if (xcr0  ~host_xcr0)
   return 1;
+  if (xcr0  ~((u64)KVM_XSTATE_LAZY))
+  vcpu-fpu_active = 1;
  
   This is confusing. The variable allows to decrease the number of places
   the decision is made.
  
  Better to have a helper function (lazy_fpu_allowed(), for example).
  Variables raise the question of whether they are maintained correctly.
  
 
 I realized that modifying only the fpu_active variable is incorrect; it must also update 
 the exception bitmap.
 To avoid the cr0 and xcrs setting order for live migrate case, how about 
 calling fpu_activate() in kvm_set_xcr()? I can add code comments in this 
 function calling.

The objective of the change is to disable lazy fpu loading (that is,
host fpu loaded in guest and vice-versa), when some bit except the
initial three bits is set in guest XCR0 (initial three being XSTATE_FP|XSTATE_SSE|
XSTATE_YMM). Yes?

If I get that right, then the suggestion seems to be:

static bool lazy_fpu_allowed()
{
return (vcpu-arch.xcr0  ~((u64)KVM_XSTATE_LAZY));
}

On guest entry:
if (!lazy_fpu_allowed(vcpu)) 
kvm_x86_ops-fpu_activate(vcpu);
if (vcpu-fpu_active)
kvm_load_guest_fpu(vcpu);


Does that make sense?

 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
 index be6d549..e4646d9 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
 @@ -574,6 +574,9 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 kvm_inject_gp(vcpu, 0);
 return 1;
 }
 +   if (xcr  ~((u64)KVM_XSTATE_LAZY))
 +   /* Allow fpu eager restore */
 +   kvm_x86_ops-fpu_activate(vcpu);
 return 0;
  }
 
 Thanks,
 -Xudong
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 00/17] Allow changing of Hypervisor CPUIDs.

Also known as Paravirtualization CPUIDs.

This is primarily done so that the guest will think it is running
under vmware when hypervisor-vendor=vmware is specified as a
property of a cpu.

This depends on:

http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01400.html

As far as I know it is #4. It depends on (1) and (2) and (3).

This change is based on:

Microsoft Hypervisor CPUID Leaves:

http://msdn.microsoft.com/en-us/library/windows/hardware/ff542428%28v=vs.85%29.aspx

Linux kernel change starts with:
http://fixunix.com/kernel/538707-use-cpuid-communicate-hypervisor.html
Also:
http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/00100.html

VMware documentation on CPUIDs (Mechanisms to determine if software is
running in a VMware virtual machine):

http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458

Changes from v1 to v2:

1) Added 1/4 from
http://lists.gnu.org/archive/html/qemu-devel/2012-08/msg05153.html

Because Fred is changing jobs and so will not be pushing to get
this in. It needed to be rebased, and I needed it to complete the
testing of this change.

2) Added 2/4 because, with the re-work, I needed a way to clear all KVM bits.

3) The rework of v1. Make it fit into the object model re-work of cpu.c for
x86.

4) Added 3/4 -- The split out of the code that is not needed for accel=kvm.

Changes from v2 to v3:

Marcelo Tosatti:
It's one big patch, better split into logically correlated patches
(with better changelog). This would help reviewers.

So split 3 and 4 into 3 to 17. More info in change log.
No code change.

Don Slutz (17):
target-i386: Allow tsc-frequency to be larger then 2.147G
target-i386: Add missing kvm bits.
target-i386: Add Hypervisor level.
target-i386: Add cpu object access routines for Hypervisor level.
target-i386: Add x86_set_hyperv.
target-i386: Use Hypervisor level in -machine pc,accel=kvm.
target-i386: Use Hypervisor level in -machine pc,accel=tcg.
target-i386: Add Hypervisor vendor.
target-i386: Add cpu object access routines for Hypervisor vendor.
target-i386: Use Hypervisor vendor in -machine pc,accel=kvm.
target-i386: Use Hypervisor vendor in -machine pc,accel=tcg.
target-i386: Add some known names to Hypervisor vendor.
target-i386: Add optional Hypervisor leaf extra.
target-i386: Add cpu object access routines for Hypervisor leaf
extra.
target-i386: Add setting of Hypervisor leaf extra for known vmare4.
target-i386: Use Hypervisor leaf extra in -machine pc,accel=kvm.
target-i386: Use Hypervisor leaf extra in -machine pc,accel=tcg.

target-i386/cpu.c | 261 -
target-i386/cpu.h | 21 +
target-i386/kvm.c | 33 ++--
3 files changed, 304 insertions(+), 11 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html

[PATCH v3 00/17] * SUBJECT HERE *

*** BLURB HERE ***

Don Slutz (17):
  target-i386: Allow tsc-frequency to be larger then 2.147G
  target-i386: Add missing kvm bits.
  target-i386: Add Hypervisor level.
  target-i386: Add cpu object access routines for Hypervisor level.
  target-i386: Add x86_set_hyperv.
  target-i386: Use Hypervisor level in -machine pc,accel=kvm.
  target-i386: Use Hypervisor level in -machine pc,accel=tcg.
  target-i386: Add Hypervisor vendor.
  target-i386: Add cpu object access routines for Hypervisor vendor.
  target-i386: Use Hypervisor vendor in -machine pc,accel=kvm.
  target-i386: Use Hypervisor vendor in -machine pc,accel=tcg.
  target-i386: Add some known names to Hypervisor vendor.
  target-i386: Add optional Hypervisor leaf extra.
  target-i386: Add cpu object access routines for Hypervisor leaf
extra.
  target-i386: Add setting of Hypervisor leaf extra for known vmare4.
  target-i386: Use Hypervisor leaf extra in -machine pc,accel=kvm.
  target-i386: Use Hypervisor leaf extra in -machine pc,accel=tcg.

 target-i386/cpu.c |  261 -
 target-i386/cpu.h |   21 +
 target-i386/kvm.c |   33 ++--
 3 files changed, 304 insertions(+), 11 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/3] Prepare kvm for lto

On Sun, Sep 16, 2012 at 03:10:56PM +0300, Avi Kivity wrote:
 vmx.c has an lto-unfriendly bit, fix it up.
 
 While there, clean up our asm code.
 
 v2: add missing .global in case vmx_return and vmx_set_constant_host_state() 
 become
 separated by lto
 
 Avi Kivity (3):
   KVM: VMX: Make lto-friendly
   KVM: VMX: Make use of asm.h
   KVM: SVM: Make use of asm.h

Applied, thanks.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 00/17] Allow changing of Hypervisor CPUIDs.

Also known as Paravirtualization CPUIDs.

This is primarily done so that the guest will think it is running
under vmware when hypervisor-vendor=vmware is specified as a
property of a cpu.

This depends on:

http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01400.html

As far as I know it is #4. It depends on (1) and (2) and (3).

This change is based on:

Microsoft Hypervisor CPUID Leaves:

http://msdn.microsoft.com/en-us/library/windows/hardware/ff542428%28v=vs.85%29.aspx

Linux kernel change starts with:
http://fixunix.com/kernel/538707-use-cpuid-communicate-hypervisor.html
Also:
http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/00100.html

VMware documentation on CPUIDs (Mechanisms to determine if software is
running in a VMware virtual machine):

http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458

Changes from v1 to v2:

1) Added 1/4 from
http://lists.gnu.org/archive/html/qemu-devel/2012-08/msg05153.html

Because Fred is changing jobs and so will not be pushing to get
this in. It needed to be rebased, and I needed it to complete the
testing of this change.

2) Added 2/4 because, with the re-work, I needed a way to clear all KVM bits.

3) The rework of v1. Make it fit into the object model re-work of cpu.c for
x86.

4) Added 3/4 -- The split out of the code that is not needed for accel=kvm.

Changes from v2 to v3:

Marcelo Tosatti:
It's one big patch, better split into logically correlated patches
(with better changelog). This would help reviewers.

So split 3 and 4 into 3 to 17. More info in change log.
No code change.

target-i386/cpu.c | 261 -
target-i386/cpu.h | 21 +
target-i386/kvm.c | 33 ++--
3 files changed, 304 insertions(+), 11 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html

[PATCH v3 03/17] target-i386: Add Hypervisor level.

This is just the EAX value.

This is based on:

Microsoft Hypervisor CPUID Leaves:
  
http://msdn.microsoft.com/en-us/library/windows/hardware/ff542428%28v=vs.85%29.aspx

Linux kernel change starts with:
  http://fixunix.com/kernel/538707-use-cpuid-communicate-hypervisor.html

VMware documentation on CPUIDs (Mechanisms to determine if software is
running in a VMware virtual machine):
  
http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458

QEMU has the value HYPERV_CPUID_MIN defined.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.h |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 5265c5a..05c0848 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -782,6 +782,8 @@ typedef struct CPUX86State {
 uint32_t cpuid_ext4_features;
 /* Flags from CPUID[EAX=7,ECX=0].EBX */
 uint32_t cpuid_7_0_ebx;
+/* Hypervisor CPUIDs */
+uint32_t cpuid_hv_level;
 
 /* MTRRs */
 uint64_t mtrr_fixed[11];
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 08/17] target-i386: Add Hypervisor vendor.

Also known as Paravirtualization vendor.
This is EBX, ECX, EDX data for 0x40000000.

QEMU knows this is KVM_CPUID_SIGNATURE (0x40000000).

This is based on:

Microsoft Hypervisor CPUID Leaves:
  
http://msdn.microsoft.com/en-us/library/windows/hardware/ff542428%28v=vs.85%29.aspx

Linux kernel change starts with:
  http://fixunix.com/kernel/538707-use-cpuid-communicate-hypervisor.html
Also:
  http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/00100.html

VMware documentation on CPUIDs (Mechanisms to determine if software is
running in a VMware virtual machine):
  
http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.h |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 05c0848..53ba4cf 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -784,6 +784,9 @@ typedef struct CPUX86State {
 uint32_t cpuid_7_0_ebx;
 /* Hypervisor CPUIDs */
 uint32_t cpuid_hv_level;
+uint32_t cpuid_hv_vendor1;
+uint32_t cpuid_hv_vendor2;
+uint32_t cpuid_hv_vendor3;
 
 /* MTRRs */
 uint64_t mtrr_fixed[11];
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 10/17] target-i386: Use Hypervisor vendor in -machine pc,accel=kvm.

Also known as Paravirtualization vendor.

This change is based on:

Microsoft Hypervisor CPUID Leaves:
  
http://msdn.microsoft.com/en-us/library/windows/hardware/ff542428%28v=vs.85%29.aspx

Linux kernel change starts with:
  http://fixunix.com/kernel/538707-use-cpuid-communicate-hypervisor.html
Also:
  http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/00100.html

VMware documentation on CPUIDs (Mechanisms to determine if software is
running in a VMware virtual machine):
  
http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/kvm.c |   10 ++
 1 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index bf27793..b8789f2 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -392,13 +392,15 @@ int kvm_arch_init_vcpu(CPUX86State *env)
 if (env-cpuid_hv_level == 0) {
 memcpy(signature, KVMKVMKVM\0\0\0, 12);
 c-eax = 0;
+c-ebx = signature[0];
+c-ecx = signature[1];
+c-edx = signature[2];
 } else {
-memcpy(signature, Microsoft Hv, 12);
 c-eax = env-cpuid_hv_level;
+c-ebx = env-cpuid_hv_vendor1;
+c-ecx = env-cpuid_hv_vendor2;
+c-edx = env-cpuid_hv_vendor3;
 }
-c-ebx = signature[0];
-c-ecx = signature[1];
-c-edx = signature[2];
 
 c = cpuid_data.entries[cpuid_i++];
 memset(c, 0, sizeof(*c));
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 11/17] target-i386: Use Hypervisor vendor in -machine pc,accel=tcg.

Also known as Paravirtualization vendor.

This change is based on:

Microsoft Hypervisor CPUID Leaves:
  
http://msdn.microsoft.com/en-us/library/windows/hardware/ff542428%28v=vs.85%29.aspx

Linux kernel change starts with:
  http://fixunix.com/kernel/538707-use-cpuid-communicate-hypervisor.html
Also:
  http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/00100.html

VMware documentation on CPUIDs (Mechanisms to determine if software is
running in a VMware virtual machine):
  
http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 5afb188..1b3a472 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1842,9 +1842,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 break;
 case 0x4000:
 *eax = env-cpuid_hv_level;
-*ebx = 0;
-*ecx = 0;
-*edx = 0;
+*ebx = env-cpuid_hv_vendor1;
+*ecx = env-cpuid_hv_vendor2;
+*edx = env-cpuid_hv_vendor3;
 break;
 case 0x4001:
 *eax = env-cpuid_kvm_features;
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 17/17] target-i386: Use Hypervisor leaf extra in -machine pc,accel=tcg.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |   11 +++
 1 files changed, 11 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index bfaee02..89a45b5 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1975,6 +1975,17 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 *ecx = 0;
 *edx = 0;
 break;
+case 0x4002 ... 0x40FF:
+if (index == env-cpuid_hv_extra) {
+*eax = env-cpuid_hv_extra_a;
+*ebx = env-cpuid_hv_extra_b;
+} else {
+*eax = 0;
+*ebx = 0;
+}
+*ecx = 0;
+*edx = 0;
+break;
 case 0x8000:
 *eax = env-cpuid_xlevel;
 *ebx = env-cpuid_vendor1;
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 15/17] target-i386: Add setting of Hypervisor leaf extra for known vmare4.

This was taken from:
  http://article.gmane.org/gmane.comp.emulators.kvm.devel/22643

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |   32 
 1 files changed, 32 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 34d2291..bfaee02 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1135,6 +1135,36 @@ static void x86_cpuid_set_model_id(Object *obj, const 
char *model_id,
 }
 }
 
+static void x86_cpuid_set_vmware_extra(Object *obj)
+{
+X86CPU *cpu = X86_CPU(obj);
+
+if ((cpu-env.tsc_khz != 0) 
+(cpu-env.cpuid_hv_level == CPUID_HV_LEVEL_VMARE_4) 
+(cpu-env.cpuid_hv_vendor1 == CPUID_HV_VENDOR_VMWARE_1) 
+(cpu-env.cpuid_hv_vendor2 == CPUID_HV_VENDOR_VMWARE_2) 
+(cpu-env.cpuid_hv_vendor3 == CPUID_HV_VENDOR_VMWARE_3)) {
+const uint32_t apic_khz = 100L;
+
+/*
+ * From article.gmane.org/gmane.comp.emulators.kvm.devel/22643
+ *
+ *Leaf 0x4010, Timing Information.
+ *
+ *VMware has defined the first generic leaf to provide timing
+ *information.  This leaf returns the current TSC frequency and
+ *current Bus frequency in kHz.
+ *
+ *# EAX: (Virtual) TSC frequency in kHz.
+ *# EBX: (Virtual) Bus (local apic timer) frequency in kHz.
+ *# ECX, EDX: RESERVED (Per above, reserved fields are set to 
zero).
+ */
+cpu-env.cpuid_hv_extra = 0x4010;
+cpu-env.cpuid_hv_extra_a = (uint32_t)cpu-env.tsc_khz;
+cpu-env.cpuid_hv_extra_b = apic_khz;
+}
+}
+
 static void x86_cpuid_get_tsc_freq(Object *obj, Visitor *v, void *opaque,
const char *name, Error **errp)
 {
@@ -1164,6 +1194,7 @@ static void x86_cpuid_set_tsc_freq(Object *obj, Visitor 
*v, void *opaque,
 }
 
 cpu-env.tsc_khz = value / 1000;
+x86_cpuid_set_vmware_extra(obj);
 }
 
 static void x86_cpuid_get_hv_level(Object *obj, Visitor *v, void *opaque,
@@ -1263,6 +1294,7 @@ static void x86_cpuid_set_hv_vendor(Object *obj, const 
char *value,
 env-cpuid_hv_vendor2 |= ((uint8_t)adj_value[i + 4])  (8 * i);
 env-cpuid_hv_vendor3 |= ((uint8_t)adj_value[i + 8])  (8 * i);
 }
+x86_cpuid_set_vmware_extra(obj);
 }
 
 static void x86_cpuid_get_hv_extra(Object *obj, Visitor *v, void *opaque,
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 11/17] target-i386: Use Hypervisor vendor in -machine pc,accel=tcg.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 5afb188..1b3a472 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1842,9 +1842,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 break;
 case 0x4000:
 *eax = env-cpuid_hv_level;
-*ebx = 0;
-*ecx = 0;
-*edx = 0;
+*ebx = env-cpuid_hv_vendor1;
+*ecx = env-cpuid_hv_vendor2;
+*edx = env-cpuid_hv_vendor3;
 break;
 case 0x4001:
 *eax = env-cpuid_kvm_features;
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 14/17] target-i386: Add cpu object access routines for Hypervisor leaf extra.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |   66 +
 1 files changed, 66 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 9ac3076..34d2291 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1265,6 +1265,63 @@ static void x86_cpuid_set_hv_vendor(Object *obj, const 
char *value,
 }
 }
 
+static void x86_cpuid_get_hv_extra(Object *obj, Visitor *v, void *opaque,
+const char *name, Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+
+visit_type_uint32(v, cpu-env.cpuid_hv_extra, name, errp);
+}
+
+static void x86_cpuid_set_hv_extra(Object *obj, Visitor *v, void *opaque,
+const char *name, Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+uint32_t value;
+
+visit_type_uint32(v, value, name, errp);
+if (error_is_set(errp)) {
+return;
+}
+
+if ((value != 0)  (value  0x4000)) {
+value += 0x4000;
+}
+cpu-env.cpuid_hv_extra = value;
+}
+
+static void x86_cpuid_get_hv_extra_a(Object *obj, Visitor *v, void *opaque,
+const char *name, Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+
+visit_type_uint32(v, cpu-env.cpuid_hv_extra_a, name, errp);
+}
+
+static void x86_cpuid_set_hv_extra_a(Object *obj, Visitor *v, void *opaque,
+const char *name, Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+
+visit_type_uint32(v, cpu-env.cpuid_hv_extra_a, name, errp);
+}
+
+static void x86_cpuid_get_hv_extra_b(Object *obj, Visitor *v, void *opaque,
+const char *name, Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+
+visit_type_uint32(v, cpu-env.cpuid_hv_extra_b, name, errp);
+}
+
+static void x86_cpuid_set_hv_extra_b(Object *obj, Visitor *v, void *opaque,
+const char *name, Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+
+visit_type_uint32(v, cpu-env.cpuid_hv_extra_b, name, errp);
+}
+
 #if !defined(CONFIG_USER_ONLY)
 static void x86_set_hyperv(Object *obj, Error **errp)
 {
@@ -2199,6 +2256,15 @@ static void x86_cpu_initfn(Object *obj)
 object_property_add_str(obj, hypervisor-vendor,
 x86_cpuid_get_hv_vendor,
 x86_cpuid_set_hv_vendor, NULL);
+object_property_add(obj, hypervisor-extra, int,
+x86_cpuid_get_hv_extra,
+x86_cpuid_set_hv_extra, NULL, NULL, NULL);
+object_property_add(obj, hypervisor-extra-a, int,
+x86_cpuid_get_hv_extra_a,
+x86_cpuid_set_hv_extra_a, NULL, NULL, NULL);
+object_property_add(obj, hypervisor-extra-b, int,
+x86_cpuid_get_hv_extra_b,
+x86_cpuid_set_hv_extra_b, NULL, NULL, NULL);
 #if !defined(CONFIG_USER_ONLY)
 object_property_add(obj, hv_spinlocks, int,
 x86_get_hv_spinlocks,
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 16/17] target-i386: Use Hypervisor leaf extra in -machine pc,accel=kvm.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/kvm.c |   19 +++
 1 files changed, 19 insertions(+), 0 deletions(-)

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index b8789f2..17c72bc 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -454,6 +454,25 @@ int kvm_arch_init_vcpu(CPUX86State *env)
 c-ebx = signature[0];
 c-ecx = signature[1];
 c-edx = signature[2];
+} else if (env-cpuid_hv_level  0) {
+for (i = KVM_CPUID_FEATURES + 1; i = env-cpuid_hv_level; i++) {
+c = cpuid_data.entries[cpuid_i++];
+memset(c, 0, sizeof(*c));
+c-function = i;
+if (i == env-cpuid_hv_extra) {
+c-eax = env-cpuid_hv_extra_a;
+c-ebx = env-cpuid_hv_extra_b;
+}
+}
+
+c = cpuid_data.entries[cpuid_i++];
+memset(c, 0, sizeof(*c));
+c-function = KVM_CPUID_SIGNATURE_NEXT;
+memcpy(signature, KVMKVMKVM\0\0\0, 12);
+c-eax = 0;
+c-ebx = signature[0];
+c-ecx = signature[1];
+c-edx = signature[2];
 }
 
 has_msr_async_pf_en = c-eax  (1  KVM_FEATURE_ASYNC_PF);
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 08/17] target-i386: Add Hypervisor vendor.

This is EBX, ECX, EDX data.

This is based on:

Microsoft Hypervisor CPUID Leaves:
  
http://msdn.microsoft.com/en-us/library/windows/hardware/ff542428%28v=vs.85%29.aspx

Linux kernel change starts with:
  http://fixunix.com/kernel/538707-use-cpuid-communicate-hypervisor.html

VMware documentation on CPUIDs (Mechanisms to determine if software is
running in a VMware virtual machine):
  
http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.h |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 05c0848..53ba4cf 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -784,6 +784,9 @@ typedef struct CPUX86State {
 uint32_t cpuid_7_0_ebx;
 /* Hypervisor CPUIDs */
 uint32_t cpuid_hv_level;
+uint32_t cpuid_hv_vendor1;
+uint32_t cpuid_hv_vendor2;
+uint32_t cpuid_hv_vendor3;
 
 /* MTRRs */
 uint64_t mtrr_fixed[11];
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 13/17] target-i386: Add optional Hypervisor leaf extra.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.h |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 47bc00c..a2d3588 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -799,6 +799,10 @@ typedef struct CPUX86State {
 uint32_t cpuid_hv_vendor1;
 uint32_t cpuid_hv_vendor2;
 uint32_t cpuid_hv_vendor3;
+/* VMware extra data */
+uint32_t cpuid_hv_extra;
+uint32_t cpuid_hv_extra_a;
+uint32_t cpuid_hv_extra_b;
 
 /* MTRRs */
 uint64_t mtrr_fixed[11];
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 12/17] target-i386: Add some known names to Hypervisor vendor.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |   36 +++-
 target-i386/cpu.h |   12 
 2 files changed, 47 insertions(+), 1 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 1b3a472..9ac3076 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1206,6 +1206,20 @@ static char *x86_cpuid_get_hv_vendor(Object *obj, Error 
**errp)
 }
 value[CPUID_VENDOR_SZ] = '\0';
 
+/* Convert known names */
+if (!strcmp(value, CPUID_HV_VENDOR_VMWARE)) {
+if (env-cpuid_hv_level == CPUID_HV_LEVEL_VMARE_4) {
+pstrcpy(value, sizeof(value), vmware4);
+} else if (env-cpuid_hv_level == CPUID_HV_LEVEL_VMARE_3) {
+pstrcpy(value, sizeof(value), vmware3);
+}
+} else if (!strcmp(value, CPUID_HV_VENDOR_XEN) 
+   env-cpuid_hv_level == CPUID_HV_LEVEL_XEN) {
+pstrcpy(value, sizeof(value), xen);
+} else if (!strcmp(value, CPUID_HV_VENDOR_KVM) 
+   env-cpuid_hv_level == 0) {
+pstrcpy(value, sizeof(value), kvm);
+}
 return value;
 }
 
@@ -1219,7 +1233,27 @@ static void x86_cpuid_set_hv_vendor(Object *obj, const 
char *value,
 
 memset(adj_value, 0, sizeof(adj_value));
 
-pstrcpy(adj_value, sizeof(adj_value), value);
+/* Convert known names */
+if (!strcmp(value, vmware) || !strcmp(value, vmware4)) {
+if (env-cpuid_hv_level == 0) {
+env-cpuid_hv_level = CPUID_HV_LEVEL_VMARE_4;
+}
+pstrcpy(adj_value, sizeof(adj_value), CPUID_HV_VENDOR_VMWARE);
+} else if (!strcmp(value, vmware3)) {
+if (env-cpuid_hv_level == 0) {
+env-cpuid_hv_level = CPUID_HV_LEVEL_VMARE_3;
+}
+pstrcpy(adj_value, sizeof(adj_value), CPUID_HV_VENDOR_VMWARE);
+} else if (!strcmp(value, xen)) {
+if (env-cpuid_hv_level == 0) {
+env-cpuid_hv_level = CPUID_HV_LEVEL_XEN;
+}
+pstrcpy(adj_value, sizeof(adj_value), CPUID_HV_VENDOR_XEN);
+} else if (!strcmp(value, kvm)) {
+pstrcpy(adj_value, sizeof(adj_value), CPUID_HV_VENDOR_KVM);
+} else {
+pstrcpy(adj_value, sizeof(adj_value), value);
+}
 
 env-cpuid_hv_vendor1 = 0;
 env-cpuid_hv_vendor2 = 0;
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 53ba4cf..47bc00c 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -488,6 +488,18 @@
 
 #define CPUID_VENDOR_VIA   CentaurHauls
 
+#define CPUID_HV_VENDOR_VMWARE_1 0x61774d56 /* VMwa */
+#define CPUID_HV_VENDOR_VMWARE_2 0x4d566572 /* reVM */
+#define CPUID_HV_VENDOR_VMWARE_3 0x65726177 /* ware */
+#define CPUID_HV_VENDOR_VMWARE VMwareVMware
+#define CPUID_HV_LEVEL_VMARE_3 0x4002
+#define CPUID_HV_LEVEL_VMARE_4 0x4010
+
+#define CPUID_HV_VENDOR_XEN XenVMMXenVMM
+#define CPUID_HV_LEVEL_XEN  0x4002
+
+#define CPUID_HV_VENDOR_KVM KVMKVMKVM
+
 #define CPUID_MWAIT_IBE (1  1) /* Interrupts can exit capability */
 #define CPUID_MWAIT_EMX (1  0) /* enumeration supported */
 
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 10/17] target-i386: Use Hypervisor vendor in -machine pc,accel=kvm.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/kvm.c |   10 ++
 1 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index bf27793..b8789f2 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -392,13 +392,15 @@ int kvm_arch_init_vcpu(CPUX86State *env)
 if (env-cpuid_hv_level == 0) {
 memcpy(signature, KVMKVMKVM\0\0\0, 12);
 c-eax = 0;
+c-ebx = signature[0];
+c-ecx = signature[1];
+c-edx = signature[2];
 } else {
-memcpy(signature, Microsoft Hv, 12);
 c-eax = env-cpuid_hv_level;
+c-ebx = env-cpuid_hv_vendor1;
+c-ecx = env-cpuid_hv_vendor2;
+c-edx = env-cpuid_hv_vendor3;
 }
-c-ebx = signature[0];
-c-ecx = signature[1];
-c-edx = signature[2];
 
 c = cpuid_data.entries[cpuid_i++];
 memset(c, 0, sizeof(*c));
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 05/17] target-i386: Add x86_set_hyperv.

This is used to set the cpu object's hypervisor level to the default for 
Microsoft's Hypervisor.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |   10 ++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 0e4a18d..4120393 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1192,6 +1192,13 @@ static void x86_cpuid_set_hv_level(Object *obj, Visitor 
*v, void *opaque,
 }
 
 #if !defined(CONFIG_USER_ONLY)
+static void x86_set_hyperv(Object *obj, Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+
+cpu-env.cpuid_hv_level = HYPERV_CPUID_MIN;
+}
+
 static void x86_get_hv_spinlocks(Object *obj, Visitor *v, void *opaque,
  const char *name, Error **errp)
 {
@@ -1214,6 +1221,7 @@ static void x86_set_hv_spinlocks(Object *obj, Visitor *v, 
void *opaque,
 return;
 }
 hyperv_set_spinlock_retries(value);
+x86_set_hyperv(obj, errp);
 }
 
 static void x86_get_hv_relaxed(Object *obj, Visitor *v, void *opaque,
@@ -1234,6 +1242,7 @@ static void x86_set_hv_relaxed(Object *obj, Visitor *v, 
void *opaque,
 return;
 }
 hyperv_enable_relaxed_timing(value);
+x86_set_hyperv(obj, errp);
 }
 
 static void x86_get_hv_vapic(Object *obj, Visitor *v, void *opaque,
@@ -1254,6 +1263,7 @@ static void x86_set_hv_vapic(Object *obj, Visitor *v, 
void *opaque,
 return;
 }
 hyperv_enable_vapic_recommended(value);
+x86_set_hyperv(obj, errp);
 }
 #endif
 
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 09/17] target-i386: Add cpu object access routines for Hypervisor vendor.

These are modeled after x86_cpuid_set_vendor and x86_cpuid_get_vendor.
Since kvm's vendor is shorter, the test for correct size is removed and zero 
padding is added.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |   44 
 1 files changed, 44 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index d3b9bd8..5afb188 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1191,12 +1191,53 @@ static void x86_cpuid_set_hv_level(Object *obj, Visitor 
*v, void *opaque,
 cpu-env.cpuid_hv_level = value;
 }
 
+static char *x86_cpuid_get_hv_vendor(Object *obj, Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+CPUX86State *env = cpu-env;
+char *value;
+int i;
+
+value = (char *)g_malloc(CPUID_VENDOR_SZ + 1);
+for (i = 0; i  4; i++) {
+value[i + 0] = env-cpuid_hv_vendor1  (8 * i);
+value[i + 4] = env-cpuid_hv_vendor2  (8 * i);
+value[i + 8] = env-cpuid_hv_vendor3  (8 * i);
+}
+value[CPUID_VENDOR_SZ] = '\0';
+
+return value;
+}
+
+static void x86_cpuid_set_hv_vendor(Object *obj, const char *value,
+ Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+CPUX86State *env = cpu-env;
+int i;
+char adj_value[CPUID_VENDOR_SZ + 1];
+
+memset(adj_value, 0, sizeof(adj_value));
+
+pstrcpy(adj_value, sizeof(adj_value), value);
+
+env-cpuid_hv_vendor1 = 0;
+env-cpuid_hv_vendor2 = 0;
+env-cpuid_hv_vendor3 = 0;
+for (i = 0; i  4; i++) {
+env-cpuid_hv_vendor1 |= ((uint8_t)adj_value[i + 0])  (8 * i);
+env-cpuid_hv_vendor2 |= ((uint8_t)adj_value[i + 4])  (8 * i);
+env-cpuid_hv_vendor3 |= ((uint8_t)adj_value[i + 8])  (8 * i);
+}
+}
+
 #if !defined(CONFIG_USER_ONLY)
 static void x86_set_hyperv(Object *obj, Error **errp)
 {
 X86CPU *cpu = X86_CPU(obj);
 
 cpu-env.cpuid_hv_level = HYPERV_CPUID_MIN;
+x86_cpuid_set_hv_vendor(obj, Microsoft Hv, errp);
 }
 
 static void x86_get_hv_spinlocks(Object *obj, Visitor *v, void *opaque,
@@ -2121,6 +2162,9 @@ static void x86_cpu_initfn(Object *obj)
 object_property_add(obj, hypervisor-level, int,
 x86_cpuid_get_hv_level,
 x86_cpuid_set_hv_level, NULL, NULL, NULL);
+object_property_add_str(obj, hypervisor-vendor,
+x86_cpuid_get_hv_vendor,
+x86_cpuid_set_hv_vendor, NULL);
 #if !defined(CONFIG_USER_ONLY)
 object_property_add(obj, hv_spinlocks, int,
 x86_get_hv_spinlocks,
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 07/17] target-i386: Use Hypervisor level in -machine pc,accel=tcg.

This does not provide vendor support in tcg yet.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |   22 ++
 1 files changed, 22 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 4120393..d3b9bd8 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1651,6 +1651,16 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 index =  env-cpuid_xlevel;
 }
 }
+} else if (index  0x4000) {
+if (env-cpuid_hv_level  0) {
+/* Handle Hypervisor CPUIDs */
+if (index  env-cpuid_hv_level) {
+index = env-cpuid_hv_level;
+}
+} else {
+if (index  env-cpuid_level)
+index = env-cpuid_level;
+}
 } else {
 if (index  env-cpuid_level)
 index = env-cpuid_level;
@@ -1789,6 +1799,18 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 *edx = 0;
 }
 break;
+case 0x4000:
+*eax = env-cpuid_hv_level;
+*ebx = 0;
+*ecx = 0;
+*edx = 0;
+break;
+case 0x4001:
+*eax = env-cpuid_kvm_features;
+*ebx = 0;
+*ecx = 0;
+*edx = 0;
+break;
 case 0x8000:
 *eax = env-cpuid_xlevel;
 *ebx = env-cpuid_vendor1;
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 07/17] target-i386: Use Hypervisor level in -machine pc,accel=tcg.

Also known as Paravirtualization level.

This change is based on:

Microsoft Hypervisor CPUID Leaves:
  
http://msdn.microsoft.com/en-us/library/windows/hardware/ff542428%28v=vs.85%29.aspx

Linux kernel change starts with:
  http://fixunix.com/kernel/538707-use-cpuid-communicate-hypervisor.html
Also:
  http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/00100.html

VMware documentation on CPUIDs (Mechanisms to determine if software is
running in a VMware virtual machine):
  
http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458

QEMU knows this is KVM_CPUID_SIGNATURE (0x4000).

This does not provide vendor support in tcg yet.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |   22 ++
 1 files changed, 22 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 4120393..d3b9bd8 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1651,6 +1651,16 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 index =  env-cpuid_xlevel;
 }
 }
+} else if (index  0x4000) {
+if (env-cpuid_hv_level  0) {
+/* Handle Hypervisor CPUIDs */
+if (index  env-cpuid_hv_level) {
+index = env-cpuid_hv_level;
+}
+} else {
+if (index  env-cpuid_level)
+index = env-cpuid_level;
+}
 } else {
 if (index  env-cpuid_level)
 index = env-cpuid_level;
@@ -1789,6 +1799,18 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 *edx = 0;
 }
 break;
+case 0x4000:
+*eax = env-cpuid_hv_level;
+*ebx = 0;
+*ecx = 0;
+*edx = 0;
+break;
+case 0x4001:
+*eax = env-cpuid_kvm_features;
+*ebx = 0;
+*ecx = 0;
+*edx = 0;
+break;
 case 0x8000:
 *eax = env-cpuid_xlevel;
 *ebx = env-cpuid_vendor1;
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 01/17] target-i386: Allow tsc-frequency to be larger then 2.147G

The check using INT_MAX (2147483647) is wrong in this case.

Signed-off-by: Fred Oliveira folive...@cloudswitch.com
Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index af50a8f..0313cf5 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1146,7 +1146,7 @@ static void x86_cpuid_set_tsc_freq(Object *obj, Visitor 
*v, void *opaque,
 {
 X86CPU *cpu = X86_CPU(obj);
 const int64_t min = 0;
-const int64_t max = INT_MAX;
+const int64_t max = INT64_MAX;
 int64_t value;
 
 visit_type_freq(v, value, name, errp);
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 04/17] target-i386: Add cpu object access routines for Hypervisor level.

These are modeled after x86_cpuid_get_xlevel and x86_cpuid_set_xlevel.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |   28 
 1 files changed, 28 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 5f9866a..0e4a18d 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1166,6 +1166,31 @@ static void x86_cpuid_set_tsc_freq(Object *obj, Visitor 
*v, void *opaque,
 cpu-env.tsc_khz = value / 1000;
 }
 
+static void x86_cpuid_get_hv_level(Object *obj, Visitor *v, void *opaque,
+const char *name, Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+
+visit_type_uint32(v, cpu-env.cpuid_hv_level, name, errp);
+}
+
+static void x86_cpuid_set_hv_level(Object *obj, Visitor *v, void *opaque,
+const char *name, Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+uint32_t value;
+
+visit_type_uint32(v, value, name, errp);
+if (error_is_set(errp)) {
+return;
+}
+
+if ((value != 0)  (value  0x4000)) {
+value += 0x4000;
+}
+cpu-env.cpuid_hv_level = value;
+}
+
 #if !defined(CONFIG_USER_ONLY)
 static void x86_get_hv_spinlocks(Object *obj, Visitor *v, void *opaque,
  const char *name, Error **errp)
@@ -2061,6 +2086,9 @@ static void x86_cpu_initfn(Object *obj)
 object_property_add(obj, enforce, bool,
 x86_cpuid_get_enforce,
 x86_cpuid_set_enforce, NULL, NULL, NULL);
+object_property_add(obj, hypervisor-level, int,
+x86_cpuid_get_hv_level,
+x86_cpuid_set_hv_level, NULL, NULL, NULL);
 #if !defined(CONFIG_USER_ONLY)
 object_property_add(obj, hv_spinlocks, int,
 x86_get_hv_spinlocks,
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 06/17] target-i386: Use Hypervisor level in -machine pc,accel=kvm.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/kvm.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 895d848..bf27793 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -389,12 +389,12 @@ int kvm_arch_init_vcpu(CPUX86State *env)
 c = cpuid_data.entries[cpuid_i++];
 memset(c, 0, sizeof(*c));
 c-function = KVM_CPUID_SIGNATURE;
-if (!hyperv_enabled()) {
+if (env-cpuid_hv_level == 0) {
 memcpy(signature, KVMKVMKVM\0\0\0, 12);
 c-eax = 0;
 } else {
 memcpy(signature, Microsoft Hv, 12);
-c-eax = HYPERV_CPUID_MIN;
+c-eax = env-cpuid_hv_level;
 }
 c-ebx = signature[0];
 c-ecx = signature[1];
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 06/17] target-i386: Use Hypervisor level in -machine pc,accel=kvm.

Also known as Paravirtualization level.

This change is based on:

Microsoft Hypervisor CPUID Leaves:
  
http://msdn.microsoft.com/en-us/library/windows/hardware/ff542428%28v=vs.85%29.aspx

Linux kernel change starts with:
  http://fixunix.com/kernel/538707-use-cpuid-communicate-hypervisor.html
Also:
  http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/00100.html

VMware documentation on CPUIDs (Mechanisms to determine if software is
running in a VMware virtual machine):
  
http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458

QEMU knows this is KVM_CPUID_SIGNATURE (0x4000).

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/kvm.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 895d848..bf27793 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -389,12 +389,12 @@ int kvm_arch_init_vcpu(CPUX86State *env)
 c = cpuid_data.entries[cpuid_i++];
 memset(c, 0, sizeof(*c));
 c-function = KVM_CPUID_SIGNATURE;
-if (!hyperv_enabled()) {
+if (env-cpuid_hv_level == 0) {
 memcpy(signature, KVMKVMKVM\0\0\0, 12);
 c-eax = 0;
 } else {
 memcpy(signature, Microsoft Hv, 12);
-c-eax = HYPERV_CPUID_MIN;
+c-eax = env-cpuid_hv_level;
 }
 c-ebx = signature[0];
 c-ecx = signature[1];
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 03/17] target-i386: Add Hypervisor level.

Also known as Paravirtualization level or maximum cpuid function present in
this leaf.
This is just the EAX value for 0x4000.

QEMU knows this is KVM_CPUID_SIGNATURE (0x4000).

This is based on:

Microsoft Hypervisor CPUID Leaves:
  
http://msdn.microsoft.com/en-us/library/windows/hardware/ff542428%28v=vs.85%29.aspx

Linux kernel change starts with:
  http://fixunix.com/kernel/538707-use-cpuid-communicate-hypervisor.html
Also:
  http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/00100.html

VMware documentation on CPUIDs (Mechanisms to determine if software is
running in a VMware virtual machine):
  
http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458

QEMU has the value HYPERV_CPUID_MIN defined.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.h |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 5265c5a..05c0848 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -782,6 +782,8 @@ typedef struct CPUX86State {
 uint32_t cpuid_ext4_features;
 /* Flags from CPUID[EAX=7,ECX=0].EBX */
 uint32_t cpuid_7_0_ebx;
+/* Hypervisor CPUIDs */
+uint32_t cpuid_hv_level;
 
 /* MTRRs */
 uint64_t mtrr_fixed[11];
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 02/17] target-i386: Add missing kvm bits.

Fix duplicate name (kvmclock = kvm_clock2) also.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |   12 
 1 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 0313cf5..5f9866a 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -87,10 +87,14 @@ static const char *ext3_feature_name[] = {
 };
 
 static const char *kvm_feature_name[] = {
-kvmclock, kvm_nopiodelay, kvm_mmu, kvmclock, kvm_asyncpf, NULL, 
kvm_pv_eoi, NULL,
-NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+kvmclock, kvm_nopiodelay, kvm_mmu, kvm_clock2,
+kvm_asyncpf, kvm_steal_time, kvm_pv_eoi, NULL,
+NULL, NULL, NULL, NULL,
+NULL, NULL, NULL, NULL,
+NULL, NULL, NULL, NULL,
+NULL, NULL, NULL, NULL,
+kvm_clock_stable, NULL, NULL, NULL,
+NULL, NULL, NULL, NULL,
 };
 
 static const char *svm_feature_name[] = {
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC][PATCH] Improving directed yield scalability for PLE handler

2012-09-17 Thread Andrew Jones

On Sat, Sep 15, 2012 at 09:38:54PM +0530, Raghavendra K T wrote:
 On 09/14/2012 10:40 PM, Andrew Jones wrote:
 On Thu, Sep 13, 2012 at 04:30:58PM -0500, Andrew Theurer wrote:
 On Thu, 2012-09-13 at 17:18 +0530, Raghavendra K T wrote:
 * Andrew Theurerhaban...@linux.vnet.ibm.com  [2012-09-11 13:27:41]:
 
 [...]
 
 On picking a better vcpu to yield to:  I really hesitate to rely on
 paravirt hint [telling us which vcpu is holding a lock], but I am not
 sure how else to reduce the candidate vcpus to yield to.  I suspect we
 are yielding to way more vcpus than are preempted lock-holders, and that
 IMO is just work accomplishing nothing.  Trying to think of way to
 further reduce candidate vcpus
 
 
 wrt to yielding to vcpus for the same cpu, I recently noticed that
 there's a bug in yield_to_task_fair. yield_task_fair() calls
 clear_buddies(), so if we're yielding to a task that has been running on
 the same cpu that we're currently running on, and thus is also on the
 current cfs runqueue, then our 'who to pick next' hint is getting cleared
 right after we set it.
 
 I had hoped that the patch below would show a general improvement in the
 vcpu overcommit performance, however the results were variable - no worse,
 no better. Based on your results above showing good improvement from
 interleaving vcpus across the cpus, then that means there was a decent
 percent of these types of yields going on. So since the patch didn't
 change much that indicates that the next hinting isn't generally taken
 too seriously by the scheduler.  Anyway, the patch should correct the
 code per its design, and testing shows that it didn't make anything worse,
 so I'll post it soon. Also, in order to try and improve how far set-next
 can jump ahead in the queue, I tested a kernel with group scheduling
 compiled out (libvirt uses cgroups and I'm not sure autogroups may affect
 things). I did get slight improvement with that, but nothing to write home
 to mom about.
 
 diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
 index c219bf8..7d8a21d 100644
 --- a/kernel/sched/fair.c
 +++ b/kernel/sched/fair.c
 @@ -3037,11 +3037,12 @@ static bool yield_to_task_fair(struct rq *rq, struct 
 task_struct *p, bool preemp
  if (!se-on_rq || throttled_hierarchy(cfs_rq_of(se)))
  return false;
 
 +/* We're yielding, so tell the scheduler we don't want to be picked */
 +yield_task_fair(rq);
 +
  /* Tell the scheduler that we'd really like pse to run next. */
  set_next_buddy(se);
 
 -yield_task_fair(rq);
 -
  return true;
   }
 
 
 Hi Drew,  Agree with your fix and tested the patch too.. results are
 pretty much same.  puzzled why so.

Looking at the code I see that the next hint might be used more frequently
if we bump up sysctl/kernel.sched_wakeup_granularity_ns. I also just found
out that some virt tuned profiles do that, so maybe I should try running
with one of those profiles.

 
 thinking ... maybe we hit this when #vcpu (of a VM) > #pcpu?
 (pigeonhole principle ;)).

Not sure, but I haven't done any experiments where a single VM has more
#vcpus than the system has pcpus. For my vcpu overcommit I increase the
VM count, where each VM has #vcpus = #pcpus.

Drew
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v3 00/17] * SUBJECT HERE *


forgot to delete the backup versions. :(
-Don
On 09/17/12 09:39, Don Slutz wrote:

*** BLURB HERE ***

Don Slutz (17):
   target-i386: Allow tsc-frequency to be larger then 2.147G
   target-i386: Add missing kvm bits.
   target-i386: Add Hypervisor level.
   target-i386: Add cpu object access routines for Hypervisor level.
   target-i386: Add x86_set_hyperv.
   target-i386: Use Hypervisor level in -machine pc,accel=kvm.
   target-i386: Use Hypervisor level in -machine pc,accel=tcg.
   target-i386: Add Hypervisor vendor.
   target-i386: Add cpu object access routines for Hypervisor vendor.
   target-i386: Use Hypervisor vendor in -machine pc,accel=kvm.
   target-i386: Use Hypervisor vendor in -machine pc,accel=tcg.
   target-i386: Add some known names to Hypervisor vendor.
   target-i386: Add optional Hypervisor leaf extra.
   target-i386: Add cpu object access routines for Hypervisor leaf
 extra.
   target-i386: Add setting of Hypervisor leaf extra for known vmare4.
   target-i386: Use Hypervisor leaf extra in -machine pc,accel=kvm.
   target-i386: Use Hypervisor leaf extra in -machine pc,accel=tcg.

  target-i386/cpu.c |  261 -
  target-i386/cpu.h |   21 +
  target-i386/kvm.c |   33 ++--
  3 files changed, 304 insertions(+), 11 deletions(-)



--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH -v2 2/2] make the compaction skip ahead logic robust

2012-09-17 Thread Rik van Riel


On 09/15/2012 11:55 AM, Richard Davies wrote:

Hi Rik, Mel and Shaohua,

Thank you for your latest patches. I attach my latest perf report for a slow
boot with all of these applied.

Mel asked for timings of the slow boots. It's very hard to give anything
useful here! A normal boot would be a minute or so, and many are like that,
but the slowest that I have seen (on 3.5.x) was several hours. Basically, I
just test many times until I get one which is noticeably slower than normal
and then run perf record on that one.

The latest perf report for a slow boot is below. For the fast boots, most of
the time is in clean_page_c in do_huge_pmd_anonymous_page, but for this slow
one there is a lot of lock contention above that.


How often do you run into slow boots, vs. fast ones?


# Overhead  Command Shared Object   
   Symbol
#   ...    
..
#
 58.49% qemu-kvm  [kernel.kallsyms] [k] _raw_spin_lock_irqsave
|
--- _raw_spin_lock_irqsave
   |
   |--95.07%-- compact_checklock_irqsave
   |  |
   |  |--70.03%-- isolate_migratepages_range
   |  |  compact_zone
   |  |  compact_zone_order
   |  |  try_to_compact_pages
   |  |  __alloc_pages_direct_compact
   |  |  __alloc_pages_nodemask


Looks like it moved from isolate_freepages_block in your last
trace, to isolate_migratepages_range?

Mel, I wonder if we have any quadratic complexity problems
in this part of the code, too?

The isolate_freepages_block CPU use can be fixed by simply
restarting where the last invocation left off, instead of
always starting at the end of the zone.  Could we need
something similar for isolate_migratepages_range?

After all, Richard has a 128GB system, and runs 108GB worth
of KVM guests on it...

--
All rights reversed
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v3 00/17] * SUBJECT HERE *


On 09/17/12 09:49, Don Slutz wrote:

forgot to delete the backup versions. :(
-Don
On 09/17/12 09:39, Don Slutz wrote:

Here is the planned cover letter:

From 7c0a80d8e870da981786b7235d3a968024c89abb Mon Sep 17 00:00:00 2001
In-Reply-To: 1346354435-21685-1-git-send-email-...@cloudswitch.com
References: 1346354435-21685-1-git-send-email-...@cloudswitch.com
From: Don Slutz d...@cloudswitch.com
Date: Mon, 17 Sep 2012 09:23:29 -0400
Subject: [PATCH v3 00/17] Allow changing of Hypervisor CPUIDs.

Also known as Paravirtualization CPUIDs.

This is primarily done so that the guest will think it is running
under vmware when hypervisor-vendor=vmware is specified as a
property of a cpu.


This depends on:

http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01400.html

As far as I know it is #4. It depends on (1) and (2) and (3).

This change is based on:

Microsoft Hypervisor CPUID Leaves:
http://msdn.microsoft.com/en-us/library/windows/hardware/ff542428%28v=vs.85%29.aspx

Linux kernel change starts with:
http://fixunix.com/kernel/538707-use-cpuid-communicate-hypervisor.html
Also:
  http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/00100.html

VMware documentation on CPUIDs (Mechanisms to determine if software is
running in a VMware virtual machine):
http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458

Changes from v1 to v2:

1) Added 1/4 from 
http://lists.gnu.org/archive/html/qemu-devel/2012-08/msg05153.html


   Because Fred is changing jobs and so will not be pushing to get
   this in. It needed to be rebased, And I needed it to complete the
   testing of this change.

2) Added 2/4 because of the re-work I needed a way to clear all KVM bits,

3) The rework of v1.  Make it fit into the object model re-work of cpu.c 
for x86.


4) Added 3/4 -- The split out of the code that is not needed for accel=kvm.

Changes from v2 to v3:

Marcelo Tosatti:
  Its one big patch, better split in logically correlated patches
  (with better changelog). This would help reviewers.

So split 3 and 4 into 3 to 17.  More info in change log.
No code change.

Don Slutz (17):
  target-i386: Allow tsc-frequency to be larger then 2.147G
  target-i386: Add missing kvm bits.
  target-i386: Add Hypervisor level.
  target-i386: Add cpu object access routines for Hypervisor level.
  target-i386: Add x86_set_hyperv.
  target-i386: Use Hypervisor level in -machine pc,accel=kvm.
  target-i386: Use Hypervisor level in -machine pc,accel=tcg.
  target-i386: Add Hypervisor vendor.
  target-i386: Add cpu object access routines for Hypervisor vendor.
  target-i386: Use Hypervisor vendor in -machine pc,accel=kvm.
  target-i386: Use Hypervisor vendor in -machine pc,accel=tcg.
  target-i386: Add some known names to Hypervisor vendor.
  target-i386: Add optional Hypervisor leaf extra.
  target-i386: Add cpu object access routines for Hypervisor leaf
extra.
  target-i386: Add setting of Hypervisor leaf extra for known vmare4.
  target-i386: Use Hypervisor leaf extra in -machine pc,accel=kvm.
  target-i386: Use Hypervisor leaf extra in -machine pc,accel=tcg.

 target-i386/cpu.c |  261 
-

 target-i386/cpu.h |   21 +
 target-i386/kvm.c |   33 ++--
 3 files changed, 304 insertions(+), 11 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v2] target-i386: Report on error during cpu_x86_register().

Send it to stderr before free of the details.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
v2: Change __FUNCTION__ to __func__

 target-i386/cpu.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 57c064f..760cca4 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1352,6 +1352,7 @@ int cpu_x86_register(X86CPU *cpu, const char *cpu_model)
 }
 object_property_set_str(OBJECT(cpu), def-model_id, model-id, error);
 if (error_is_set(error)) {
+fprintf(stderr, %s: %s\n, __func__, error_get_pretty(error));
 error_free(error);
 return -1;
 }
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 00/17] Allow changing of Hypervisor CPUIDs.

Resend with new id so the backup files are not included.

Also known as Paravirtualization CPUIDs.

This is primarily done so that the guest will think it is running
under vmware when hypervisor-vendor=vmware is specified as a
property of a cpu.

This depends on:

http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01400.html

As far as I know it is #4. It depends on (1) and (2) and (3).

This change is based on:

Microsoft Hypervisor CPUID Leaves:

http://msdn.microsoft.com/en-us/library/windows/hardware/ff542428%28v=vs.85%29.aspx

Linux kernel change starts with:
http://fixunix.com/kernel/538707-use-cpuid-communicate-hypervisor.html
Also:
http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/00100.html

VMware documentation on CPUIDs (Mechanisms to determine if software is
running in a VMware virtual machine):

http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458

Changes from v1 to v2:

1) Added 1/4 from
http://lists.gnu.org/archive/html/qemu-devel/2012-08/msg05153.html

Because Fred is changing jobs and so will not be pushing to get
this in. It needed to be rebased, And I needed it to complete the
testing of this change.

2) Added 2/4 because of the re-work I needed a way to clear all KVM bits,

3) The rework of v1. Make it fit into the object model re-work of cpu.c for
x86.

4) Added 3/4 -- The split out of the code that is not needed for accel=kvm.

Changes from v2 to v3:

Marcelo Tosatti:
Its one big patch, better split in logically correlated patches
(with better changelog). This would help reviewers.

So split 3 and 4 into 3 to 17. More info in change log.
No code change.

target-i386/cpu.c | 261 -
target-i386/cpu.h | 21 +
target-i386/kvm.c | 33 ++--
3 files changed, 304 insertions(+), 11 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html

[PATCH v3 01/17] target-i386: Allow tsc-frequency to be larger then 2.147G

The check using INT_MAX (2147483647) is wrong in this case.

Signed-off-by: Fred Oliveira folive...@cloudswitch.com
Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index af50a8f..0313cf5 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1146,7 +1146,7 @@ static void x86_cpuid_set_tsc_freq(Object *obj, Visitor 
*v, void *opaque,
 {
 X86CPU *cpu = X86_CPU(obj);
 const int64_t min = 0;
-const int64_t max = INT_MAX;
+const int64_t max = INT64_MAX;
 int64_t value;
 
 visit_type_freq(v, value, name, errp);
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 02/17] target-i386: Add missing kvm bits.

Fix duplicate name (kvmclock -> kvm_clock2) also.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |   12 
 1 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 0313cf5..5f9866a 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -87,10 +87,14 @@ static const char *ext3_feature_name[] = {
 };
 
 static const char *kvm_feature_name[] = {
-kvmclock, kvm_nopiodelay, kvm_mmu, kvmclock, kvm_asyncpf, NULL, 
kvm_pv_eoi, NULL,
-NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+kvmclock, kvm_nopiodelay, kvm_mmu, kvm_clock2,
+kvm_asyncpf, kvm_steal_time, kvm_pv_eoi, NULL,
+NULL, NULL, NULL, NULL,
+NULL, NULL, NULL, NULL,
+NULL, NULL, NULL, NULL,
+NULL, NULL, NULL, NULL,
+kvm_clock_stable, NULL, NULL, NULL,
+NULL, NULL, NULL, NULL,
 };
 
 static const char *svm_feature_name[] = {
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 03/17] target-i386: Add Hypervisor level.

Also known as Paravirtualization level or maximum cpuid function present in 
this leaf.
This is just the EAX value for 0x40000000.

QEMU knows this is KVM_CPUID_SIGNATURE (0x40000000).

This is based on:

Microsoft Hypervisor CPUID Leaves:
  
http://msdn.microsoft.com/en-us/library/windows/hardware/ff542428%28v=vs.85%29.aspx

Linux kernel change starts with:
  http://fixunix.com/kernel/538707-use-cpuid-communicate-hypervisor.html
Also:
  http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/00100.html

VMware documentation on CPUIDs (Mechanisms to determine if software is
running in a VMware virtual machine):
  
http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458

QEMU has the value HYPERV_CPUID_MIN defined.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.h |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 5265c5a..05c0848 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -782,6 +782,8 @@ typedef struct CPUX86State {
 uint32_t cpuid_ext4_features;
 /* Flags from CPUID[EAX=7,ECX=0].EBX */
 uint32_t cpuid_7_0_ebx;
+/* Hypervisor CPUIDs */
+uint32_t cpuid_hv_level;
 
 /* MTRRs */
 uint64_t mtrr_fixed[11];
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 04/17] target-i386: Add cpu object access routines for Hypervisor level.

These are modeled after x86_cpuid_get_xlevel and x86_cpuid_set_xlevel.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |   28 
 1 files changed, 28 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 5f9866a..0e4a18d 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1166,6 +1166,31 @@ static void x86_cpuid_set_tsc_freq(Object *obj, Visitor 
*v, void *opaque,
 cpu-env.tsc_khz = value / 1000;
 }
 
+static void x86_cpuid_get_hv_level(Object *obj, Visitor *v, void *opaque,
+const char *name, Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+
+visit_type_uint32(v, cpu-env.cpuid_hv_level, name, errp);
+}
+
+static void x86_cpuid_set_hv_level(Object *obj, Visitor *v, void *opaque,
+const char *name, Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+uint32_t value;
+
+visit_type_uint32(v, value, name, errp);
+if (error_is_set(errp)) {
+return;
+}
+
+if ((value != 0)  (value  0x4000)) {
+value += 0x4000;
+}
+cpu-env.cpuid_hv_level = value;
+}
+
 #if !defined(CONFIG_USER_ONLY)
 static void x86_get_hv_spinlocks(Object *obj, Visitor *v, void *opaque,
  const char *name, Error **errp)
@@ -2061,6 +2086,9 @@ static void x86_cpu_initfn(Object *obj)
 object_property_add(obj, enforce, bool,
 x86_cpuid_get_enforce,
 x86_cpuid_set_enforce, NULL, NULL, NULL);
+object_property_add(obj, hypervisor-level, int,
+x86_cpuid_get_hv_level,
+x86_cpuid_set_hv_level, NULL, NULL, NULL);
 #if !defined(CONFIG_USER_ONLY)
 object_property_add(obj, hv_spinlocks, int,
 x86_get_hv_spinlocks,
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 05/17] target-i386: Add x86_set_hyperv.

This is used to set the cpu object's hypervisor level to the default for 
Microsoft's Hypervisor.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |   10 ++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 0e4a18d..4120393 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1192,6 +1192,13 @@ static void x86_cpuid_set_hv_level(Object *obj, Visitor 
*v, void *opaque,
 }
 
 #if !defined(CONFIG_USER_ONLY)
+static void x86_set_hyperv(Object *obj, Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+
+cpu-env.cpuid_hv_level = HYPERV_CPUID_MIN;
+}
+
 static void x86_get_hv_spinlocks(Object *obj, Visitor *v, void *opaque,
  const char *name, Error **errp)
 {
@@ -1214,6 +1221,7 @@ static void x86_set_hv_spinlocks(Object *obj, Visitor *v, 
void *opaque,
 return;
 }
 hyperv_set_spinlock_retries(value);
+x86_set_hyperv(obj, errp);
 }
 
 static void x86_get_hv_relaxed(Object *obj, Visitor *v, void *opaque,
@@ -1234,6 +1242,7 @@ static void x86_set_hv_relaxed(Object *obj, Visitor *v, 
void *opaque,
 return;
 }
 hyperv_enable_relaxed_timing(value);
+x86_set_hyperv(obj, errp);
 }
 
 static void x86_get_hv_vapic(Object *obj, Visitor *v, void *opaque,
@@ -1254,6 +1263,7 @@ static void x86_set_hv_vapic(Object *obj, Visitor *v, 
void *opaque,
 return;
 }
 hyperv_enable_vapic_recommended(value);
+x86_set_hyperv(obj, errp);
 }
 #endif
 
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 06/17] target-i386: Use Hypervisor level in -machine pc,accel=kvm.

Also known as Paravirtualization level.

This change is based on:

Microsoft Hypervisor CPUID Leaves:
  
http://msdn.microsoft.com/en-us/library/windows/hardware/ff542428%28v=vs.85%29.aspx

Linux kernel change starts with:
  http://fixunix.com/kernel/538707-use-cpuid-communicate-hypervisor.html
Also:
  http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/00100.html

VMware documentation on CPUIDs (Mechanisms to determine if software is
running in a VMware virtual machine):
  
http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458

QEMU knows this is KVM_CPUID_SIGNATURE (0x40000000).

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/kvm.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 895d848..bf27793 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -389,12 +389,12 @@ int kvm_arch_init_vcpu(CPUX86State *env)
 c = cpuid_data.entries[cpuid_i++];
 memset(c, 0, sizeof(*c));
 c-function = KVM_CPUID_SIGNATURE;
-if (!hyperv_enabled()) {
+if (env-cpuid_hv_level == 0) {
 memcpy(signature, KVMKVMKVM\0\0\0, 12);
 c-eax = 0;
 } else {
 memcpy(signature, Microsoft Hv, 12);
-c-eax = HYPERV_CPUID_MIN;
+c-eax = env-cpuid_hv_level;
 }
 c-ebx = signature[0];
 c-ecx = signature[1];
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 07/17] target-i386: Use Hypervisor level in -machine pc,accel=tcg.

Also known as Paravirtualization level.

This change is based on:

Microsoft Hypervisor CPUID Leaves:
  
http://msdn.microsoft.com/en-us/library/windows/hardware/ff542428%28v=vs.85%29.aspx

Linux kernel change starts with:
  http://fixunix.com/kernel/538707-use-cpuid-communicate-hypervisor.html
Also:
  http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/00100.html

VMware documentation on CPUIDs (Mechanisms to determine if software is
running in a VMware virtual machine):
  
http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458

QEMU knows this is KVM_CPUID_SIGNATURE (0x40000000).

This does not provide vendor support in tcg yet.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |   22 ++
 1 files changed, 22 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 4120393..d3b9bd8 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1651,6 +1651,16 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 index =  env-cpuid_xlevel;
 }
 }
+} else if (index  0x4000) {
+if (env-cpuid_hv_level  0) {
+/* Handle Hypervisor CPUIDs */
+if (index  env-cpuid_hv_level) {
+index = env-cpuid_hv_level;
+}
+} else {
+if (index  env-cpuid_level)
+index = env-cpuid_level;
+}
 } else {
 if (index  env-cpuid_level)
 index = env-cpuid_level;
@@ -1789,6 +1799,18 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 *edx = 0;
 }
 break;
+case 0x4000:
+*eax = env-cpuid_hv_level;
+*ebx = 0;
+*ecx = 0;
+*edx = 0;
+break;
+case 0x4001:
+*eax = env-cpuid_kvm_features;
+*ebx = 0;
+*ecx = 0;
+*edx = 0;
+break;
 case 0x8000:
 *eax = env-cpuid_xlevel;
 *ebx = env-cpuid_vendor1;
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 08/17] target-i386: Add Hypervisor vendor.

Also known as Paravirtualization vendor.
This is EBX, ECX, EDX data for 0x40000000.

QEMU knows this is KVM_CPUID_SIGNATURE (0x40000000).

This is based on:

Microsoft Hypervisor CPUID Leaves:
  
http://msdn.microsoft.com/en-us/library/windows/hardware/ff542428%28v=vs.85%29.aspx

Linux kernel change starts with:
  http://fixunix.com/kernel/538707-use-cpuid-communicate-hypervisor.html
Also:
  http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/00100.html

VMware documentation on CPUIDs (Mechanisms to determine if software is
running in a VMware virtual machine):
  
http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.h |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 05c0848..53ba4cf 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -784,6 +784,9 @@ typedef struct CPUX86State {
 uint32_t cpuid_7_0_ebx;
 /* Hypervisor CPUIDs */
 uint32_t cpuid_hv_level;
+uint32_t cpuid_hv_vendor1;
+uint32_t cpuid_hv_vendor2;
+uint32_t cpuid_hv_vendor3;
 
 /* MTRRs */
 uint64_t mtrr_fixed[11];
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 09/17] target-i386: Add cpu object access routines for Hypervisor vendor.

These are modeled after x86_cpuid_set_vendor and x86_cpuid_get_vendor.
Since kvm's vendor is shorter, the test for correct size is removed and zero 
padding is added.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |   44 
 1 files changed, 44 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index d3b9bd8..5afb188 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1191,12 +1191,53 @@ static void x86_cpuid_set_hv_level(Object *obj, Visitor 
*v, void *opaque,
 cpu-env.cpuid_hv_level = value;
 }
 
+static char *x86_cpuid_get_hv_vendor(Object *obj, Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+CPUX86State *env = cpu-env;
+char *value;
+int i;
+
+value = (char *)g_malloc(CPUID_VENDOR_SZ + 1);
+for (i = 0; i  4; i++) {
+value[i + 0] = env-cpuid_hv_vendor1  (8 * i);
+value[i + 4] = env-cpuid_hv_vendor2  (8 * i);
+value[i + 8] = env-cpuid_hv_vendor3  (8 * i);
+}
+value[CPUID_VENDOR_SZ] = '\0';
+
+return value;
+}
+
+static void x86_cpuid_set_hv_vendor(Object *obj, const char *value,
+ Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+CPUX86State *env = cpu-env;
+int i;
+char adj_value[CPUID_VENDOR_SZ + 1];
+
+memset(adj_value, 0, sizeof(adj_value));
+
+pstrcpy(adj_value, sizeof(adj_value), value);
+
+env-cpuid_hv_vendor1 = 0;
+env-cpuid_hv_vendor2 = 0;
+env-cpuid_hv_vendor3 = 0;
+for (i = 0; i  4; i++) {
+env-cpuid_hv_vendor1 |= ((uint8_t)adj_value[i + 0])  (8 * i);
+env-cpuid_hv_vendor2 |= ((uint8_t)adj_value[i + 4])  (8 * i);
+env-cpuid_hv_vendor3 |= ((uint8_t)adj_value[i + 8])  (8 * i);
+}
+}
+
 #if !defined(CONFIG_USER_ONLY)
 static void x86_set_hyperv(Object *obj, Error **errp)
 {
 X86CPU *cpu = X86_CPU(obj);
 
 cpu-env.cpuid_hv_level = HYPERV_CPUID_MIN;
+x86_cpuid_set_hv_vendor(obj, Microsoft Hv, errp);
 }
 
 static void x86_get_hv_spinlocks(Object *obj, Visitor *v, void *opaque,
@@ -2121,6 +2162,9 @@ static void x86_cpu_initfn(Object *obj)
 object_property_add(obj, hypervisor-level, int,
 x86_cpuid_get_hv_level,
 x86_cpuid_set_hv_level, NULL, NULL, NULL);
+object_property_add_str(obj, hypervisor-vendor,
+x86_cpuid_get_hv_vendor,
+x86_cpuid_set_hv_vendor, NULL);
 #if !defined(CONFIG_USER_ONLY)
 object_property_add(obj, hv_spinlocks, int,
 x86_get_hv_spinlocks,
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 11/17] target-i386: Use Hypervisor vendor in -machine pc,accel=tcg.

Also known as Paravirtualization vendor.

This change is based on:

Microsoft Hypervisor CPUID Leaves:
  
http://msdn.microsoft.com/en-us/library/windows/hardware/ff542428%28v=vs.85%29.aspx

Linux kernel change starts with:
  http://fixunix.com/kernel/538707-use-cpuid-communicate-hypervisor.html
Also:
  http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/00100.html

VMware documentation on CPUIDs (Mechanisms to determine if software is
running in a VMware virtual machine):
  
http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 5afb188..1b3a472 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1842,9 +1842,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 break;
 case 0x4000:
 *eax = env-cpuid_hv_level;
-*ebx = 0;
-*ecx = 0;
-*edx = 0;
+*ebx = env-cpuid_hv_vendor1;
+*ecx = env-cpuid_hv_vendor2;
+*edx = env-cpuid_hv_vendor3;
 break;
 case 0x4001:
 *eax = env-cpuid_kvm_features;
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 12/17] target-i386: Add some known names to Hypervisor vendor.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |   36 +++-
 target-i386/cpu.h |   12 
 2 files changed, 47 insertions(+), 1 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 1b3a472..9ac3076 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1206,6 +1206,20 @@ static char *x86_cpuid_get_hv_vendor(Object *obj, Error 
**errp)
 }
 value[CPUID_VENDOR_SZ] = '\0';
 
+/* Convert known names */
+if (!strcmp(value, CPUID_HV_VENDOR_VMWARE)) {
+if (env-cpuid_hv_level == CPUID_HV_LEVEL_VMARE_4) {
+pstrcpy(value, sizeof(value), vmware4);
+} else if (env-cpuid_hv_level == CPUID_HV_LEVEL_VMARE_3) {
+pstrcpy(value, sizeof(value), vmware3);
+}
+} else if (!strcmp(value, CPUID_HV_VENDOR_XEN) 
+   env-cpuid_hv_level == CPUID_HV_LEVEL_XEN) {
+pstrcpy(value, sizeof(value), xen);
+} else if (!strcmp(value, CPUID_HV_VENDOR_KVM) 
+   env-cpuid_hv_level == 0) {
+pstrcpy(value, sizeof(value), kvm);
+}
 return value;
 }
 
@@ -1219,7 +1233,27 @@ static void x86_cpuid_set_hv_vendor(Object *obj, const 
char *value,
 
 memset(adj_value, 0, sizeof(adj_value));
 
-pstrcpy(adj_value, sizeof(adj_value), value);
+/* Convert known names */
+if (!strcmp(value, vmware) || !strcmp(value, vmware4)) {
+if (env-cpuid_hv_level == 0) {
+env-cpuid_hv_level = CPUID_HV_LEVEL_VMARE_4;
+}
+pstrcpy(adj_value, sizeof(adj_value), CPUID_HV_VENDOR_VMWARE);
+} else if (!strcmp(value, vmware3)) {
+if (env-cpuid_hv_level == 0) {
+env-cpuid_hv_level = CPUID_HV_LEVEL_VMARE_3;
+}
+pstrcpy(adj_value, sizeof(adj_value), CPUID_HV_VENDOR_VMWARE);
+} else if (!strcmp(value, xen)) {
+if (env-cpuid_hv_level == 0) {
+env-cpuid_hv_level = CPUID_HV_LEVEL_XEN;
+}
+pstrcpy(adj_value, sizeof(adj_value), CPUID_HV_VENDOR_XEN);
+} else if (!strcmp(value, kvm)) {
+pstrcpy(adj_value, sizeof(adj_value), CPUID_HV_VENDOR_KVM);
+} else {
+pstrcpy(adj_value, sizeof(adj_value), value);
+}
 
 env-cpuid_hv_vendor1 = 0;
 env-cpuid_hv_vendor2 = 0;
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 53ba4cf..47bc00c 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -488,6 +488,18 @@
 
 #define CPUID_VENDOR_VIA   CentaurHauls
 
+#define CPUID_HV_VENDOR_VMWARE_1 0x61774d56 /* VMwa */
+#define CPUID_HV_VENDOR_VMWARE_2 0x4d566572 /* reVM */
+#define CPUID_HV_VENDOR_VMWARE_3 0x65726177 /* ware */
+#define CPUID_HV_VENDOR_VMWARE VMwareVMware
+#define CPUID_HV_LEVEL_VMARE_3 0x4002
+#define CPUID_HV_LEVEL_VMARE_4 0x4010
+
+#define CPUID_HV_VENDOR_XEN XenVMMXenVMM
+#define CPUID_HV_LEVEL_XEN  0x4002
+
+#define CPUID_HV_VENDOR_KVM KVMKVMKVM
+
 #define CPUID_MWAIT_IBE (1  1) /* Interrupts can exit capability */
 #define CPUID_MWAIT_EMX (1  0) /* enumeration supported */
 
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 13/17] target-i386: Add optional Hypervisor leaf extra.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.h |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 47bc00c..a2d3588 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -799,6 +799,10 @@ typedef struct CPUX86State {
 uint32_t cpuid_hv_vendor1;
 uint32_t cpuid_hv_vendor2;
 uint32_t cpuid_hv_vendor3;
+/* VMware extra data */
+uint32_t cpuid_hv_extra;
+uint32_t cpuid_hv_extra_a;
+uint32_t cpuid_hv_extra_b;
 
 /* MTRRs */
 uint64_t mtrr_fixed[11];
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 14/17] target-i386: Add cpu object access routines for Hypervisor leaf extra.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |   66 +
 1 files changed, 66 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 9ac3076..34d2291 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1265,6 +1265,63 @@ static void x86_cpuid_set_hv_vendor(Object *obj, const 
char *value,
 }
 }
 
+static void x86_cpuid_get_hv_extra(Object *obj, Visitor *v, void *opaque,
+const char *name, Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+
+visit_type_uint32(v, cpu-env.cpuid_hv_extra, name, errp);
+}
+
+static void x86_cpuid_set_hv_extra(Object *obj, Visitor *v, void *opaque,
+const char *name, Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+uint32_t value;
+
+visit_type_uint32(v, value, name, errp);
+if (error_is_set(errp)) {
+return;
+}
+
+if ((value != 0)  (value  0x4000)) {
+value += 0x4000;
+}
+cpu-env.cpuid_hv_extra = value;
+}
+
+static void x86_cpuid_get_hv_extra_a(Object *obj, Visitor *v, void *opaque,
+const char *name, Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+
+visit_type_uint32(v, cpu-env.cpuid_hv_extra_a, name, errp);
+}
+
+static void x86_cpuid_set_hv_extra_a(Object *obj, Visitor *v, void *opaque,
+const char *name, Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+
+visit_type_uint32(v, cpu-env.cpuid_hv_extra_a, name, errp);
+}
+
+static void x86_cpuid_get_hv_extra_b(Object *obj, Visitor *v, void *opaque,
+const char *name, Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+
+visit_type_uint32(v, cpu-env.cpuid_hv_extra_b, name, errp);
+}
+
+static void x86_cpuid_set_hv_extra_b(Object *obj, Visitor *v, void *opaque,
+const char *name, Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+
+visit_type_uint32(v, cpu-env.cpuid_hv_extra_b, name, errp);
+}
+
 #if !defined(CONFIG_USER_ONLY)
 static void x86_set_hyperv(Object *obj, Error **errp)
 {
@@ -2199,6 +2256,15 @@ static void x86_cpu_initfn(Object *obj)
 object_property_add_str(obj, hypervisor-vendor,
 x86_cpuid_get_hv_vendor,
 x86_cpuid_set_hv_vendor, NULL);
+object_property_add(obj, hypervisor-extra, int,
+x86_cpuid_get_hv_extra,
+x86_cpuid_set_hv_extra, NULL, NULL, NULL);
+object_property_add(obj, hypervisor-extra-a, int,
+x86_cpuid_get_hv_extra_a,
+x86_cpuid_set_hv_extra_a, NULL, NULL, NULL);
+object_property_add(obj, hypervisor-extra-b, int,
+x86_cpuid_get_hv_extra_b,
+x86_cpuid_set_hv_extra_b, NULL, NULL, NULL);
 #if !defined(CONFIG_USER_ONLY)
 object_property_add(obj, hv_spinlocks, int,
 x86_get_hv_spinlocks,
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 15/17] target-i386: Add setting of Hypervisor leaf extra for known vmware4.

This was taken from:
  http://article.gmane.org/gmane.comp.emulators.kvm.devel/22643

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |   32 
 1 files changed, 32 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 34d2291..bfaee02 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1135,6 +1135,36 @@ static void x86_cpuid_set_model_id(Object *obj, const 
char *model_id,
 }
 }
 
+static void x86_cpuid_set_vmware_extra(Object *obj)
+{
+X86CPU *cpu = X86_CPU(obj);
+
+if ((cpu-env.tsc_khz != 0) 
+(cpu-env.cpuid_hv_level == CPUID_HV_LEVEL_VMARE_4) 
+(cpu-env.cpuid_hv_vendor1 == CPUID_HV_VENDOR_VMWARE_1) 
+(cpu-env.cpuid_hv_vendor2 == CPUID_HV_VENDOR_VMWARE_2) 
+(cpu-env.cpuid_hv_vendor3 == CPUID_HV_VENDOR_VMWARE_3)) {
+const uint32_t apic_khz = 100L;
+
+/*
+ * From article.gmane.org/gmane.comp.emulators.kvm.devel/22643
+ *
+ *Leaf 0x4010, Timing Information.
+ *
+ *VMware has defined the first generic leaf to provide timing
+ *information.  This leaf returns the current TSC frequency and
+ *current Bus frequency in kHz.
+ *
+ *# EAX: (Virtual) TSC frequency in kHz.
+ *# EBX: (Virtual) Bus (local apic timer) frequency in kHz.
+ *# ECX, EDX: RESERVED (Per above, reserved fields are set to 
zero).
+ */
+cpu-env.cpuid_hv_extra = 0x4010;
+cpu-env.cpuid_hv_extra_a = (uint32_t)cpu-env.tsc_khz;
+cpu-env.cpuid_hv_extra_b = apic_khz;
+}
+}
+
 static void x86_cpuid_get_tsc_freq(Object *obj, Visitor *v, void *opaque,
const char *name, Error **errp)
 {
@@ -1164,6 +1194,7 @@ static void x86_cpuid_set_tsc_freq(Object *obj, Visitor 
*v, void *opaque,
 }
 
 cpu-env.tsc_khz = value / 1000;
+x86_cpuid_set_vmware_extra(obj);
 }
 
 static void x86_cpuid_get_hv_level(Object *obj, Visitor *v, void *opaque,
@@ -1263,6 +1294,7 @@ static void x86_cpuid_set_hv_vendor(Object *obj, const 
char *value,
 env-cpuid_hv_vendor2 |= ((uint8_t)adj_value[i + 4])  (8 * i);
 env-cpuid_hv_vendor3 |= ((uint8_t)adj_value[i + 8])  (8 * i);
 }
+x86_cpuid_set_vmware_extra(obj);
 }
 
 static void x86_cpuid_get_hv_extra(Object *obj, Visitor *v, void *opaque,
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v2] target-i386: Report on error during cpu_x86_register().


On 09/17/12 10:00, Don Slutz wrote:

Send it to stderr before free of the details.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
v2: Change __FUNCTION__ to __func__

  target-i386/cpu.c |1 +
  1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 57c064f..760cca4 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1352,6 +1352,7 @@ int cpu_x86_register(X86CPU *cpu, const char *cpu_model)
  }
  object_property_set_str(OBJECT(cpu), def-model_id, model-id, error);
  if (error_is_set(error)) {
+fprintf(stderr, %s: %s\n, __func__, error_get_pretty(error));
  error_free(error);
  return -1;
  }

I am making a lot of mistakes this morning.  Please ignore this e-mail.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 17/17] target-i386: Use Hypervisor leaf extra in -machine pc,accel=tcg.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/cpu.c |   11 +++
 1 files changed, 11 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index bfaee02..89a45b5 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1975,6 +1975,17 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 *ecx = 0;
 *edx = 0;
 break;
+case 0x4002 ... 0x40FF:
+if (index == env-cpuid_hv_extra) {
+*eax = env-cpuid_hv_extra_a;
+*ebx = env-cpuid_hv_extra_b;
+} else {
+*eax = 0;
+*ebx = 0;
+}
+*ecx = 0;
+*edx = 0;
+break;
 case 0x8000:
 *eax = env-cpuid_xlevel;
 *ebx = env-cpuid_vendor1;
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3 16/17] target-i386: Use Hypervisor leaf extra in -machine pc,accel=kvm.

Signed-off-by: Don Slutz d...@cloudswitch.com
---
 target-i386/kvm.c |   19 +++
 1 files changed, 19 insertions(+), 0 deletions(-)

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index b8789f2..17c72bc 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -454,6 +454,25 @@ int kvm_arch_init_vcpu(CPUX86State *env)
 c-ebx = signature[0];
 c-ecx = signature[1];
 c-edx = signature[2];
+} else if (env-cpuid_hv_level  0) {
+for (i = KVM_CPUID_FEATURES + 1; i = env-cpuid_hv_level; i++) {
+c = cpuid_data.entries[cpuid_i++];
+memset(c, 0, sizeof(*c));
+c-function = i;
+if (i == env-cpuid_hv_extra) {
+c-eax = env-cpuid_hv_extra_a;
+c-ebx = env-cpuid_hv_extra_b;
+}
+}
+
+c = cpuid_data.entries[cpuid_i++];
+memset(c, 0, sizeof(*c));
+c-function = KVM_CPUID_SIGNATURE_NEXT;
+memcpy(signature, KVMKVMKVM\0\0\0, 12);
+c-eax = 0;
+c-ebx = signature[0];
+c-ecx = signature[1];
+c-edx = signature[2];
 }
 
 has_msr_async_pf_en = c-eax  (1  KVM_FEATURE_ASYNC_PF);
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH -v2 2/2] make the compaction skip ahead logic robust

2012-09-17 Thread Mel Gorman

On Mon, Sep 17, 2012 at 09:50:08AM -0400, Rik van Riel wrote:
 On 09/15/2012 11:55 AM, Richard Davies wrote:
 Hi Rik, Mel and Shaohua,
 
 Thank you for your latest patches. I attach my latest perf report for a slow
 boot with all of these applied.
 
 Mel asked for timings of the slow boots. It's very hard to give anything
 useful here! A normal boot would be a minute or so, and many are like that,
 but the slowest that I have seen (on 3.5.x) was several hours. Basically, I
 just test many times until I get one which is noticeably slower than normal
 and then run perf record on that one.
 
 The latest perf report for a slow boot is below. For the fast boots, most of
 the time is in clean_page_c in do_huge_pmd_anonymous_page, but for this slow
 one there is a lot of lock contention above that.
 
 How often do you run into slow boots, vs. fast ones?
 
 # Overhead  Command Shared Object
   Symbol
 #   ...    
 ..
 #
  58.49% qemu-kvm  [kernel.kallsyms] [k] 
  _raw_spin_lock_irqsave
 |
 --- _raw_spin_lock_irqsave
|
|--95.07%-- compact_checklock_irqsave
|  |
|  |--70.03%-- isolate_migratepages_range
|  |  compact_zone
|  |  compact_zone_order
|  |  try_to_compact_pages
|  |  __alloc_pages_direct_compact
|  |  __alloc_pages_nodemask
 
 Looks like it moved from isolate_freepages_block in your last
 trace, to isolate_migratepages_range?
 
 Mel, I wonder if we have any quadratic complexity problems
 in this part of the code, too?
 

Possibly but right now I'm focusing on the contention even though I recognise
that reducing the amount of scanning implicitly reduces the amount of
contention. I'm running a test at the moment with an additional patch
to record the pageblock being scanned by either the free or migrate page
scanner. This should be enough to both calculate the scanning efficiency
and how many useless blocks are scanned to determine if your skip
patches are behaving as expected and from there decide if the migrate
scanner needs similar logic.

-- 
Mel Gorman
SUSE Labs
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

KVM call for agenda for Tuesday, September 18th

2012-09-17 Thread Juan Quintela


Hi

Please send in any agenda items you are interested in covering.

Thanks, Juan.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] Block Migration Assertion in qemu-kvm 1.2.0

2012-09-17 Thread Peter Lieven


On 09/17/12 10:41, Kevin Wolf wrote:

Am 16.09.2012 12:13, schrieb Peter Lieven:

Hi,

when trying to block migrate a VM from one node to another, the source
VM crashed with the following assertion:
block.c:3829: bdrv_set_in_use: Assertion `bs-in_use != in_use' failed.

Is this something already addressed/known?

Not that I'm aware of, at least.

Block migration doesn't seem to check whether the device is already in
use, maybe this is the problem. Not sure why it would be in use, though,
and in my quick test it didn't crash.
It seems that it only happens if a vServer that has been block migrated 
earlier is block migrated the next time.

So we need some more information: What's your command line, did you do
anything specific in the monitor with block devices, what does the
stacktrace look like, etc.?

Here is my cmdline:
/usr/bin/qemu-kvm-1.2.0  -net 
tap,vlan=164,script=no,downscript=no,ifname=tap0  -net nic,vlan
=164,model=e1000,macaddr=52:54:00:ff:01:19   -drive 
format=host_device,file=/dev/7cf58855099771c2/lieven-storage-migration-t-hd0,if=virtio,cache=none,aio=nat
ive  -m 2048 -smp 2,sockets=1,cores=2,threads=1  -monitor 
tcp:0:4001,server,nowait -vnc :1 -qmp tcp:0:3001,server,nowait  -name 
'lieven-storage-migration-test'  -boot or
der=dc,menu=off  -k de  -incoming tcp:172.21.55.34:5001  -pidfile 
/var/run/qemu/vm-254.pid  -mem-path /hugepages  -mem-prealloc  -rtc 
base=utc -usb -usbdevice tablet -no
-hpet -vga cirrus  -cpu host,+x2apic,model_id='Intel(R) Xeon(R) 
CPU   L5640  @ 2.27GHz',-tsc


I have seen other errors as well in the meantime:
block-migration.c:471: flush_blks: Assertion `block_mig_state.read_done 
= 0' failed.
qemu-kvm-1.2.0[27851]: segfault at 7f00746e78d7 ip 7f67eca6226d sp 
7fff56ae3340 error 4 in qemu-system-x86_64[7f67ec9e9000+418000]


I will now try to catch the situation in the debugger.

Thanks,
Peter


Kevin


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v8 1/3] KVM: x86: export svm/vmx exit code and vector code to userspace

2012-09-17 Thread Arnaldo Carvalho de Melo

Em Mon, Sep 17, 2012 at 04:31:13PM +0800, Dong Hao escreveu:
 From: Xiao Guangrong xiaoguangr...@linux.vnet.ibm.com
 
 Exporting KVM exit information to userspace to be consumed by perf.
 
 [ Dong Hao haod...@linux.vnet.ibm.com: rebase it on acme's git tree ]
 Signed-off-by: Dong Hao haod...@linux.vnet.ibm.com
 Signed-off-by: Xiao Guangrong xiaoguangr...@linux.vnet.ibm.com

Do we have acked/reviewed-by for this patch? Marcelo? Avi?

  arch/x86/include/asm/kvm.h  |   16 +++
  arch/x86/include/asm/kvm_host.h |   16 ---
  arch/x86/include/asm/svm.h  |  205 
 +--
  arch/x86/include/asm/vmx.h  |  127 
  arch/x86/kvm/trace.h|   89 -
  5 files changed, 230 insertions(+), 223 deletions(-)
 
 diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
 index 246617e..41e08cb 100644
 --- a/arch/x86/include/asm/kvm.h
 +++ b/arch/x86/include/asm/kvm.h
 @@ -9,6 +9,22 @@
  #include linux/types.h
  #include linux/ioctl.h
  
 +#define DE_VECTOR 0
 +#define DB_VECTOR 1
 +#define BP_VECTOR 3
 +#define OF_VECTOR 4
 +#define BR_VECTOR 5
 +#define UD_VECTOR 6
 +#define NM_VECTOR 7
 +#define DF_VECTOR 8
 +#define TS_VECTOR 10
 +#define NP_VECTOR 11
 +#define SS_VECTOR 12
 +#define GP_VECTOR 13
 +#define PF_VECTOR 14
 +#define MF_VECTOR 16
 +#define MC_VECTOR 18
 +
  /* Select x86 specific features in linux/kvm.h */
  #define __KVM_HAVE_PIT
  #define __KVM_HAVE_IOAPIC
 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
 index 09155d6..1eaa6b0 100644
 --- a/arch/x86/include/asm/kvm_host.h
 +++ b/arch/x86/include/asm/kvm_host.h
 @@ -75,22 +75,6 @@
  #define KVM_HPAGE_MASK(x)(~(KVM_HPAGE_SIZE(x) - 1))
  #define KVM_PAGES_PER_HPAGE(x)   (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
  
 -#define DE_VECTOR 0
 -#define DB_VECTOR 1
 -#define BP_VECTOR 3
 -#define OF_VECTOR 4
 -#define BR_VECTOR 5
 -#define UD_VECTOR 6
 -#define NM_VECTOR 7
 -#define DF_VECTOR 8
 -#define TS_VECTOR 10
 -#define NP_VECTOR 11
 -#define SS_VECTOR 12
 -#define GP_VECTOR 13
 -#define PF_VECTOR 14
 -#define MF_VECTOR 16
 -#define MC_VECTOR 18
 -
  #define SELECTOR_TI_MASK (1  2)
  #define SELECTOR_RPL_MASK 0x03
  
 diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
 index f2b83bc..cdf5674 100644
 --- a/arch/x86/include/asm/svm.h
 +++ b/arch/x86/include/asm/svm.h
 @@ -1,6 +1,135 @@
  #ifndef __SVM_H
  #define __SVM_H
  
 +#define SVM_EXIT_READ_CR0  0x000
 +#define SVM_EXIT_READ_CR3  0x003
 +#define SVM_EXIT_READ_CR4  0x004
 +#define SVM_EXIT_READ_CR8  0x008
 +#define SVM_EXIT_WRITE_CR0 0x010
 +#define SVM_EXIT_WRITE_CR3 0x013
 +#define SVM_EXIT_WRITE_CR4 0x014
 +#define SVM_EXIT_WRITE_CR8 0x018
 +#define SVM_EXIT_READ_DR0  0x020
 +#define SVM_EXIT_READ_DR1  0x021
 +#define SVM_EXIT_READ_DR2  0x022
 +#define SVM_EXIT_READ_DR3  0x023
 +#define SVM_EXIT_READ_DR4  0x024
 +#define SVM_EXIT_READ_DR5  0x025
 +#define SVM_EXIT_READ_DR6  0x026
 +#define SVM_EXIT_READ_DR7  0x027
 +#define SVM_EXIT_WRITE_DR0 0x030
 +#define SVM_EXIT_WRITE_DR1 0x031
 +#define SVM_EXIT_WRITE_DR2 0x032
 +#define SVM_EXIT_WRITE_DR3 0x033
 +#define SVM_EXIT_WRITE_DR4 0x034
 +#define SVM_EXIT_WRITE_DR5 0x035
 +#define SVM_EXIT_WRITE_DR6 0x036
 +#define SVM_EXIT_WRITE_DR7 0x037
 +#define SVM_EXIT_EXCP_BASE 0x040
 +#define SVM_EXIT_INTR  0x060
 +#define SVM_EXIT_NMI   0x061
 +#define SVM_EXIT_SMI   0x062
 +#define SVM_EXIT_INIT  0x063
 +#define SVM_EXIT_VINTR 0x064
 +#define SVM_EXIT_CR0_SEL_WRITE 0x065
 +#define SVM_EXIT_IDTR_READ 0x066
 +#define SVM_EXIT_GDTR_READ 0x067
 +#define SVM_EXIT_LDTR_READ 0x068
 +#define SVM_EXIT_TR_READ   0x069
 +#define SVM_EXIT_IDTR_WRITE0x06a
 +#define SVM_EXIT_GDTR_WRITE0x06b
 +#define SVM_EXIT_LDTR_WRITE0x06c
 +#define SVM_EXIT_TR_WRITE  0x06d
 +#define SVM_EXIT_RDTSC 0x06e
 +#define SVM_EXIT_RDPMC 0x06f
 +#define SVM_EXIT_PUSHF 0x070
 +#define SVM_EXIT_POPF  0x071
 +#define SVM_EXIT_CPUID 0x072
 +#define SVM_EXIT_RSM   0x073
 +#define SVM_EXIT_IRET  0x074
 +#define SVM_EXIT_SWINT 0x075
 +#define SVM_EXIT_INVD  0x076
 +#define SVM_EXIT_PAUSE 0x077
 +#define SVM_EXIT_HLT   0x078
 +#define SVM_EXIT_INVLPG0x079
 +#define SVM_EXIT_INVLPGA   0x07a
 +#define SVM_EXIT_IOIO  0x07b
 +#define SVM_EXIT_MSR   0x07c
 +#define SVM_EXIT_TASK_SWITCH   0x07d
 +#define SVM_EXIT_FERR_FREEZE   0x07e
 +#define SVM_EXIT_SHUTDOWN  0x07f
 +#define SVM_EXIT_VMRUN 0x080
 +#define SVM_EXIT_VMMCALL   0x081
 +#define SVM_EXIT_VMLOAD0x082
 +#define SVM_EXIT_VMSAVE0x083
 +#define SVM_EXIT_STGI  0x084
 +#define SVM_EXIT_CLGI  0x085
 +#define SVM_EXIT_SKINIT0x086
 +#define SVM_EXIT_RDTSCP0x087

Re: [PATCH v9 2/2] kvm: On Ack, De-assert Notify KVM_IRQFD extension

On Wed, 2012-09-05 at 17:57 +0300, Avi Kivity wrote:
 On 08/21/2012 10:29 PM, Alex Williamson wrote:
  For VFIO based device assignment we'd like a mechanism to allow level
  triggered interrupts to be directly injected into KVM.  KVM_IRQFD
  already allows this for edge triggered interrupts, but for level, we
  need to watch for acknowledgement of the interrupt from the guest to
  provide us a hint when to test the device and allow it to re-assert
  if necessary.  To do this, we create a new KVM_IRQFD mode called
  On Ack, De-assert & Notify, or OADN.  In this mode, an interrupt
  injection provides only a gsi assertion.  We then hook into the IRQ
  ACK notifier, which when triggered de-asserts the gsi and notifies
  via another eventfd.  It's then the responsibility of the user to
  re-assert the interrupt if service is still required.
  
  
  diff --git a/Documentation/virtual/kvm/api.txt 
  b/Documentation/virtual/kvm/api.txt
  index bf33aaa..87d7321 100644
  --- a/Documentation/virtual/kvm/api.txt
  +++ b/Documentation/virtual/kvm/api.txt
  @@ -1946,6 +1946,19 @@ the guest using the specified gsi pin.  The irqfd is 
  removed using
   the KVM_IRQFD_FLAG_DEASSIGN flag, specifying both kvm_irqfd.fd
   and kvm_irqfd.gsi.
   
  +With KVM_CAP_IRQFD_OADN, KVM_IRQFD supports an On Ack, De-assert 
  +Notify option that allows emulation of level-triggered interrupts.
  +When kvm_irqfd.fd is triggered, the requested gsi is asserted and
  +remains asserted until interaction with the irqchip indicates the
  +VM has acknowledged the interrupt, such as an EOI.  On acknowledgement
  +the gsi is automatically de-asserted and the user is notified via
  +kvm_irqfd.notifyfd.  The user is then required to re-assert the
  +interrupt if the associated device still requires service.  To enable
  +this mode, configure the KVM_IRQFD using the KVM_IRQFD_FLAG_OADN flag
  +and specify kvm_irqfd.notifyfd.  Note that closing kvm_irqfd.notifyfd
  +while configured in this mode does not disable the irqfd.  The
  +KVM_IRQFD_FLAG_OADN flag is only necessary on assignment.
 
 Under my suggested naming, this would be called a resampling irqfd,
 with resampling requested via kvm_irqfd.resamplefd.
 
  diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
  index 2245cfa..dfdb5b2 100644
  --- a/virt/kvm/eventfd.c
  +++ b/virt/kvm/eventfd.c
  @@ -43,6 +43,23 @@
* 
*/
   
  +/*
  + * OADN irqfds (On Ack, De-assert  Notify) are a special variety of
  + * irqfds that assert an interrupt to the irqchip on eventfd trigger,
  + * receive notification when userspace acknowledges the interrupt,
  + * automatically de-asserts the irqchip level, and notifies userspace
  + * via the oadn_eventfd.  This object helps to provide one-to-many
  + * deassert-to-notify so we can share a single irq source ID per OADN.
  + */
  +struct _irqfd_oadn {
  +   struct kvm *kvm;
  +   int irq_source_id; /* IRQ source ID shared by these irqfds */
  +   struct list_head irqfds; /* list of irqfds using this object */
  +   struct kvm_irq_ack_notifier notifier; /* IRQ ACK notification */
  +   struct kref kref; /* Race-free removal */
  +   struct list_head list;
  +};
 
 
 Why do you need per-gsi irq source IDs?  irq source ids only matter
 within a gsi.  For example KVM_IRQ_LINE shares one source ID for all
 lines (with result that userspace is forced to manage the ORing of
 shared inputs itself).

Right, but locking makes it difficult to tear down a resample irqfd
without potentially racing creation of a new one, which I tried to
explain here:

http://www.spinics.net/lists/kvm/msg78460.html

This can cause a de-assert w/o ack as we briefly have multiple resample
irqfds on the same gsi, irq source id pair.  That can dead lock a vfio
device.  Using a new irq source ID ensures that the old resample irqfd
doesn't interfere with the new one.  We count on the final clear or the
gsi assertion when releasing the irq source id, so we can't share it
among other resample irqfds on other gsis with different life cycles.

Michael has suggested re-architecting the locking around some structure,
but I'm not sure it's worth it.  AFAICT we have more irq source IDs than
we could consume if resample irqfds on the same gsi share an irq source
id.  Thanks,

Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] kvm: make processes waiting on vcpu mutex killable

On Sun, Sep 16, 2012 at 11:50:30AM +0300, Michael S. Tsirkin wrote:
 vcpu mutex can be held for unlimited time so
 taking it with mutex_lock on an ioctl is wrong:
 one process could be passed a vcpu fd and
 call this ioctl on the vcpu used by another process,
 it will then be unkillable until the owner exits.
 
 Call mutex_lock_killable instead and return status.
 Note: mutex_lock_interruptible would be even nicer,
 but I am not sure all users are prepared to handle EINTR
 from these ioctls. They might misinterpret it as an error.
 
 Cleanup paths expect a vcpu that can't be used by
 any userspace so this will always succeed - catch bugs
 by calling BUG_ON.
 
 Catch callers that don't check return state by adding
 __must_check.
 
 Signed-off-by: Michael S. Tsirkin m...@redhat.com
 ---
 
 It's a minor bugfix - should we put it in 3.6?

Applied to branch 'queue' on the basis it's not a regression/critical problem,
thanks.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] kvm: Fix kvmclock documentation to match reality

On Sun, Sep 16, 2012 at 12:55:40PM +0200, Stefan Fritsch wrote:
 Hi,
 
 I found the kvmclock documentation to be rather unhelpful. This
 patch should fix it.
 
 Cheers,
 Stefan
 
 Author: Stefan Fritsch s...@sfritsch.de
 Date:   Sun Sep 16 12:30:46 2012 +0200
 
 kvm: Fix kvmclock documentation to match reality
 
 - mention that system time needs to be added to wallclock time
 - positive tsc_shift means left shift, not right
 - mention additional 32bit right shift
 
 Signed-off-by: Stefan Fritsch s...@sfritsch.de
 
 ---
  Documentation/virtual/kvm/msr.txt |   32 
  1 file changed, 20 insertions(+), 12 deletions(-)

Applied, thanks.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 09/18] perf stat: Move stats related code to util/stat.c

2012-09-17 Thread Arnaldo Carvalho de Melo

From: Xiao Guangrong xiaoguangr...@linux.vnet.ibm.com

Then, the code can be shared between kvm events and perf stat.

Signed-off-by: Xiao Guangrong xiaoguangr...@linux.vnet.ibm.com
[ Dong Hao haod...@linux.vnet.ibm.com: rebase it on acme's git tree ]
Signed-off-by: Dong Hao haod...@linux.vnet.ibm.com
Cc: Avi Kivity a...@redhat.com
Cc: David Ahern dsah...@gmail.com
Cc: Ingo Molnar mi...@kernel.org
Cc: kvm@vger.kernel.org
Cc: Marcelo Tosatti mtosa...@redhat.com
Cc: Runzhen Wang runz...@linux.vnet.ibm.com
Cc: Xiao Guangrong xiaoguangr...@linux.vnet.ibm.com
Link: 
http://lkml.kernel.org/r/1347870675-31495-3-git-send-email-haod...@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo a...@redhat.com
---
 tools/perf/Makefile   |1 +
 tools/perf/builtin-stat.c |   56 +--
 tools/perf/util/stat.c|   57 +
 tools/perf/util/stat.h|   16 
 4 files changed, 76 insertions(+), 54 deletions(-)
 create mode 100644 tools/perf/util/stat.c
 create mode 100644 tools/perf/util/stat.h

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 209774b..5077f8e 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -406,6 +406,7 @@ LIB_OBJS += $(OUTPUT)util/target.o
 LIB_OBJS += $(OUTPUT)util/rblist.o
 LIB_OBJS += $(OUTPUT)util/intlist.o
 LIB_OBJS += $(OUTPUT)util/vdso.o
+LIB_OBJS += $(OUTPUT)util/stat.o
 
 LIB_OBJS += $(OUTPUT)ui/helpline.o
 LIB_OBJS += $(OUTPUT)ui/hist.o
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index dab347d..3c43a35 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -51,13 +51,13 @@
 #include util/evsel.h
 #include util/debug.h
 #include util/color.h
+#include util/stat.h
 #include util/header.h
 #include util/cpumap.h
 #include util/thread.h
 #include util/thread_map.h
 
 #include sys/prctl.h
-#include math.h
 #include locale.h
 
 #define DEFAULT_SEPARATOR   
@@ -199,11 +199,6 @@ static int output_fd;
 
 static volatile int done = 0;
 
-struct stats
-{
-   double n, mean, M2;
-};
-
 struct perf_stat {
struct stats  res_stats[3];
 };
@@ -220,50 +215,6 @@ static void perf_evsel__free_stat_priv(struct perf_evsel 
*evsel)
evsel-priv = NULL;
 }
 
-static void update_stats(struct stats *stats, u64 val)
-{
-   double delta;
-
-   stats-n++;
-   delta = val - stats-mean;
-   stats-mean += delta / stats-n;
-   stats-M2 += delta*(val - stats-mean);
-}
-
-static double avg_stats(struct stats *stats)
-{
-   return stats-mean;
-}
-
-/*
- * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
- *
- *   (\Sum n_i^2) - ((\Sum n_i)^2)/n
- * s^2 = ---
- *  n - 1
- *
- * http://en.wikipedia.org/wiki/Stddev
- *
- * The std dev of the mean is related to the std dev by:
- *
- * s
- * s_mean = ---
- *  sqrt(n)
- *
- */
-static double stddev_stats(struct stats *stats)
-{
-   double variance, variance_mean;
-
-   if (!stats-n)
-   return 0.0;
-
-   variance = stats-M2 / (stats-n - 1);
-   variance_mean = variance / stats-n;
-
-   return sqrt(variance_mean);
-}
-
 static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
 static struct stats runtime_cycles_stats[MAX_NR_CPUS];
 static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
@@ -559,10 +510,7 @@ static int run_perf_stat(int argc __maybe_unused, const 
char **argv)
 
 static void print_noise_pct(double total, double avg)
 {
-   double pct = 0.0;
-
-   if (avg)
-   pct = 100.0*total/avg;
+   double pct = rel_stddev_stats(total, avg);
 
if (csv_output)
fprintf(output, %s%.2f%%, csv_sep, pct);
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
new file mode 100644
index 000..2374212
--- /dev/null
+++ b/tools/perf/util/stat.c
@@ -0,0 +1,57 @@
+#include math.h
+
+#include stat.h
+
+void update_stats(struct stats *stats, u64 val)
+{
+   double delta;
+
+   stats-n++;
+   delta = val - stats-mean;
+   stats-mean += delta / stats-n;
+   stats-M2 += delta*(val - stats-mean);
+}
+
+double avg_stats(struct stats *stats)
+{
+   return stats-mean;
+}
+
+/*
+ * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
+ *
+ *   (\Sum n_i^2) - ((\Sum n_i)^2)/n
+ * s^2 = ---
+ *  n - 1
+ *
+ * http://en.wikipedia.org/wiki/Stddev
+ *
+ * The std dev of the mean is related to the std dev by:
+ *
+ * s
+ * s_mean = ---
+ *  sqrt(n)
+ *
+ */
+double stddev_stats(struct stats *stats)
+{
+   double variance, variance_mean;
+
+   if (!stats-n)
+   return 0.0;
+
+   variance = stats-M2 / (stats-n - 1);
+   variance_mean = variance / stats-n;
+
+   return sqrt(variance_mean);
+}
+
+double rel_stddev_stats(double stddev, double avg)
+{
+   double

Re: [Qemu-devel] Block Migration and xbzrle

2012-09-17 Thread Orit Wasserman

On 09/16/2012 01:39 PM, Peter Lieven wrote:
 Hi,
 
 I remember that this was broken some time ago and currently with qemu-kvm 
 1.2.0 I am still not able to use
 block migration plus xbzrle. The migration fails if both are used together. 
 XBZRLE without block migration works.
 
 Can someone please advise what is the current expected behaviour?
XBZRLE only works on guest memory so it shouldn't be affected by block migration.
What is the error you are getting?
What command line ?

Regards,
Orit
 
 Thanks,
 Peter
 
 

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v9 0/2] kvm: level irqfd support

On Wed, 2012-08-22 at 11:25 +0300, Michael S. Tsirkin wrote:
 On Tue, Aug 21, 2012 at 07:28:15PM -0600, Alex Williamson wrote:
  On Wed, 2012-08-22 at 03:31 +0300, Michael S. Tsirkin wrote:
   On Tue, Aug 21, 2012 at 01:28:57PM -0600, Alex Williamson wrote:
Here's the much anticipated re-write of support for level irqfds.  As
Michael suggested, I've rolled the eoi/ack notification fd into
KVM_IRQFD as a new mode.  For lack of a better name, as there seems to
be objections to associating this specifically with an EOI or an ACK,
I've named this OADN or On Ack, De-assert & Notify.

Patch 1of2 switches current KVM_IRQFDs to use their own IRQ source ID
since we're potentially stepping on KVM_USERSPACE_IRQ_SOURCE_ID.
Unfortunately I was not able to make 2of2 use a single IRQ source ID,
the reason is it's racy.  Objects to track OADNs are made dynamically,
we look through existing ones for a match under spinlock and setup a
new one if there's no match.  On teardown, we can remove the OADN from
the list under lock, but that same lock prevents us from de-assigning
the IRQ ACK notifier or waiting for an RCU grace period.  We must make
sure that any unused GSI is de-asserted, but the above means it's
possible that another OADN has been created for this source ID/GSI
and de-asserting the GSI could lead to breakage.
   
   I do not see it. What breakage? Could you give an example please?
   
   
   I think what you are saying is last deassign must clear
   since otherwise we never will clear.
   I agree it is either that or delay deassign until ack.
   
   Can it be as simple as this (after all rcu etc dances)?
 lock irqfds
 if no oadns
 set level to 0
 unlock irqfds
   ?
  
  lock irqfds
  remove irqfd from oadn list
  if no oadns
  remove oadn
  set gsi 0
  unlock
  lock irqfds
  new oadn
  unlock irqfds
  
   EOI 
ack notify new 
  oadn
de-assert gsi
notify new oadn
   re-assert irqfd
ack notify old 
  oadn
de-assert gsi
notify old oadn
  
  synchronize_rcu
  
  kvm_unregister_irq_ack_notifier
 
  So, because the unregister is removed from the final clear and because
  we share an IRQ source ID there's a window where we can have two oadns
  registered for the same GSI.  The new one will de-assert and notify
  while the old one has an empty list to notify, but still de-asserts.  We
  can therefore de-assert w/o notify.
  
  By using a new source ID, we separate the two so users of the new oadn
  can't race the old and we can cleanly free the old source ID,
  de-asserting it.
 
 Need to think about it some more but is the problem two
 ack notifiers for the same gsi?

yes

 In that case, how about we add __kvm_unregister_irq_ack_notifier
 with no locking, and do most of the above under
 kvm-irq_lock?

Converting locks makes me nervous, but I'll give it a shot.  I don't
know how easy/possible it is though.  I know in previous iterations I
tried to make something similar to irqfd use a mutex and couldn't, but I
don't remember the details.

 With one change: it is better not to call synchronize_rcu
 under irq lock, I think we can safely move it to after
 __kvm_unregister_irq_ack_notifier.

Yep, that makes the interface pretty ugly though as we then have two
separate, but dependent steps.  Thanks,

Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH 0/9] [PULL] qemu-kvm.git uq/master queue

2012-09-17 Thread Anthony Liguori

Marcelo Tosatti mtosa...@redhat.com writes:

 The following changes since commit e0a1e32dbc41e6b2aabb436a9417dfd32177a3dc:

   Merge branch 'usb.64' of git://git.kraxel.org/qemu (2012-09-11 18:06:56 
 +0200)

 are available in the git repository at:

   git://git.kernel.org/pub/scm/virt/kvm/qemu-kvm.git uq/master


Pulled. Thanks.

Regards,

Anthony Liguori

 Jan Kiszka (7):
   memory: Flush coalesced MMIO on selected region access
   memory: Use transaction_begin/commit also for single-step operations
   memory: Fold memory_region_update_topology into 
 memory_region_transaction_commit
   memory: Flush coalesced MMIO on mapping and state changes
   VGA: Flush coalesced MMIO on related MMIO/PIO accesses
   kvm: Stop flushing coalesced MMIO on vmexit
   kvm: Rename irqchip_inject_ioctl to irq_set_ioctl

 Peter Maydell (2):
   update-linux-headers.sh: Don't hard code list of architectures
   kvm-all.c: Move init of irqchip_inject_ioctl out of kvm_irqchip_create()

  hw/cirrus_vga.c |7 +++
  hw/qxl.c|1 +
  hw/vga-isa-mm.c |1 +
  hw/vga.c|5 ++
  hw/vmware_vga.c |1 +
  kvm-all.c   |   17 +++---
  memory.c|  104 
 +++
  memory.h|   26 ++
  scripts/update-linux-headers.sh |   16 ++-
  9 files changed, 125 insertions(+), 53 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH v3 00/17] Allow changing of Hypervisor CPUIDs.

2012-09-17 Thread Eduardo Habkost

On Mon, Sep 17, 2012 at 10:00:50AM -0400, Don Slutz wrote:
Resend with new id so the backup files are not included.

Also known as Paravirtualization CPUIDs.

This is primarily done so that the guest will think it is running
under vmware when hypervisor-vendor=vmware is specified as a
property of a cpu.

This depends on:

http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01400.html

As far as I know it is #4. It depends on (1) and (2) and (3).

Correct.

I have removed v2 and added this version to my cpu-queue[1] branch.

[1] https://github.com/ehabkost/qemu/commits/cpu-queue
My branch is now based on Andreas's qom-cpu branch from
https://github.com/afaerber/qemu-cpu/commits/qom-cpu

This change is based on:

Microsoft Hypervisor CPUID Leaves:

http://msdn.microsoft.com/en-us/library/windows/hardware/ff542428%28v=vs.85%29.aspx

Linux kernel change starts with:
http://fixunix.com/kernel/538707-use-cpuid-communicate-hypervisor.html
Also:
http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/00100.html

VMware documentation on CPUIDs (Mechanisms to determine if software is
running in a VMware virtual machine):

http://kb.vmware.com/selfservice/microsites/search.do?language=en_UScmd=displayKCexternalId=1009458

Changes from v1 to v2:

1) Added 1/4 from
http://lists.gnu.org/archive/html/qemu-devel/2012-08/msg05153.html

Because Fred is changing jobs and so will not be pushing to get
this in. It needed to be rebased, And I needed it to complete the
testing of this change.

2) Added 2/4 because of the re-work I needed a way to clear all KVM bits,

3) The rework of v1. Make it fit into the object model re-work of cpu.c for
x86.

4) Added 3/4 -- The split out of the code that is not needed for accel=kvm.

Changes from v2 to v3:

Marcelo Tosatti:
Its one big patch, better split in logically correlated patches
(with better changelog). This would help reviewers.

So split 3 and 4 into 3 to 17. More info in change log.
No code change.

target-i386/cpu.c | 261
-
target-i386/cpu.h | 21 +
target-i386/kvm.c | 33 ++--
3 files changed, 304 insertions(+), 11 deletions(-)

--
Eduardo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html

[KVM] Guest Debugging Facilities in KVM

2012-09-17 Thread Dean Pucsek

Hello,

For my Masters thesis I am investigating the usage of Intel VT-x and branch 
tracing in the domain of malware analysis.  Essentially what I'm aiming to do 
is trace the execution of a guest VM and then pass that trace on to some other 
tools.  I've been playing with KVM for a couple weeks now but from comments such as 
(in arch/x86/kvm/vmx.c): 

   /*
* Forward all other exceptions that are valid in real mode.
* FIXME: Breaks guest debugging in real mode, needs to be fixed with
*the required debugging infrastructure rework.
*/

And (from an email sent to the list in July 2008): 

Note that guest debugging in real mode is broken now. This has to be
fixed by the scheduled debugging infrastructure rework (will be done
once base patches for QEMU have been accepted).

it is unclear to me how much support there is for guest debugging in KVM 
currently (I wasn't able to find any recent documentation on it) and what the 
debugging infrastructure referred to by these comments is.  I am interested in 
becoming involved with the KVM project in this respect however some guidance 
and direction on the guest debugging facilities would be greatly appreciated.

Cheers,


Dean Pucsek

Masters Student
Department of Computer Science
University of Victoria, Canada
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCHSET] kthread_worker: reimplement flush_kthread_work() to allow freeing during execution

2012-09-17 Thread Tejun Heo

On Fri, Sep 14, 2012 at 03:50:40PM -0700, Colin Cross wrote:
 This patch set fixes a reproducible crash I'm seeing on a 3.4.10
 kernel.  flush_kthread_worker (which is different from
 flush_kthread_work) is initializing a kthread_work and a completion on
 the stack, then queuing it and calling wait_for_completion.  Once the
 completion is signaled, flush_kthread_worker exits and the stack
 region used by the kthread_work may be immediately reused by another
 object on the stack, but kthread_worker_fn continues accessing its
 work pointer:
 work-func(work); - calls complete,
 effectively frees work
 smp_wmb();  /* wmb worker-b0 paired with flush-b1 */
 work-done_seq = work-queue_seq;   - overwrites a
 new stack object
 smp_mb();   /* mb worker-b1 paired with flush-b0 */
 if (atomic_read(work-flushing))
 wake_up_all(work-done);  - or crashes here
 
 These patches fix the problem by not accessing work after work-func
 is called, and should be backported to stable.  They apply cleanly to
 3.4.10.  Upstream commits are 9a2e03d8ed518a61154f18d83d6466628e519f94
 and 46f3d976213452350f9d10b0c2780c2681f7075b.

Yeah, you're right.  I wonder why this didn't come up before.  Greg,
can you please pick up these two commits?

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] Block Migration Assertion in qemu-kvm 1.2.0

2012-09-17 Thread Peter Lieven


On 09/17/12 10:41, Kevin Wolf wrote:

Am 16.09.2012 12:13, schrieb Peter Lieven:

Hi,

when trying to block migrate a VM from one node to another, the source
VM crashed with the following assertion:
block.c:3829: bdrv_set_in_use: Assertion `bs-in_use != in_use' failed.

Is this something already addressed/known?

Not that I'm aware of, at least.

Block migration doesn't seem to check whether the device is already in
use, maybe this is the problem. Not sure why it would be in use, though,
and in my quick test it didn't crash.

So we need some more information: What's your command line, did you do
anything specific in the monitor with block devices, what does the
stacktrace look like, etc.?
i was also able to reproduce a flush_blks: Assertion 
`block_mig_state.read_done = 0' failed. by

cancelling a block migration and restarting it afterwards.
however, how can I grep a stack trace after an assert?

thanks,
peter


Kevin


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCHSET] kthread_worker: reimplement flush_kthread_work() to allow freeing during execution

2012-09-17 Thread Greg KH

On Mon, Sep 17, 2012 at 12:40:16PM -0700, Tejun Heo wrote:
 On Fri, Sep 14, 2012 at 03:50:40PM -0700, Colin Cross wrote:
  This patch set fixes a reproducible crash I'm seeing on a 3.4.10
  kernel.  flush_kthread_worker (which is different from
  flush_kthread_work) is initializing a kthread_work and a completion on
  the stack, then queuing it and calling wait_for_completion.  Once the
  completion is signaled, flush_kthread_worker exits and the stack
  region used by the kthread_work may be immediately reused by another
  object on the stack, but kthread_worker_fn continues accessing its
  work pointer:
  work-func(work); - calls complete,
  effectively frees work
  smp_wmb();  /* wmb worker-b0 paired with flush-b1 */
  work-done_seq = work-queue_seq;   - overwrites a
  new stack object
  smp_mb();   /* mb worker-b1 paired with flush-b0 */
  if (atomic_read(work-flushing))
  wake_up_all(work-done);  - or crashes here
  
  These patches fix the problem by not accessing work after work-func
  is called, and should be backported to stable.  They apply cleanly to
  3.4.10.  Upstream commits are 9a2e03d8ed518a61154f18d83d6466628e519f94
  and 46f3d976213452350f9d10b0c2780c2681f7075b.
 
 Yeah, you're right.  I wonder why this didn't come up before.  Greg,
 can you please pick up these two commits?

Ok, will do, thanks for letting me know.

greg k-h
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] kvm tools: support build-time checks

Support using build-time check tools when building lkvm. This allows
using tools such as smatch with the same syntax used with kernel
code.

For example, to build with smatch checks, first make sure you have
smatch installed, then run:

make CHECK="smatch -p=kernel" C=1

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/Makefile | 16 
 1 file changed, 16 insertions(+)

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index d4b5eb3..e0d07dc 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -322,18 +322,34 @@ $(OBJS):
 # This rule relaxes the -Werror on libfdt, since for now it still has
 # a bunch of warnings. :(
 ../../scripts/dtc/libfdt/%.o: ../../scripts/dtc/libfdt/%.c
+ifeq ($(C),1)
+   $(E)   CHECK$@
+   $(Q) $(CHECK) -c $(CFLAGS_EASYGOING) $ -o $@
+endif
$(E)   CC   $@
$(Q) $(CC) -c $(CFLAGS_EASYGOING) $ -o $@
 
 util/rbtree.static.o util/rbtree.o: ../../lib/rbtree.c
+ifeq ($(C),1)
+   $(E)   CHECK$@
+   $(Q) $(CHECK) -c $(CFLAGS) $ -o $@
+endif
$(E)   CC   $@
$(Q) $(CC) -c $(CFLAGS) $ -o $@
 
 %.static.o: %.c
+ifeq ($(C),1)
+   $(E)   CHECK$@
+   $(Q) $(CHECK) -c $(CFLAGS) $(CFLAGS_STATOPT) $ -o $@
+endif
$(E)   CC   $@
$(Q) $(CC) -c $(CFLAGS) $(CFLAGS_STATOPT)  $ -o $@
 
 %.o: %.c
+ifeq ($(C),1)
+   $(E)   CHECK$@
+   $(Q) $(CHECK) -c $(CFLAGS) $(CFLAGS_DYNOPT) $ -o $@
+endif
$(E)   CC   $@
$(Q) $(CC) -c $(CFLAGS) $(CFLAGS_DYNOPT) $ -o $@
 
-- 
1.7.12

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: NIC emulation with built-in rate limiting?

2012-09-17 Thread Rick Jones

So, while the question includes the stability of how things get 
plumbed for a VM and whether moving some of that into the NIC emulation 
might help :)  I've gone ahead and re-run the experiment with bare-iron. 
 This time just for kicks I used 50 Mbit/s throttle inbound and 
outbound.  The results can be seen in:


ftp://ftp.netperf.org/50_mbits.tgz

Since this is now bare-iron, inbound is ingress and outbound is egress. 
 That is reversed from what it would be for a VM situation where VM 
outbound traverses the ingress filter and VM inbound traverses the 
egress qdisc.


Both systems were running Ubuntu 12.04.01 3.2.0-26 kernels, there was 
plenty of CPU horsepower (2x E5-2680s in this case) and the network 
between them was 10GbE using their 530FLB LOMs (BCM 57810S) connected 
via a ProCurve 6120 10GbE switch.  That simply happened to be the most 
convenient bare-iron hardware I had on hand as one of the cobbler's 
children.  There was no X running on the systems, the only thing of note 
running on them was netperf.


So, is the comparative instability between inbound and outbound 
fundamentally inherent in using ingress policing, or more a matter of 
"Silly Rick, you should be using these settings instead"?


If the former, is it then worthwhile to try to have NIC emulation only 
pull from the VM at the emulated rate, to keep the queues in the VM 
where it can react to them more directly?  And are there any NIC 
emulations doing that already (as virtio does not seem to at present)?


happy benchmarking,

rick jones
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH v3] kvm/fpu: Enable fully eager restore kvm FPU

2012-09-17 Thread Hao, Xudong

 -Original Message-
 From: Marcelo Tosatti [mailto:mtosa...@redhat.com]
 Sent: Monday, September 17, 2012 9:31 PM
 To: Hao, Xudong
 Cc: Avi Kivity; kvm@vger.kernel.org; Zhang, Xiantao
 Subject: Re: [PATCH v3] kvm/fpu: Enable fully eager restore kvm FPU

 On Mon, Sep 17, 2012 at 02:07:43AM +, Hao, Xudong wrote:
   -Original Message-
   From: Avi Kivity [mailto:a...@redhat.com]
   Sent: Friday, September 14, 2012 12:40 AM
   To: Marcelo Tosatti
   Cc: Hao, Xudong; kvm@vger.kernel.org; Zhang, Xiantao
   Subject: Re: [PATCH v3] kvm/fpu: Enable fully eager restore kvm FPU

   On 09/13/2012 07:29 PM, Marcelo Tosatti wrote:
On Thu, Sep 13, 2012 at 01:26:36PM -0300, Marcelo Tosatti wrote:
On Wed, Sep 12, 2012 at 04:10:24PM +0800, Xudong Hao wrote:
 Enable KVM FPU fully eager restore, if there is other FPU state which
 isn't
 tracked by CR0.TS bit.

 v3 changes from v2:
 - Make fpu active explicitly while guest xsave is enabling and 
 non-lazy
   xstate bit
 exist.

How about a guest_xcr0_can_lazy_saverestore bool to control this?
It only needs to be updated when guest xcr0 is updated.

That seems cleaner. Avi?

Reasoning below.

 v2 changes from v1:
 - Expand KVM_XSTATE_LAZY to 64 bits before negating it.

 Signed-off-by: Xudong Hao xudong@intel.com
 ---
  arch/x86/include/asm/kvm.h |4 
  arch/x86/kvm/vmx.c |2 ++
  arch/x86/kvm/x86.c |   15 ++-
  3 files changed, 20 insertions(+), 1 deletions(-)

 diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
 index 521bf25..4c27056 100644
 --- a/arch/x86/include/asm/kvm.h
 +++ b/arch/x86/include/asm/kvm.h
 @@ -8,6 +8,8 @@

  #include linux/types.h
  #include linux/ioctl.h
 +#include asm/user.h
 +#include asm/xsave.h

  /* Select x86 specific features in linux/kvm.h */
  #define __KVM_HAVE_PIT
 @@ -30,6 +32,8 @@
  /* Architectural interrupt line count. */
  #define KVM_NR_INTERRUPTS 256

 +#define KVM_XSTATE_LAZY (XSTATE_FP | XSTATE_SSE |
 XSTATE_YMM)
 +
  struct kvm_memory_alias {
  __u32 slot;  /* this has a different namespace than memory
 slots */
  __u32 flags;
 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index 248c2b4..853e875 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -3028,6 +3028,8 @@ static void vmx_set_cr0(struct kvm_vcpu
 *vcpu,
   unsigned long cr0)

  if (!vcpu-fpu_active)
  hw_cr0 |= X86_CR0_TS | X86_CR0_MP;
 +else
 +hw_cr0 = ~(X86_CR0_TS | X86_CR0_MP);

  vmcs_writel(CR0_READ_SHADOW, cr0);
  vmcs_writel(GUEST_CR0, hw_cr0);
 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
 index 20f2266..183cf60 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
 @@ -560,6 +560,8 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu,
 u32
   index, u64 xcr)
  return 1;
  if (xcr0  ~host_xcr0)
  return 1;
 +if (xcr0  ~((u64)KVM_XSTATE_LAZY))
 +vcpu-fpu_active = 1;

This is confusing. The variable allows to decrease the number of places
the decision is made.

   Better to have a helper function (lazy_fpu_allowed(), for example).
   Variables raise the question of whether they are maintained correctly.

  I realized that modifying the fpu_active variable directly is incorrect; the
 exception bitmap must be updated as well.
  To avoid depending on the order in which cr0 and the xcrs are set in the live
 migration case, how about calling fpu_activate() in kvm_set_xcr()? I can add a
 code comment at that call site.

 The objective of the change is to disable lazy fpu loading (that is,
 host fpu loaded in guest and vice-versa), when some bit except the
 initial three bits set in guest XCR0 (initial three being XSTATE_FP|XSTATE_SSE|
 XSTATE_YMM). Yes?

Yes, it's just the object.

 If i get that right, then the suggestion seems to be:

 static bool lazy_fpu_allowed()
 {
   return (vcpu->arch.xcr0 & ~((u64)KVM_XSTATE_LAZY));
 }

That may be:

static bool lazy_fpu_allowed()
{
return !(vcpu->arch.xcr0 & ~((u64)KVM_XSTATE_LAZY));
}

 On guest entry:
 if (!lazy_fpu_allowed(vcpu))
 kvm_x86_ops->fpu_activate(vcpu);

Yes, we can add it into guest entry: kvm_set_xcr(). Avi, other comments?

 if (vcpu->fpu_active)
 kvm_load_guest_fpu(vcpu);

 Does that make sense?

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC][PATCH] Improving directed yield scalability for PLE handler

2012-09-17 Thread Andrew Theurer

On Sun, 2012-09-16 at 11:55 +0300, Avi Kivity wrote:
 On 09/14/2012 12:30 AM, Andrew Theurer wrote:
 
  The concern I have is that even though we have gone through changes to
  help reduce the candidate vcpus we yield to, we still have a very poor
  idea of which vcpu really needs to run.  The result is high cpu usage in
  the get_pid_task and still some contention in the double runqueue lock.
  To make this scalable, we either need to significantly reduce the
  occurrence of the lock-holder preemption, or do a much better job of
  knowing which vcpu needs to run (and not unnecessarily yielding to vcpus
  which do not need to run).
  
  On reducing the occurrence:  The worst case for lock-holder preemption
  is having vcpus of same VM on the same runqueue.  This guarantees the
  situation of 1 vcpu running while another [of the same VM] is not.  To
  prove the point, I ran the same test, but with vcpus restricted to a
  range of host cpus, such that any single VM's vcpus can never be on the
  same runqueue.  In this case, all 10 VMs' vcpu-0's are on host cpus 0-4,
  vcpu-1's are on host cpus 5-9, and so on.  Here is the result:
  
  kvm_cpu_spin, and all
  yield_to changes, plus
  restricted vcpu placement:  8823 +/- 3.20%   much, much better
  
  On picking a better vcpu to yield to:  I really hesitate to rely on
  paravirt hint [telling us which vcpu is holding a lock], but I am not
  sure how else to reduce the candidate vcpus to yield to.  I suspect we
  are yielding to way more vcpus than are preempted lock-holders, and that
  IMO is just work accomplishing nothing.  Trying to think of way to
  further reduce candidate vcpus
 
 I wouldn't say that yielding to the wrong vcpu accomplishes nothing.
 That other vcpu gets work done (unless it is in pause loop itself) and
 the yielding vcpu gets put to sleep for a while, so it doesn't spend
 cycles spinning.  While we haven't fixed the problem at least the guest
 is accomplishing work, and meanwhile the real lock holder may get
 naturally scheduled and clear the lock.

OK, yes, if the other thread gets useful work done, then it is not
wasteful.  I was thinking of the worst case scenario, where any other
vcpu would likely spin as well, and the host side cpu-time for switching
vcpu threads was not all that productive.  Well, I suppose it does help
eliminate potential lock holding vcpus; it just seems to be not that
efficient or fast enough.

 The main problem with this theory is that the experiments don't seem to
 bear it out.

Granted, my test case is quite brutal.  It's nothing but over-committed
VMs which always have some spin lock activity.  However, we really
should try to fix the worst case scenario.

   So maybe one of the assumptions is wrong - the yielding
 vcpu gets scheduled early.  That could be the case if the two vcpus are
 on different runqueues - you could be changing the relative priority of
 vcpus on the target runqueue, but still remain on top yourself.  Is this
 possible with the current code?
 
 Maybe we should prefer vcpus on the same runqueue as yield_to targets,
 and only fall back to remote vcpus when we see it didn't help.
 
 Let's examine a few cases:
 
 1. spinner on cpu 0, lock holder on cpu 0
 
 win!
 
 2. spinner on cpu 0, random vcpu(s) (or normal processes) on cpu 0
 
 Spinner gets put to sleep, random vcpus get to work, low lock contention
 (no double_rq_lock), by the time spinner gets scheduled we might have won
 
 3. spinner on cpu 0, another spinner on cpu 0
 
 Worst case, we'll just spin some more.  Need to detect this case and
 migrate something in.

Well, we can certainly experiment and see what we get.

IMO, the key to getting this working really well on the large VMs is
finding the lock-holding cpu -quickly-.  What I think is happening is
that we go through a relatively long process to get to that one right
vcpu.  I guess I need to find a faster way to get there.

 4. spinner on cpu 0, alone
 
 Similar
 
 
 It seems we need to tie in to the load balancer.
 
 Would changing the priority of the task while it is spinning help the
 load balancer?

Not sure.

-Andrew






--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v10 0/2] kvm: level irqfd support

Updated with suggestions from Michael and Avi, the OADN option is
now a resample option.  Re-working locking went surprisingly
well, lockdep clean, and now allows us to use a single irq source
ID for all resample irqfds.  I hope we're close.  Thanks,

Alex

---

Alex Williamson (2):
  kvm: Add resampling irqfds for level triggered interrupts
  kvm: Provide pre-locked setup to irq ack notifier


 Documentation/virtual/kvm/api.txt |   13 +++
 arch/x86/kvm/x86.c|4 +
 include/linux/kvm.h   |   12 ++-
 include/linux/kvm_host.h  |8 +-
 virt/kvm/eventfd.c|  175 -
 virt/kvm/irq_comm.c   |   24 +
 virt/kvm/kvm_main.c   |2 
 7 files changed, 230 insertions(+), 8 deletions(-)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v10 1/2] kvm: Provide pre-locked setup to irq ack notifier

This enables better integration into irqfd setup where we can adjust
our lock ordering to hold irq_lock, making these callable and avoiding
irq source ID races.

Signed-off-by: Alex Williamson alex.william...@redhat.com
---

 include/linux/kvm_host.h |4 
 virt/kvm/irq_comm.c  |   18 --
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b70b48b..84f6950 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -628,8 +628,12 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 
irq, int level);
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm 
*kvm,
int irq_source_id, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
+void __kvm_register_irq_ack_notifier(struct kvm *kvm,
+struct kvm_irq_ack_notifier *kian);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
   struct kvm_irq_ack_notifier *kian);
+void __kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+  struct kvm_irq_ack_notifier *kian);
 void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
   struct kvm_irq_ack_notifier *kian);
 int kvm_request_irq_source_id(struct kvm *kvm);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 83402d7..dd0cbf6 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -191,19 +191,33 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned 
irqchip, unsigned pin)
rcu_read_unlock();
 }
 
+/* hold kvm-irq_lock */
+void __kvm_register_irq_ack_notifier(struct kvm *kvm,
+struct kvm_irq_ack_notifier *kian)
+{
+   hlist_add_head_rcu(kian-link, kvm-irq_ack_notifier_list);
+}
+
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
   struct kvm_irq_ack_notifier *kian)
 {
mutex_lock(kvm-irq_lock);
-   hlist_add_head_rcu(kian-link, kvm-irq_ack_notifier_list);
+   __kvm_register_irq_ack_notifier(kvm, kian);
mutex_unlock(kvm-irq_lock);
 }
 
+/* hold kvm-irq_lock and wait for rcu grace period */
+void __kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+  struct kvm_irq_ack_notifier *kian)
+{
+   hlist_del_init_rcu(kian-link);
+}
+
 void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian)
 {
mutex_lock(kvm-irq_lock);
-   hlist_del_init_rcu(kian-link);
+   __kvm_unregister_irq_ack_notifier(kvm, kian);
mutex_unlock(kvm-irq_lock);
synchronize_rcu();
 }

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v10 2/2] kvm: Add resampling irqfds for level triggered interrupts