date:20100621

[COMMIT master] device-assignment: Cleanup on exit

2010-06-21 Thread Avi Kivity

From: Alex Williamson alex.william...@redhat.com

close() the resource fd when we're done with it.  Unregister and munmap the
anonymous memory for the MSIX table.

Signed-off-by: Alex Williamson alex.william...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 7e53a95..ba02157 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -60,6 +60,8 @@
 
 static void assigned_dev_load_option_rom(AssignedDevice *dev);
 
+static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev);
+
 static uint32_t guest_to_host_ioport(AssignedDevRegion *region, uint32_t addr)
 {
 return region-u.r_baseport + (addr - region-e_physbase);
@@ -783,10 +785,14 @@ static void free_assigned_device(AssignedDevice *dev)
 fprintf(stderr,
Failed to unmap assigned device region: %s\n,
strerror(errno));
+close(pci_region-resource_fd);
 }
}
 }
 
+if (dev-cap.available  ASSIGNED_DEVICE_CAP_MSIX)
+assigned_dev_unregister_msix_mmio(dev);
+
 if (dev-real_device.config_fd) {
 close(dev-real_device.config_fd);
 dev-real_device.config_fd = 0;
@@ -1370,6 +1376,21 @@ static int 
assigned_dev_register_msix_mmio(AssignedDevice *dev)
 return 0;
 }
 
+static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
+{
+if (!dev-msix_table_page)
+return;
+
+cpu_unregister_io_memory(dev-mmio_index);
+dev-mmio_index = 0;
+
+if (munmap(dev-msix_table_page, 0x1000) == -1) {
+fprintf(stderr, error unmapping msix_table_page! %s\n,
+strerror(errno));
+}
+dev-msix_table_page = NULL;
+}
+
 static int assigned_initfn(struct PCIDevice *pci_dev)
 {
 AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] test: Add XSAVE unit test

2010-06-21 Thread Avi Kivity

From: Sheng Yang sh...@linux.intel.com

Based on the IDT test framework.

Signed-off-by: Sheng Yang sh...@linux.intel.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/kvm/test/config-x86-common.mak b/kvm/test/config-x86-common.mak
index 800b635..0e1ccce 100644
--- a/kvm/test/config-x86-common.mak
+++ b/kvm/test/config-x86-common.mak
@@ -61,6 +61,8 @@ $(TEST_DIR)/msr.flat: $(cstart.o) $(TEST_DIR)/msr.o
 
 $(TEST_DIR)/idt_test.flat: $(cstart.o) $(TEST_DIR)/idt.o $(TEST_DIR)/idt_test.o
 
+$(TEST_DIR)/xsave.flat: $(cstart.o) $(TEST_DIR)/idt.o $(TEST_DIR)/xsave.o
+
 arch_clean:
$(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat \
$(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o
diff --git a/kvm/test/config-x86_64.mak b/kvm/test/config-x86_64.mak
index f9cd121..2da2906 100644
--- a/kvm/test/config-x86_64.mak
+++ b/kvm/test/config-x86_64.mak
@@ -5,6 +5,7 @@ ldarch = elf64-x86-64
 CFLAGS += -D__x86_64__
 
 tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
- $(TEST_DIR)/emulator.flat $(TEST_DIR)/idt_test.flat
+ $(TEST_DIR)/emulator.flat $(TEST_DIR)/idt_test.flat \
+ $(TEST_DIR)/xsave.flat
 
 include config-x86-common.mak
diff --git a/kvm/test/x86/xsave.c b/kvm/test/x86/xsave.c
new file mode 100644
index 000..a22b44c
--- /dev/null
+++ b/kvm/test/x86/xsave.c
@@ -0,0 +1,262 @@
+#include libcflat.h
+#include idt.h
+
+#ifdef __x86_64__
+#define uint64_t unsigned long
+#else
+#define uint64_t unsigned long long
+#endif
+
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+unsigned int *ecx, unsigned int *edx)
+{
+/* ecx is often an input as well as an output. */
+asm volatile(cpuid
+: =a (*eax),
+=b (*ebx),
+=c (*ecx),
+=d (*edx)
+: 0 (*eax), 2 (*ecx));
+}
+
+/*
+ * Generic CPUID function
+ * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
+ * resulting in stale register contents being returned.
+ */
+void cpuid(unsigned int op,
+unsigned int *eax, unsigned int *ebx,
+unsigned int *ecx, unsigned int *edx)
+{
+*eax = op;
+*ecx = 0;
+__cpuid(eax, ebx, ecx, edx);
+}
+
+/* Some CPUID calls want 'count' to be placed in ecx */
+void cpuid_count(unsigned int op, int count,
+unsigned int *eax, unsigned int *ebx,
+unsigned int *ecx, unsigned int *edx)
+{
+*eax = op;
+*ecx = count;
+__cpuid(eax, ebx, ecx, edx);
+}
+
+int xgetbv_checking(u32 index, u64 *result)
+{
+u32 eax, edx;
+
+asm volatile(ASM_TRY(1f)
+.byte 0x0f,0x01,0xd0\n\t /* xgetbv */
+1:
+: =a (eax), =d (edx)
+: c (index));
+*result = eax + ((u64)edx  32);
+return exception_vector();
+}
+
+int xsetbv_checking(u32 index, u64 value)
+{
+u32 eax = value;
+u32 edx = value  32;
+
+asm volatile(ASM_TRY(1f)
+.byte 0x0f,0x01,0xd1\n\t /* xsetbv */
+1:
+: : a (eax), d (edx), c (index));
+return exception_vector();
+}
+
+unsigned long read_cr4(void)
+{
+unsigned long val;
+asm volatile(mov %%cr4,%0 : =r (val));
+return val;
+}
+
+int write_cr4_checking(unsigned long val)
+{
+asm volatile(ASM_TRY(1f)
+mov %0,%%cr4\n\t
+1:: : r (val));
+return exception_vector();
+}
+
+#define CPUID_1_ECX_XSAVE  (1  26)
+#define CPUID_1_ECX_OSXSAVE(1  27)
+int check_cpuid_1_ecx(unsigned int bit)
+{
+unsigned int eax, ebx, ecx, edx;
+cpuid(1, eax, ebx, ecx, edx);
+if (ecx  bit)
+return 1;
+return 0;
+}
+
+uint64_t get_supported_xcr0(void)
+{
+unsigned int eax, ebx, ecx, edx;
+cpuid_count(0xd, 0, eax, ebx, ecx, edx);
+printf(eax %x, ebx %x, ecx %x, edx %x\n,
+eax, ebx, ecx, edx);
+return eax + ((u64)edx  32);
+}
+
+#define X86_CR4_OSXSAVE0x0004
+#define XCR_XFEATURE_ENABLED_MASK   0x
+#define XCR_XFEATURE_ILLEGAL_MASK   0x0010
+
+#define XSTATE_FP   0x1
+#define XSTATE_SSE  0x2
+#define XSTATE_YMM  0x4
+
+static int total_tests, fail_tests;
+
+void pass_if(int condition)
+{
+total_tests ++;
+if (condition)
+printf(Pass!\n);
+else {
+printf(Fail!\n);
+fail_tests ++;
+}
+}
+
+void test_xsave(void)
+{
+unsigned long cr4;
+uint64_t supported_xcr0;
+uint64_t test_bits;
+u64 xcr0;
+int r;
+
+printf(Legal instruction testing:\n);
+supported_xcr0 = get_supported_xcr0();
+printf(Supported XCR0 bits: 0x%x\n, supported_xcr0);
+
+printf(Check minimal XSAVE required bits: );
+test_bits = XSTATE_FP | XSTATE_SSE;
+pass_if((supported_xcr0  test_bits) == test_bits);
+
+printf(Set CR4 OSXSAVE: );
+cr4 = read_cr4();
+r = write_cr4_checking(cr4 | X86_CR4_OSXSAVE);
+pass_if(r == 0);
+
+printf(Check CPUID.1.ECX.OSXSAVE - expect 1: );
+pass_if(check_cpuid_1_ecx(CPUID_1_ECX_OSXSAVE));
+
+printf(Legal

[COMMIT master] qemu: kvm: Enable XSAVE live migration support

2010-06-21 Thread Avi Kivity

From: Sheng Yang sh...@linux.intel.com

Signed-off-by: Sheng Yang sh...@linux.intel.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index 3c33e64..4f0b1d0 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -772,10 +772,20 @@ static void get_seg(SegmentCache *lhs, const struct 
kvm_segment *rhs)
| (rhs-avl * DESC_AVL_MASK);
 }
 
+#define XSAVE_CWD_RIP 2
+#define XSAVE_CWD_RDP 4
+#define XSAVE_MXCSR   6
+#define XSAVE_ST_SPACE8
+#define XSAVE_XMM_SPACE   40
+#define XSAVE_XSTATE_BV   128
+#define XSAVE_YMMH_SPACE  144
+
 void kvm_arch_load_regs(CPUState *env, int level)
 {
 struct kvm_regs regs;
 struct kvm_fpu fpu;
+struct kvm_xsave* xsave;
+struct kvm_xcrs xcrs;
 struct kvm_sregs sregs;
 struct kvm_msr_entry msrs[100];
 int rc, n, i;
@@ -806,16 +816,47 @@ void kvm_arch_load_regs(CPUState *env, int level)
 
 kvm_set_regs(env, regs);
 
-memset(fpu, 0, sizeof fpu);
-fpu.fsw = env-fpus  ~(7  11);
-fpu.fsw |= (env-fpstt  7)  11;
-fpu.fcw = env-fpuc;
-for (i = 0; i  8; ++i)
-   fpu.ftwx |= (!env-fptags[i])  i;
-memcpy(fpu.fpr, env-fpregs, sizeof env-fpregs);
-memcpy(fpu.xmm, env-xmm_regs, sizeof env-xmm_regs);
-fpu.mxcsr = env-mxcsr;
-kvm_set_fpu(env, fpu);
+if (kvm_check_extension(kvm_state, KVM_CAP_XSAVE)) {
+uint16_t cwd, swd, twd, fop;
+
+xsave = qemu_memalign(4096, sizeof(struct kvm_xsave));
+memset(xsave, 0, sizeof(struct kvm_xsave));
+cwd = swd = twd = fop = 0;
+swd = env-fpus  ~(7  11);
+swd |= (env-fpstt  7)  11;
+cwd = env-fpuc;
+for (i = 0; i  8; ++i)
+twd |= (!env-fptags[i])  i;
+xsave-region[0] = (uint32_t)(swd  16) + cwd;
+xsave-region[1] = (uint32_t)(fop  16) + twd;
+memcpy(xsave-region[XSAVE_ST_SPACE], env-fpregs,
+sizeof env-fpregs);
+memcpy(xsave-region[XSAVE_XMM_SPACE], env-xmm_regs,
+sizeof env-xmm_regs);
+xsave-region[XSAVE_MXCSR] = env-mxcsr;
+*(uint64_t *)xsave-region[XSAVE_XSTATE_BV] = env-xstate_bv;
+memcpy(xsave-region[XSAVE_YMMH_SPACE], env-ymmh_regs,
+sizeof env-ymmh_regs);
+kvm_set_xsave(env, xsave);
+if (kvm_check_extension(kvm_state, KVM_CAP_XCRS)) {
+xcrs.nr_xcrs = 1;
+xcrs.flags = 0;
+xcrs.xcrs[0].xcr = 0;
+xcrs.xcrs[0].value = env-xcr0;
+kvm_set_xcrs(env, xcrs);
+}
+} else {
+memset(fpu, 0, sizeof fpu);
+fpu.fsw = env-fpus  ~(7  11);
+fpu.fsw |= (env-fpstt  7)  11;
+fpu.fcw = env-fpuc;
+for (i = 0; i  8; ++i)
+fpu.ftwx |= (!env-fptags[i])  i;
+memcpy(fpu.fpr, env-fpregs, sizeof env-fpregs);
+memcpy(fpu.xmm, env-xmm_regs, sizeof env-xmm_regs);
+fpu.mxcsr = env-mxcsr;
+kvm_set_fpu(env, fpu);
+}
 
 memset(sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));
 if (env-interrupt_injected = 0) {
@@ -934,6 +975,8 @@ void kvm_arch_save_regs(CPUState *env)
 {
 struct kvm_regs regs;
 struct kvm_fpu fpu;
+struct kvm_xsave* xsave;
+struct kvm_xcrs xcrs;
 struct kvm_sregs sregs;
 struct kvm_msr_entry msrs[100];
 uint32_t hflags;
@@ -965,15 +1008,43 @@ void kvm_arch_save_regs(CPUState *env)
 env-eflags = regs.rflags;
 env-eip = regs.rip;
 
-kvm_get_fpu(env, fpu);
-env-fpstt = (fpu.fsw  11)  7;
-env-fpus = fpu.fsw;
-env-fpuc = fpu.fcw;
-for (i = 0; i  8; ++i)
-   env-fptags[i] = !((fpu.ftwx  i)  1);
-memcpy(env-fpregs, fpu.fpr, sizeof env-fpregs);
-memcpy(env-xmm_regs, fpu.xmm, sizeof env-xmm_regs);
-env-mxcsr = fpu.mxcsr;
+if (kvm_check_extension(kvm_state, KVM_CAP_XSAVE)) {
+uint16_t cwd, swd, twd, fop;
+xsave = qemu_memalign(4096, sizeof(struct kvm_xsave));
+kvm_get_xsave(env, xsave);
+cwd = (uint16_t)xsave-region[0];
+swd = (uint16_t)(xsave-region[0]  16);
+twd = (uint16_t)xsave-region[1];
+fop = (uint16_t)(xsave-region[1]  16);
+env-fpstt = (swd  11)  7;
+env-fpus = swd;
+env-fpuc = cwd;
+for (i = 0; i  8; ++i)
+env-fptags[i] = !((twd  i)  1);
+env-mxcsr = xsave-region[XSAVE_MXCSR];
+memcpy(env-fpregs, xsave-region[XSAVE_ST_SPACE],
+sizeof env-fpregs);
+memcpy(env-xmm_regs, xsave-region[XSAVE_XMM_SPACE],
+sizeof env-xmm_regs);
+env-xstate_bv = *(uint64_t *)xsave-region[XSAVE_XSTATE_BV];
+memcpy(env-ymmh_regs, xsave-region[XSAVE_YMMH_SPACE],
+sizeof env-ymmh_regs);
+if (kvm_check_extension(kvm_state, KVM_CAP_XCRS)) {
+kvm_get_xcrs(env, xcrs);
+if (xcrs.xcrs[0].xcr == 0)
+env-xcr0 = xcrs.xcrs[0].value;
+}
+} else {
+kvm_get_fpu(env, fpu);
+

[COMMIT master] Fix compilation with !KVM_CAP_XSAVE

2010-06-21 Thread Avi Kivity

From: Marcelo Tosatti mtosa...@redhat.com

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index 4f0b1d0..1232049 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -784,8 +784,6 @@ void kvm_arch_load_regs(CPUState *env, int level)
 {
 struct kvm_regs regs;
 struct kvm_fpu fpu;
-struct kvm_xsave* xsave;
-struct kvm_xcrs xcrs;
 struct kvm_sregs sregs;
 struct kvm_msr_entry msrs[100];
 int rc, n, i;
@@ -816,7 +814,10 @@ void kvm_arch_load_regs(CPUState *env, int level)
 
 kvm_set_regs(env, regs);
 
+#ifdef KVM_CAP_XSAVE
 if (kvm_check_extension(kvm_state, KVM_CAP_XSAVE)) {
+struct kvm_xsave* xsave;
+
 uint16_t cwd, swd, twd, fop;
 
 xsave = qemu_memalign(4096, sizeof(struct kvm_xsave));
@@ -839,6 +840,8 @@ void kvm_arch_load_regs(CPUState *env, int level)
 sizeof env-ymmh_regs);
 kvm_set_xsave(env, xsave);
 if (kvm_check_extension(kvm_state, KVM_CAP_XCRS)) {
+struct kvm_xcrs xcrs;
+
 xcrs.nr_xcrs = 1;
 xcrs.flags = 0;
 xcrs.xcrs[0].xcr = 0;
@@ -846,6 +849,7 @@ void kvm_arch_load_regs(CPUState *env, int level)
 kvm_set_xcrs(env, xcrs);
 }
 } else {
+#endif
 memset(fpu, 0, sizeof fpu);
 fpu.fsw = env-fpus  ~(7  11);
 fpu.fsw |= (env-fpstt  7)  11;
@@ -856,7 +860,9 @@ void kvm_arch_load_regs(CPUState *env, int level)
 memcpy(fpu.xmm, env-xmm_regs, sizeof env-xmm_regs);
 fpu.mxcsr = env-mxcsr;
 kvm_set_fpu(env, fpu);
+#ifdef KVM_CAP_XSAVE
 }
+#endif
 
 memset(sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));
 if (env-interrupt_injected = 0) {
@@ -975,8 +981,6 @@ void kvm_arch_save_regs(CPUState *env)
 {
 struct kvm_regs regs;
 struct kvm_fpu fpu;
-struct kvm_xsave* xsave;
-struct kvm_xcrs xcrs;
 struct kvm_sregs sregs;
 struct kvm_msr_entry msrs[100];
 uint32_t hflags;
@@ -1008,7 +1012,9 @@ void kvm_arch_save_regs(CPUState *env)
 env-eflags = regs.rflags;
 env-eip = regs.rip;
 
+#ifdef KVM_CAP_XSAVE
 if (kvm_check_extension(kvm_state, KVM_CAP_XSAVE)) {
+struct kvm_xsave* xsave;
 uint16_t cwd, swd, twd, fop;
 xsave = qemu_memalign(4096, sizeof(struct kvm_xsave));
 kvm_get_xsave(env, xsave);
@@ -1030,11 +1036,14 @@ void kvm_arch_save_regs(CPUState *env)
 memcpy(env-ymmh_regs, xsave-region[XSAVE_YMMH_SPACE],
 sizeof env-ymmh_regs);
 if (kvm_check_extension(kvm_state, KVM_CAP_XCRS)) {
+struct kvm_xcrs xcrs;
+
 kvm_get_xcrs(env, xcrs);
 if (xcrs.xcrs[0].xcr == 0)
 env-xcr0 = xcrs.xcrs[0].value;
 }
 } else {
+#endif
 kvm_get_fpu(env, fpu);
 env-fpstt = (fpu.fsw  11)  7;
 env-fpus = fpu.fsw;
@@ -1044,7 +1053,9 @@ void kvm_arch_save_regs(CPUState *env)
 memcpy(env-fpregs, fpu.fpr, sizeof env-fpregs);
 memcpy(env-xmm_regs, fpu.xmm, sizeof env-xmm_regs);
 env-mxcsr = fpu.mxcsr;
+#ifdef KVM_CAP_XSAVE
 }
+#endif
 
 kvm_get_sregs(env, sregs);
 
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] test: add test for pusha and popa instructions

2010-06-21 Thread Avi Kivity

From: Wei Yongjun yj...@cn.fujitsu.com

This patch adds a test for the pusha and popa instructions.

Signed-off-by: Wei Yongjun yj...@cn.fujitsu.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/kvm/test/x86/realmode.c b/kvm/test/x86/realmode.c
index 70a1e05..bd79348 100644
--- a/kvm/test/x86/realmode.c
+++ b/kvm/test/x86/realmode.c
@@ -820,12 +820,58 @@ void test_null(void)
print_serial(null test: PASS\n);
 }
 
+void test_pusha_popa()
+{
+   struct regs inregs = { .eax = 0, .ebx = 1, .ecx = 2, .edx = 3, .esi = 
4, .edi = 5, .ebp = 6, .esp = 7}, outregs;
+
+   MK_INSN(pusha, pusha\n\t
+  pop %edi\n\t
+  pop %esi\n\t
+  pop %ebp\n\t
+  pop %eax\n\t
+  pop %ebx\n\t
+  pop %edx\n\t
+  pop %ecx\n\t
+  pop %esp\n\t
+  xchg %esp, %eax\n\t
+  );
+
+   MK_INSN(popa, push %eax\n\t
+ push %ecx\n\t
+ push %edx\n\t
+ push %ebx\n\t
+ push %esp\n\t
+ push %ebp\n\t
+ push %esi\n\t
+ push %edi\n\t
+ popa\n\t
+ );
+
+   exec_in_big_real_mode(inregs, outregs,
+ insn_pusha,
+ insn_pusha_end - insn_pusha);
+
+   if (!regs_equal(inregs, outregs, 0))
+   print_serial(Pusha/Popa Test1: FAIL\n);
+   else
+   print_serial(Pusha/Popa Test1: PASS\n);
+
+   exec_in_big_real_mode(inregs, outregs,
+ insn_popa,
+ insn_popa_end - insn_popa);
+   if (!regs_equal(inregs, outregs, 0))
+   print_serial(Pusha/Popa Test2: FAIL\n);
+   else
+   print_serial(Pusha/Popa Test2: PASS\n);
+}
+
 void realmode_start(void)
 {
test_null();
 
test_shld();
test_push_pop();
+   test_pusha_popa();
test_mov_imm();
test_cmp_imm();
test_add_imm();
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] qemu: kvm: Extend kvm_arch_get_supported_cpuid() to support index

2010-06-21 Thread Avi Kivity

From: Sheng Yang sh...@linux.intel.com

This will be used later for XSAVE-related CPUID.

Signed-off-by: Sheng Yang sh...@linux.intel.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/kvm.h b/kvm.h
index aab5118..16b06a4 100644
--- a/kvm.h
+++ b/kvm.h
@@ -152,7 +152,7 @@ bool kvm_arch_stop_on_emulation_error(CPUState *env);
 int kvm_check_extension(KVMState *s, unsigned int extension);
 
 uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
-  int reg);
+  uint32_t index, int reg);
 void kvm_cpu_synchronize_state(CPUState *env);
 void kvm_cpu_synchronize_post_reset(CPUState *env);
 void kvm_cpu_synchronize_post_init(CPUState *env);
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 66df142..57327f5 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -71,7 +71,8 @@ static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
 return cpuid;
 }
 
-uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, int 
reg)
+uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
+  uint32_t index, int reg)
 {
 struct kvm_cpuid2 *cpuid;
 int i, max;
@@ -88,7 +89,8 @@ uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t 
function, int reg)
 }
 
 for (i = 0; i  cpuid-nent; ++i) {
-if (cpuid-entries[i].function == function) {
+if (cpuid-entries[i].function == function 
+cpuid-entries[i].index == index) {
 switch (reg) {
 case R_EAX:
 ret = cpuid-entries[i].eax;
@@ -110,7 +112,7 @@ uint32_t kvm_arch_get_supported_cpuid(CPUState *env, 
uint32_t function, int reg)
 /* On Intel, kvm returns cpuid according to the Intel spec,
  * so add missing bits according to the AMD spec:
  */
-cpuid_1_edx = kvm_arch_get_supported_cpuid(env, 1, R_EDX);
+cpuid_1_edx = kvm_arch_get_supported_cpuid(env, 1, 0, 
R_EDX);
 ret |= cpuid_1_edx  0x183f7ff;
 break;
 }
@@ -126,7 +128,8 @@ uint32_t kvm_arch_get_supported_cpuid(CPUState *env, 
uint32_t function, int reg)
 
 #else
 
-uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, int 
reg)
+uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
+  uint32_t index, int reg)
 {
 return -1U;
 }
@@ -190,16 +193,16 @@ int kvm_arch_init_vcpu(CPUState *env)
 
 #endif
 
-env-cpuid_features = kvm_arch_get_supported_cpuid(env, 1, R_EDX);
+env-cpuid_features = kvm_arch_get_supported_cpuid(env, 1, 0, R_EDX);
 
 i = env-cpuid_ext_features  CPUID_EXT_HYPERVISOR;
-env-cpuid_ext_features = kvm_arch_get_supported_cpuid(env, 1, R_ECX);
+env-cpuid_ext_features = kvm_arch_get_supported_cpuid(env, 1, 0, R_ECX);
 env-cpuid_ext_features |= i;
 
 env-cpuid_ext2_features = kvm_arch_get_supported_cpuid(env, 0x8001,
- R_EDX);
+ 0, R_EDX);
 env-cpuid_ext3_features = kvm_arch_get_supported_cpuid(env, 0x8001,
- R_ECX);
+ 0, R_ECX);
 
 cpuid_i = 0;
 
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] qemu: Enable XSAVE related CPUID

2010-06-21 Thread Avi Kivity

From: Sheng Yang sh...@linux.intel.com

We can support it in KVM now. The 0xd leaf is queried from KVM.

Signed-off-by: Sheng Yang sh...@linux.intel.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c
index fb78061..26e4054 100644
--- a/target-i386/cpuid.c
+++ b/target-i386/cpuid.c
@@ -1081,6 +1081,27 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 *ecx = 0;
 *edx = 0;
 break;
+case 0xD:
+/* Processor Extended State */
+if (!(env-cpuid_ext_features  CPUID_EXT_XSAVE)) {
+*eax = 0;
+*ebx = 0;
+*ecx = 0;
+*edx = 0;
+break;
+}
+if (kvm_enabled()) {
+*eax = kvm_arch_get_supported_cpuid(env, 0xd, count, R_EAX);
+*ebx = kvm_arch_get_supported_cpuid(env, 0xd, count, R_EBX);
+*ecx = kvm_arch_get_supported_cpuid(env, 0xd, count, R_ECX);
+*edx = kvm_arch_get_supported_cpuid(env, 0xd, count, R_EDX);
+} else {
+*eax = 0;
+*ebx = 0;
+*ecx = 0;
+*edx = 0;
+}
+break;
 case 0x8000:
 *eax = env-cpuid_xlevel;
 *ebx = env-cpuid_vendor1;
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] device-assignment, msi: PBA is long

2010-06-21 Thread Avi Kivity

From: Hidetoshi Seto seto.hideto...@jp.fujitsu.com

A pci_read_long() was accidentally replaced with assigned_dev_pci_read_byte()
by the commit:
 commit a81a1f0a7410976be7dbc9a81524a8640f446ab5
 Author: Alex Williamson alex.william...@redhat.com
device-assignment: Don't use libpci

Signed-off-by: Hidetoshi Seto seto.hideto...@jp.fujitsu.com
Acked-by: Alex Williamson alex.william...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index ba02157..20ed934 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -1284,7 +1284,7 @@ static int assigned_device_pci_cap_init(PCIDevice 
*pci_dev)
   pci_dev-cap.length + PCI_MSIX_TABLE) = msix_table_entry;
 *(uint32_t *)(pci_dev-config + pci_dev-cap.start +
   pci_dev-cap.length + PCI_MSIX_PBA) =
-assigned_dev_pci_read_byte(pci_dev, pos + PCI_MSIX_PBA);
+assigned_dev_pci_read_long(pci_dev, pos + PCI_MSIX_PBA);
 bar_nr = msix_table_entry  PCI_MSIX_BIR;
 msix_table_entry = ~PCI_MSIX_BIR;
 dev-msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] Synchronize kernel headers

2010-06-21 Thread Avi Kivity

From: Marcelo Tosatti mtosa...@redhat.com

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/kvm/include/linux/kvm.h b/kvm/include/linux/kvm.h
index 6485981..e46729e 100644
--- a/kvm/include/linux/kvm.h
+++ b/kvm/include/linux/kvm.h
@@ -103,7 +103,7 @@ struct kvm_userspace_memory_region {
 
 /* for kvm_memory_region::flags */
 #define KVM_MEM_LOG_DIRTY_PAGES  1UL
-
+#define KVM_MEMSLOT_INVALID  (1UL  1)
 
 /* for KVM_IRQ_LINE */
 struct kvm_irq_level {
@@ -160,6 +160,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_DCR  15
 #define KVM_EXIT_NMI  16
 #define KVM_EXIT_INTERNAL_ERROR   17
+#define KVM_EXIT_OSI  18
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 #define KVM_INTERNAL_ERROR_EMULATION 1
@@ -259,6 +260,10 @@ struct kvm_run {
__u32 ndata;
__u64 data[16];
} internal;
+   /* KVM_EXIT_OSI */
+   struct {
+   __u64 gprs[32];
+   } osi;
/* Fix the size of the union. */
char padding[256];
};
@@ -400,6 +405,15 @@ struct kvm_ioeventfd {
__u8  pad[36];
 };
 
+/* for KVM_ENABLE_CAP */
+struct kvm_enable_cap {
+   /* in */
+   __u32 cap;
+   __u32 flags;
+   __u64 args[4];
+   __u8  pad[64];
+};
+
 #define KVMIO 0xAE
 
 /*
@@ -497,8 +511,25 @@ struct kvm_ioeventfd {
 #endif
 #define KVM_CAP_S390_PSW 42
 #define KVM_CAP_PPC_SEGSTATE 43
-
+#define KVM_CAP_HYPERV 44
+#define KVM_CAP_HYPERV_VAPIC 45
+#define KVM_CAP_HYPERV_SPIN 46
 #define KVM_CAP_PCI_SEGMENT 47
+#define KVM_CAP_PPC_PAIRED_SINGLES 48
+#define KVM_CAP_INTR_SHADOW 49
+#ifdef __KVM_HAVE_DEBUGREGS
+#define KVM_CAP_DEBUGREGS 50
+#endif
+#define KVM_CAP_X86_ROBUST_SINGLESTEP 51
+#define KVM_CAP_PPC_OSI 52
+#define KVM_CAP_PPC_UNSET_IRQ 53
+#define KVM_CAP_ENABLE_CAP 54
+#ifdef __KVM_HAVE_XSAVE
+#define KVM_CAP_XSAVE 55
+#endif
+#ifdef __KVM_HAVE_XCRS
+#define KVM_CAP_XCRS 56
+#endif
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -685,6 +716,16 @@ struct kvm_clock_data {
 /* Available with KVM_CAP_VCPU_EVENTS */
 #define KVM_GET_VCPU_EVENTS   _IOR(KVMIO,  0x9f, struct kvm_vcpu_events)
 #define KVM_SET_VCPU_EVENTS   _IOW(KVMIO,  0xa0, struct kvm_vcpu_events)
+/* Available with KVM_CAP_DEBUGREGS */
+#define KVM_GET_DEBUGREGS _IOR(KVMIO,  0xa1, struct kvm_debugregs)
+#define KVM_SET_DEBUGREGS _IOW(KVMIO,  0xa2, struct kvm_debugregs)
+#define KVM_ENABLE_CAP_IOW(KVMIO,  0xa3, struct kvm_enable_cap)
+/* Available with KVM_CAP_XSAVE */
+#define KVM_GET_XSAVE_IOR(KVMIO,  0xa4, struct kvm_xsave)
+#define KVM_SET_XSAVE_IOW(KVMIO,  0xa5, struct kvm_xsave)
+/* Available with KVM_CAP_XCRS */
+#define KVM_GET_XCRS _IOR(KVMIO,  0xa6, struct kvm_xcrs)
+#define KVM_SET_XCRS _IOW(KVMIO,  0xa7, struct kvm_xcrs)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU(1  0)
 
diff --git a/kvm/include/x86/asm/kvm.h b/kvm/include/x86/asm/kvm.h
index f46b79f..4d8dcbd 100644
--- a/kvm/include/x86/asm/kvm.h
+++ b/kvm/include/x86/asm/kvm.h
@@ -21,6 +21,9 @@
 #define __KVM_HAVE_PIT_STATE2
 #define __KVM_HAVE_XEN_HVM
 #define __KVM_HAVE_VCPU_EVENTS
+#define __KVM_HAVE_DEBUGREGS
+#define __KVM_HAVE_XSAVE
+#define __KVM_HAVE_XCRS
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
@@ -257,6 +260,11 @@ struct kvm_reinject_control {
 /* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */
 #define KVM_VCPUEVENT_VALID_NMI_PENDING0x0001
 #define KVM_VCPUEVENT_VALID_SIPI_VECTOR0x0002
+#define KVM_VCPUEVENT_VALID_SHADOW 0x0004
+
+/* Interrupt shadow states */
+#define KVM_X86_SHADOW_INT_MOV_SS  0x01
+#define KVM_X86_SHADOW_INT_STI 0x02
 
 /* for KVM_GET/SET_VCPU_EVENTS */
 struct kvm_vcpu_events {
@@ -271,7 +279,7 @@ struct kvm_vcpu_events {
__u8 injected;
__u8 nr;
__u8 soft;
-   __u8 pad;
+   __u8 shadow;
} interrupt;
struct {
__u8 injected;
@@ -284,4 +292,33 @@ struct kvm_vcpu_events {
__u32 reserved[10];
 };
 
+/* for KVM_GET/SET_DEBUGREGS */
+struct kvm_debugregs {
+   __u64 db[4];
+   __u64 dr6;
+   __u64 dr7;
+   __u64 flags;
+   __u64 reserved[9];
+};
+
+/* for KVM_CAP_XSAVE */
+struct kvm_xsave {
+   __u32 region[1024];
+};
+
+#define KVM_MAX_XCRS   16
+
+struct kvm_xcr {
+   __u32 xcr;
+   __u32 reserved;
+   __u64 value;
+};
+
+struct kvm_xcrs {
+   __u32 nr_xcrs;
+   __u32 flags;
+   struct kvm_xcr xcrs[KVM_MAX_XCRS];
+   __u64 padding[16];
+};
+
 #endif /* _ASM_X86_KVM_H */
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: Fix mov cr4 #GP at wrong instruction

2010-06-21 Thread Avi Kivity

From: Avi Kivity a...@redhat.com

On Intel, we call skip_emulated_instruction() even if we injected a #GP,
resulting in the #GP pointing at the wrong address.

Fix by injecting the exception and skipping the instruction at the same place,
so we can do just one or the other.

Signed-off-by: Avi Kivity a...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b237084..ea8c319 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -599,7 +599,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 
tss_selector, int reason,
 
 int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
-void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
+int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
 void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);
 int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val);
 int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index eb4703f..a6322af 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3192,8 +3192,8 @@ static int handle_cr(struct kvm_vcpu *vcpu)
skip_emulated_instruction(vcpu);
return 1;
case 4:
-   kvm_set_cr4(vcpu, val);
-   skip_emulated_instruction(vcpu);
+   err = kvm_set_cr4(vcpu, val);
+   complete_insn_gp(vcpu, err);
return 1;
case 8: {
u8 cr8_prev = kvm_get_cr8(vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1e0337a..b3eeb24 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -531,7 +531,7 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
}
 }
 
-int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
unsigned long old_cr4 = kvm_read_cr4(vcpu);
unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
@@ -563,12 +563,6 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 
return 0;
 }
-
-void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
-{
-   if (__kvm_set_cr4(vcpu, cr4))
-   kvm_inject_gp(vcpu, 0);
-}
 EXPORT_SYMBOL_GPL(kvm_set_cr4);
 
 static int __kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
@@ -3735,7 +3729,7 @@ static int emulator_set_cr(int cr, unsigned long val, 
struct kvm_vcpu *vcpu)
res = __kvm_set_cr3(vcpu, val);
break;
case 4:
-   res = __kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
+   res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
break;
case 8:
res = __kvm_set_cr8(vcpu, val  0xfUL);
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: Fix mov cr3 #GP at wrong instruction

2010-06-21 Thread Avi Kivity

From: Avi Kivity a...@redhat.com

On Intel, we call skip_emulated_instruction() even if we injected a #GP,
resulting in the #GP pointing at the wrong address.

Fix by injecting the exception and skipping the instruction at the same place,
so we can do just one or the other.

Signed-off-by: Avi Kivity a...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ea8c319..c2813d6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -598,7 +598,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 
tss_selector, int reason,
bool has_error_code, u32 error_code);
 
 int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
-void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
+int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
 void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);
 int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 75ddaa1..fcf 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3201,7 +3201,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
 
 static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 {
-   kvm_set_cr3(vcpu, vcpu-arch.cr3);
+   (void)kvm_set_cr3(vcpu, vcpu-arch.cr3);
return 1;
 }
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6d1616d..f7a6fdc 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1963,7 +1963,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
svm-vmcb-save.cr3 = hsave-save.cr3;
svm-vcpu.arch.cr3 = hsave-save.cr3;
} else {
-   kvm_set_cr3(svm-vcpu, hsave-save.cr3);
+   (void)kvm_set_cr3(svm-vcpu, hsave-save.cr3);
}
kvm_register_write(svm-vcpu, VCPU_REGS_RAX, hsave-save.rax);
kvm_register_write(svm-vcpu, VCPU_REGS_RSP, hsave-save.rsp);
@@ -2086,7 +2086,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
svm-vmcb-save.cr3 = nested_vmcb-save.cr3;
svm-vcpu.arch.cr3 = nested_vmcb-save.cr3;
} else
-   kvm_set_cr3(svm-vcpu, nested_vmcb-save.cr3);
+   (void)kvm_set_cr3(svm-vcpu, nested_vmcb-save.cr3);
 
/* Guest paging mode is active - reset mmu */
kvm_mmu_reset_context(svm-vcpu);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a6322af..6c81f0e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3188,8 +3188,8 @@ static int handle_cr(struct kvm_vcpu *vcpu)
complete_insn_gp(vcpu, err);
return 1;
case 3:
-   kvm_set_cr3(vcpu, val);
-   skip_emulated_instruction(vcpu);
+   err = kvm_set_cr3(vcpu, val);
+   complete_insn_gp(vcpu, err);
return 1;
case 4:
err = kvm_set_cr4(vcpu, val);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b3eeb24..e16a00e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -565,7 +565,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr4);
 
-static int __kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
+int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
if (cr3 == vcpu-arch.cr3  !pdptrs_changed(vcpu)) {
kvm_mmu_sync_roots(vcpu);
@@ -604,12 +604,6 @@ static int __kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned 
long cr3)
vcpu-arch.mmu.new_cr3(vcpu);
return 0;
 }
-
-void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
-{
-   if (__kvm_set_cr3(vcpu, cr3))
-   kvm_inject_gp(vcpu, 0);
-}
 EXPORT_SYMBOL_GPL(kvm_set_cr3);
 
 int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
@@ -3726,7 +3720,7 @@ static int emulator_set_cr(int cr, unsigned long val, 
struct kvm_vcpu *vcpu)
vcpu-arch.cr2 = val;
break;
case 3:
-   res = __kvm_set_cr3(vcpu, val);
+   res = kvm_set_cr3(vcpu, val);
break;
case 4:
res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: x86: XSAVE/XRSTOR live migration support

2010-06-21 Thread Avi Kivity

From: Sheng Yang sh...@linux.intel.com

This patch enables save/restore of xsave state.

Signed-off-by: Sheng Yang sh...@linux.intel.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index 159b4ef..ffba03f 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -922,6 +922,80 @@ Define which vcpu is the Bootstrap Processor (BSP).  
Values are the same
 as the vcpu id in KVM_CREATE_VCPU.  If this ioctl is not called, the default
 is vcpu 0.
 
+4.41 KVM_GET_XSAVE
+
+Capability: KVM_CAP_XSAVE
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_xsave (out)
+Returns: 0 on success, -1 on error
+
+struct kvm_xsave {
+   __u32 region[1024];
+};
+
+This ioctl would copy current vcpu's xsave struct to the userspace.
+
+4.42 KVM_SET_XSAVE
+
+Capability: KVM_CAP_XSAVE
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_xsave (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_xsave {
+   __u32 region[1024];
+};
+
+This ioctl would copy userspace's xsave struct to the kernel.
+
+4.43 KVM_GET_XCRS
+
+Capability: KVM_CAP_XCRS
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_xcrs (out)
+Returns: 0 on success, -1 on error
+
+struct kvm_xcr {
+   __u32 xcr;
+   __u32 reserved;
+   __u64 value;
+};
+
+struct kvm_xcrs {
+   __u32 nr_xcrs;
+   __u32 flags;
+   struct kvm_xcr xcrs[KVM_MAX_XCRS];
+   __u64 padding[16];
+};
+
+This ioctl would copy current vcpu's xcrs to the userspace.
+
+4.44 KVM_SET_XCRS
+
+Capability: KVM_CAP_XCRS
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_xcrs (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_xcr {
+   __u32 xcr;
+   __u32 reserved;
+   __u64 value;
+};
+
+struct kvm_xcrs {
+   __u32 nr_xcrs;
+   __u32 flags;
+   struct kvm_xcr xcrs[KVM_MAX_XCRS];
+   __u64 padding[16];
+};
+
+This ioctl would set vcpu's xcr to the value userspace specified.
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index ff90055..4d8dcbd 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -22,6 +22,8 @@
 #define __KVM_HAVE_XEN_HVM
 #define __KVM_HAVE_VCPU_EVENTS
 #define __KVM_HAVE_DEBUGREGS
+#define __KVM_HAVE_XSAVE
+#define __KVM_HAVE_XCRS
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
@@ -299,4 +301,24 @@ struct kvm_debugregs {
__u64 reserved[9];
 };
 
+/* for KVM_CAP_XSAVE */
+struct kvm_xsave {
+   __u32 region[1024];
+};
+
+#define KVM_MAX_XCRS   16
+
+struct kvm_xcr {
+   __u32 xcr;
+   __u32 reserved;
+   __u64 value;
+};
+
+struct kvm_xcrs {
+   __u32 nr_xcrs;
+   __u32 flags;
+   struct kvm_xcr xcrs[KVM_MAX_XCRS];
+   __u64 padding[16];
+};
+
 #endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 29ee4e4..32c3666 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -13,8 +13,11 @@
 
 #define FXSAVE_SIZE512
 
-#define XSTATE_YMM_SIZE 256
-#define XSTATE_YMM_OFFSET (512 + 64)
+#define XSAVE_HDR_SIZE 64
+#define XSAVE_HDR_OFFSETFXSAVE_SIZE
+
+#define XSAVE_YMM_SIZE 256
+#define XSAVE_YMM_OFFSET(XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
 
 /*
  * These are the features that the OS can handle currently.
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e16a00e..d3d008e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1680,6 +1680,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_PCI_SEGMENT:
case KVM_CAP_DEBUGREGS:
case KVM_CAP_X86_ROBUST_SINGLESTEP:
+   case KVM_CAP_XSAVE:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -1703,6 +1704,9 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_MCE:
r = KVM_MAX_MCE_BANKS;
break;
+   case KVM_CAP_XCRS:
+   r = cpu_has_xsave;
+   break;
default:
r = 0;
break;
@@ -2355,6 +2359,77 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct 
kvm_vcpu *vcpu,
return 0;
 }
 
+static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
+struct kvm_xsave *guest_xsave)
+{
+   if (cpu_has_xsave)
+   memcpy(guest_xsave-region,
+   vcpu-arch.guest_fpu.state-xsave,
+   sizeof(struct xsave_struct));
+   else {
+   memcpy(guest_xsave-region,
+   vcpu-arch.guest_fpu.state-fxsave,
+   sizeof(struct i387_fxsave_struct));
+   *(u64 *)guest_xsave-region[XSAVE_HDR_OFFSET / sizeof(u32)] =
+   XSTATE_FPSSE;
+   }
+}
+
+static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
+

[COMMIT master] KVM: MMU: rename 'page' and 'shadow_page' to 'sp'

2010-06-21 Thread Avi Kivity

From: Xiao Guangrong xiaoguangr...@cn.fujitsu.com

Rename 'page' and 'shadow_page' to 'sp' to better fit the context

Signed-off-by: Xiao Guangrong xiaoguangr...@cn.fujitsu.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 6cd318d..8d00bb2 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -253,7 +253,7 @@ err:
return 0;
 }
 
-static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
+static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
  u64 *spte, const void *pte)
 {
pt_element_t gpte;
@@ -264,7 +264,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct 
kvm_mmu_page *page,
gpte = *(const pt_element_t *)pte;
if (~gpte  (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
if (!is_present_gpte(gpte)) {
-   if (page-unsync)
+   if (sp-unsync)
new_spte = shadow_trap_nonpresent_pte;
else
new_spte = shadow_notrap_nonpresent_pte;
@@ -273,7 +273,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct 
kvm_mmu_page *page,
return;
}
pgprintk(%s: gpte %llx spte %p\n, __func__, (u64)gpte, spte);
-   pte_access = page-role.access  FNAME(gpte_access)(vcpu, gpte);
+   pte_access = sp-role.access  FNAME(gpte_access)(vcpu, gpte);
if (gpte_to_gfn(gpte) != vcpu-arch.update_pte.gfn)
return;
pfn = vcpu-arch.update_pte.pfn;
@@ -286,7 +286,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct 
kvm_mmu_page *page,
 * we call mmu_set_spte() with reset_host_protection = true beacuse that
 * vcpu-arch.update_pte.pfn was fetched from get_user_pages(write = 1).
 */
-   mmu_set_spte(vcpu, spte, page-role.access, pte_access, 0, 0,
+   mmu_set_spte(vcpu, spte, sp-role.access, pte_access, 0, 0,
 gpte  PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL,
 gpte_to_gfn(gpte), pfn, true, true);
 }
@@ -300,7 +300,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 int *ptwrite, pfn_t pfn)
 {
unsigned access = gw-pt_access;
-   struct kvm_mmu_page *shadow_page;
+   struct kvm_mmu_page *sp;
u64 spte, *sptep = NULL;
int direct;
gfn_t table_gfn;
@@ -341,30 +341,30 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t 
addr,
access = ~ACC_WRITE_MASK;
/*
 * It is a large guest pages backed by small host pages,
-* So we set @direct(@shadow_page-role.direct)=1, and
-* set @table_gfn(@shadow_page-gfn)=the base page frame
-* for linear translations.
+* So we set @direct(@sp-role.direct)=1, and set
+* @table_gfn(@sp-gfn)=the base page frame for linear
+* translations.
 */
table_gfn = gw-gfn  ~(KVM_PAGES_PER_HPAGE(level) - 1);
} else {
direct = 0;
table_gfn = gw-table_gfn[level - 2];
}
-   shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
+   sp = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
   direct, access, sptep);
if (!direct) {
r = kvm_read_guest_atomic(vcpu-kvm,
  gw-pte_gpa[level - 2],
  curr_pte, sizeof(curr_pte));
if (r || curr_pte != gw-ptes[level - 2]) {
-   kvm_mmu_put_page(shadow_page, sptep);
+   kvm_mmu_put_page(sp, sptep);
kvm_release_pfn_clean(pfn);
sptep = NULL;
break;
}
}
 
-   spte = __pa(shadow_page-spt)
+   spte = __pa(sp-spt)
| PT_PRESENT_MASK | PT_ACCESSED_MASK
| PT_WRITABLE_MASK | PT_USER_MASK;
*sptep = spte;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: MMU: clear unsync_child_bitmap completely

2010-06-21 Thread Avi Kivity

From: Xiao Guangrong xiaoguangr...@cn.fujitsu.com

In the current code, the unsync_child_bitmap of some pages is not cleared
completely in mmu_sync_children(); for example, if two PDPEs share one PDT,
the unsync_child_bitmap of one of the PDPEs is not cleared.

Currently this does no harm beyond a little extra overhead, but it is
preparatory work for a later patch.

Signed-off-by: Xiao Guangrong xiaoguangr...@cn.fujitsu.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 6230c38..951af3a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1149,33 +1149,38 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
int i, ret, nr_unsync_leaf = 0;
 
for_each_unsync_children(sp-unsync_child_bitmap, i) {
+   struct kvm_mmu_page *child;
u64 ent = sp-spt[i];
 
-   if (is_shadow_present_pte(ent)  !is_large_pte(ent)) {
-   struct kvm_mmu_page *child;
-   child = page_header(ent  PT64_BASE_ADDR_MASK);
-
-   if (child-unsync_children) {
-   if (mmu_pages_add(pvec, child, i))
-   return -ENOSPC;
-
-   ret = __mmu_unsync_walk(child, pvec);
-   if (!ret) {
-   __clear_bit(i, sp-unsync_child_bitmap);
-   sp-unsync_children--;
-   WARN_ON((int)sp-unsync_children  0);
-   } else if (ret  0)
-   nr_unsync_leaf += ret;
-   else
-   return ret;
-   }
+   if (!is_shadow_present_pte(ent) || is_large_pte(ent))
+   goto clear_child_bitmap;
+
+   child = page_header(ent  PT64_BASE_ADDR_MASK);
+
+   if (child-unsync_children) {
+   if (mmu_pages_add(pvec, child, i))
+   return -ENOSPC;
+
+   ret = __mmu_unsync_walk(child, pvec);
+   if (!ret)
+   goto clear_child_bitmap;
+   else if (ret  0)
+   nr_unsync_leaf += ret;
+   else
+   return ret;
+   } else if (child-unsync) {
+   nr_unsync_leaf++;
+   if (mmu_pages_add(pvec, child, i))
+   return -ENOSPC;
+   } else
+goto clear_child_bitmap;
 
-   if (child-unsync) {
-   nr_unsync_leaf++;
-   if (mmu_pages_add(pvec, child, i))
-   return -ENOSPC;
-   }
-   }
+   continue;
+
+clear_child_bitmap:
+   __clear_bit(i, sp-unsync_child_bitmap);
+   sp-unsync_children--;
+   WARN_ON((int)sp-unsync_children  0);
}
 
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: MMU: avoid double write protected in sync page path

2010-06-21 Thread Avi Kivity

From: Xiao Guangrong xiaoguangr...@cn.fujitsu.com

The sync page is already write-protected in mmu_sync_children(); don't
write-protect it again.

Signed-off-by: Xiao Guangrong xiaoguangr...@cn.fujitsu.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index fcf..d60bf90 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1216,6 +1216,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
if ((sp)-gfn != (gfn) || (sp)-role.direct ||  \
(sp)-role.invalid) {} else
 
+/* @sp-gfn should be write-protected at the call site */
 static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
   struct list_head *invalid_list, bool clear_unsync)
 {
@@ -1224,11 +1225,8 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct 
kvm_mmu_page *sp,
return 1;
}
 
-   if (clear_unsync) {
-   if (rmap_write_protect(vcpu-kvm, sp-gfn))
-   kvm_flush_remote_tlbs(vcpu-kvm);
+   if (clear_unsync)
kvm_unlink_unsync_page(vcpu-kvm, sp);
-   }
 
if (vcpu-arch.mmu.sync_page(vcpu, sp)) {
kvm_mmu_prepare_zap_page(vcpu-kvm, sp, invalid_list);
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: MMU: cleanup for dirty page judgment

2010-06-21 Thread Avi Kivity

From: Xiao Guangrong xiaoguangr...@cn.fujitsu.com

Use a wrapper function to clean up the dirty-page check.

Signed-off-by: Xiao Guangrong xiaoguangr...@cn.fujitsu.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 8d00bb2..876e705 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -287,7 +287,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct 
kvm_mmu_page *sp,
 * vcpu-arch.update_pte.pfn was fetched from get_user_pages(write = 1).
 */
mmu_set_spte(vcpu, spte, sp-role.access, pte_access, 0, 0,
-gpte  PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL,
+is_dirty_gpte(gpte), NULL, PT_PAGE_TABLE_LEVEL,
 gpte_to_gfn(gpte), pfn, true, true);
 }
 
@@ -319,7 +319,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
mmu_set_spte(vcpu, sptep, access,
 gw-pte_access  access,
 user_fault, write_fault,
-gw-ptes[gw-level-1]  PT_DIRTY_MASK,
+is_dirty_gpte(gw-ptes[gw-level-1]),
 ptwrite, level,
 gw-gfn, pfn, false, true);
break;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: MMU: cleanup for __mmu_unsync_walk()

2010-06-21 Thread Avi Kivity

From: Xiao Guangrong xiaoguangr...@cn.fujitsu.com

Decrease sp->unsync_children after clearing the unsync_child_bitmap bit.

Signed-off-by: Xiao Guangrong xiaoguangr...@cn.fujitsu.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8bfcb32..6230c38 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1160,9 +1160,11 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
return -ENOSPC;
 
ret = __mmu_unsync_walk(child, pvec);
-   if (!ret)
+   if (!ret) {
__clear_bit(i, sp-unsync_child_bitmap);
-   else if (ret  0)
+   sp-unsync_children--;
+   WARN_ON((int)sp-unsync_children  0);
+   } else if (ret  0)
nr_unsync_leaf += ret;
else
return ret;
@@ -1176,8 +1178,6 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
}
}
 
-   if (find_first_bit(sp-unsync_child_bitmap, 512) == 512)
-   sp-unsync_children = 0;
 
return nr_unsync_leaf;
 }
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6

2010-06-21 Thread Avi Kivity

From: Marcelo Tosatti mtosa...@redhat.com

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: Fix typos in Documentation/kvm/mmu.txt

2010-06-21 Thread Avi Kivity

From: Jason Wang jasow...@redhat.com

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/Documentation/kvm/mmu.txt b/Documentation/kvm/mmu.txt
index 8cb42b9..142cc51 100644
--- a/Documentation/kvm/mmu.txt
+++ b/Documentation/kvm/mmu.txt
@@ -77,10 +77,10 @@ Memory
 
 Guest memory (gpa) is part of the user address space of the process that is
 using kvm.  Userspace defines the translation between guest addresses and user
-addresses (gpa-hva); note that two gpas may alias to the same gva, but not
+addresses (gpa-hva); note that two gpas may alias to the same hva, but not
 vice versa.
 
-These gvas may be backed using any method available to the host: anonymous
+These hvas may be backed using any method available to the host: anonymous
 memory, file backed memory, and device memory.  Memory might be paged by the
 host at any time.
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: x86 emulator: fix pusha instruction emulation

2010-06-21 Thread Avi Kivity

From: Wei Yongjun yj...@cn.fujitsu.com

Emulation of the pusha instruction only writes back the last
register pushed (EDI); the other registers that need to be
written back are ignored. This patch fixes it.

Signed-off-by: Wei Yongjun yj...@cn.fujitsu.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a4c2dcd..c990db0 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1553,6 +1553,64 @@ exception:
return X86EMUL_PROPAGATE_FAULT;
 }
 
+static inline int writeback(struct x86_emulate_ctxt *ctxt,
+   struct x86_emulate_ops *ops)
+{
+   int rc;
+   struct decode_cache *c = ctxt-decode;
+   u32 err;
+
+   switch (c-dst.type) {
+   case OP_REG:
+   /* The 4-byte case *is* correct:
+* in 64-bit mode we zero-extend.
+*/
+   switch (c-dst.bytes) {
+   case 1:
+   *(u8 *)c-dst.ptr = (u8)c-dst.val;
+   break;
+   case 2:
+   *(u16 *)c-dst.ptr = (u16)c-dst.val;
+   break;
+   case 4:
+   *c-dst.ptr = (u32)c-dst.val;
+   break;  /* 64b: zero-ext */
+   case 8:
+   *c-dst.ptr = c-dst.val;
+   break;
+   }
+   break;
+   case OP_MEM:
+   if (c-lock_prefix)
+   rc = ops-cmpxchg_emulated(
+   (unsigned long)c-dst.ptr,
+   c-dst.orig_val,
+   c-dst.val,
+   c-dst.bytes,
+   err,
+   ctxt-vcpu);
+   else
+   rc = ops-write_emulated(
+   (unsigned long)c-dst.ptr,
+   c-dst.val,
+   c-dst.bytes,
+   err,
+   ctxt-vcpu);
+   if (rc == X86EMUL_PROPAGATE_FAULT)
+   emulate_pf(ctxt,
+ (unsigned long)c-dst.ptr, err);
+   if (rc != X86EMUL_CONTINUE)
+   return rc;
+   break;
+   case OP_NONE:
+   /* no writeback */
+   break;
+   default:
+   break;
+   }
+   return X86EMUL_CONTINUE;
+}
+
 static inline void emulate_push(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops)
 {
@@ -1651,11 +1709,12 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt 
*ctxt,
return rc;
 }
 
-static void emulate_pusha(struct x86_emulate_ctxt *ctxt,
+static int emulate_pusha(struct x86_emulate_ctxt *ctxt,
  struct x86_emulate_ops *ops)
 {
struct decode_cache *c = ctxt-decode;
unsigned long old_esp = c-regs[VCPU_REGS_RSP];
+   int rc = X86EMUL_CONTINUE;
int reg = VCPU_REGS_RAX;
 
while (reg = VCPU_REGS_RDI) {
@@ -1663,8 +1722,18 @@ static void emulate_pusha(struct x86_emulate_ctxt *ctxt,
(c-src.val = old_esp) : (c-src.val = c-regs[reg]);
 
emulate_push(ctxt, ops);
+
+   rc = writeback(ctxt, ops);
+   if (rc != X86EMUL_CONTINUE)
+   return rc;
+
++reg;
}
+
+   /* Disable writeback. */
+   c-dst.type = OP_NONE;
+
+   return rc;
 }
 
 static int emulate_popa(struct x86_emulate_ctxt *ctxt,
@@ -1817,64 +1886,6 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
return rc;
 }
 
-static inline int writeback(struct x86_emulate_ctxt *ctxt,
-   struct x86_emulate_ops *ops)
-{
-   int rc;
-   struct decode_cache *c = ctxt-decode;
-   u32 err;
-
-   switch (c-dst.type) {
-   case OP_REG:
-   /* The 4-byte case *is* correct:
-* in 64-bit mode we zero-extend.
-*/
-   switch (c-dst.bytes) {
-   case 1:
-   *(u8 *)c-dst.ptr = (u8)c-dst.val;
-   break;
-   case 2:
-   *(u16 *)c-dst.ptr = (u16)c-dst.val;
-   break;
-   case 4:
-   *c-dst.ptr = (u32)c-dst.val;
-   break;  /* 64b: zero-ext */
-   case 8:
-   *c-dst.ptr = c-dst.val;
-   break;
-   }
-   break;
-   case OP_MEM:
-   if (c-lock_prefix)
-   rc = ops-cmpxchg_emulated(
-   (unsigned long)c-dst.ptr,
-   c-dst.orig_val,
-

[COMMIT master] KVM: x86 emulator: fix group3 instruction decoding

2010-06-21 Thread Avi Kivity

From: Wei Yongjun yj...@cn.fujitsu.com

A Group 3 instruction with the ModRM reg field set to 001 is
defined as a test instruction on the AMD architecture, and
emulate_grp3() is already able to emulate it, so fix the
decoding.

static inline int emulate_grp3(...)
{
...
switch (c->modrm_reg) {
case 0 ... 1:   /* test */
emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
...
}

Signed-off-by: Wei Yongjun yj...@cn.fujitsu.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c990db0..abb8cec 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -336,11 +336,11 @@ static u32 group_table[] = {
[Group1A*8] =
DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,
[Group3_Byte*8] =
-   ByteOp | SrcImm | DstMem | ModRM, 0,
+   ByteOp | SrcImm | DstMem | ModRM, ByteOp | SrcImm | DstMem | ModRM,
ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
0, 0, 0, 0,
[Group3*8] =
-   DstMem | SrcImm | ModRM, 0,
+   DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
0, 0, 0, 0,
[Group4*8] =
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: x86: Allow any LAPIC to accept PIC interrupts

2010-06-21 Thread Avi Kivity

From: Chris Lalancette clala...@redhat.com

If the guest wants to accept timer interrupts on a CPU other
than the BSP, we need to remove this gate.

Signed-off-by: Chris Lalancette clala...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 024f6d1..49573c7 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1107,13 +1107,11 @@ int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
u32 lvt0 = apic_get_reg(vcpu-arch.apic, APIC_LVT0);
int r = 0;
 
-   if (kvm_vcpu_is_bsp(vcpu)) {
-   if (!apic_hw_enabled(vcpu-arch.apic))
-   r = 1;
-   if ((lvt0  APIC_LVT_MASKED) == 0 
-   GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
-   r = 1;
-   }
+   if (!apic_hw_enabled(vcpu-arch.apic))
+   r = 1;
+   if ((lvt0  APIC_LVT_MASKED) == 0 
+   GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
+   r = 1;
return r;
 }
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] net: fix deliver_no_wcard regression on loopback device

2010-06-21 Thread Avi Kivity

From: John Fastabend john.r.fastab...@intel.com

deliver_no_wcard is not being set in skb_copy_header.
In the skb_cloned case it is not being cleared and
may cause the skb to be dropped when the loopback device
pushes it back up the stack.

Signed-off-by: John Fastabend john.r.fastab...@intel.com
Acked-by: Eric Dumazet eric.duma...@gmail.com
Tested-by: Markus Trippelsdorf mar...@trippelsdorf.de
Signed-off-by: David S. Miller da...@davemloft.net

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 9f07e74..bcf2fa3 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -532,6 +532,7 @@ static void __copy_skb_header(struct sk_buff *new, const 
struct sk_buff *old)
new-ip_summed  = old-ip_summed;
skb_copy_queue_mapping(new, old);
new-priority   = old-priority;
+   new-deliver_no_wcard   = old-deliver_no_wcard;
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
new-ipvs_property  = old-ipvs_property;
 #endif
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: Fix xsave and xcr save/restore memory leak

2010-06-21 Thread Avi Kivity

From: Avi Kivity a...@redhat.com

We allocate temporary kernel buffers for these structures, but never free them.

Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d3d008e..d513e57 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2437,6 +2437,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
void __user *argp = (void __user *)arg;
int r;
struct kvm_lapic_state *lapic = NULL;
+   struct kvm_xsave *xsave = NULL;
+   struct kvm_xcrs *xcrs = NULL;
 
switch (ioctl) {
case KVM_GET_LAPIC: {
@@ -2632,8 +2634,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_GET_XSAVE: {
-   struct kvm_xsave *xsave;
-
xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
r = -ENOMEM;
if (!xsave)
@@ -2648,8 +2648,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_SET_XSAVE: {
-   struct kvm_xsave *xsave;
-
xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
r = -ENOMEM;
if (!xsave)
@@ -2663,8 +2661,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_GET_XCRS: {
-   struct kvm_xcrs *xcrs;
-
xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
r = -ENOMEM;
if (!xcrs)
@@ -2680,8 +2676,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_SET_XCRS: {
-   struct kvm_xcrs *xcrs;
-
xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
r = -ENOMEM;
if (!xcrs)
@@ -2700,6 +2694,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
 out:
kfree(lapic);
+   kfree(xsave);
+   kfree(xcrs);
return r;
 }
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: x86: In DM_LOWEST, only deliver interrupts to vcpus with enabled LAPIC's

2010-06-21 Thread Avi Kivity

From: Chris Lalancette clala...@redhat.com

Otherwise we might try to deliver a timer interrupt to a cpu that
can't possibly handle it.

Signed-off-by: Chris Lalancette clala...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 52f412f..06cf61e 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -100,7 +100,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct 
kvm_lapic *src,
if (r  0)
r = 0;
r += kvm_apic_set_irq(vcpu, irq);
-   } else {
+   } else if (kvm_lapic_enabled(vcpu)) {
if (!lowest)
lowest = vcpu;
else if (kvm_apic_compare_prio(vcpu, lowest)  0)
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] device-assignment: Don't deassign when the assignment fails

2010-06-21 Thread Avi Kivity

From: Alex Williamson alex.william...@redhat.com

The last thing assign_device() does is call into KVM_ASSIGN_PCI_DEVICE.
If that fails, the device is not assigned, so we shouldn't then try to
deassign it.  If you try to assign the same device multiple times, you
can get into a nasty fail-succeed-fail-succeed loop.  And we certainly
shouldn't take the assigned_out branch before we've even attempted
to assign the device.

Signed-off-by: Alex Williamson alex.william...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 2b963b5..7e53a95 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -1407,12 +1407,12 @@ static int assigned_initfn(struct PCIDevice *pci_dev)
 if (pci_enable_capability_support(pci_dev, 0, NULL,
 assigned_device_pci_cap_write_config,
 assigned_device_pci_cap_init)  0)
-goto assigned_out;
+goto out;
 
 /* assign device to guest */
 r = assign_device(dev);
 if (r  0)
-goto assigned_out;
+goto out;
 
 /* assign irq for the device */
 r = assign_irq(dev);
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] test: Add IDT framework

2010-06-21 Thread Avi Kivity

From: Avi Kivity a...@redhat.com

Signed-off-by: Sheng Yang sh...@linux.intel.com
Signed-off-by: Avi Kivity a...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/kvm/test/config-x86-common.mak b/kvm/test/config-x86-common.mak
index c97de52..800b635 100644
--- a/kvm/test/config-x86-common.mak
+++ b/kvm/test/config-x86-common.mak
@@ -59,6 +59,8 @@ $(TEST_DIR)/realmode.o: bits = 32
 
 $(TEST_DIR)/msr.flat: $(cstart.o) $(TEST_DIR)/msr.o
 
+$(TEST_DIR)/idt_test.flat: $(cstart.o) $(TEST_DIR)/idt.o $(TEST_DIR)/idt_test.o
+
 arch_clean:
$(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat \
$(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o
diff --git a/kvm/test/config-x86_64.mak b/kvm/test/config-x86_64.mak
index d8fd2b5..f9cd121 100644
--- a/kvm/test/config-x86_64.mak
+++ b/kvm/test/config-x86_64.mak
@@ -5,6 +5,6 @@ ldarch = elf64-x86-64
 CFLAGS += -D__x86_64__
 
 tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
- $(TEST_DIR)/emulator.flat
+ $(TEST_DIR)/emulator.flat $(TEST_DIR)/idt_test.flat
 
 include config-x86-common.mak
diff --git a/kvm/test/flat.lds b/kvm/test/flat.lds
index 4120595..4888f3a 100644
--- a/kvm/test/flat.lds
+++ b/kvm/test/flat.lds
@@ -4,7 +4,12 @@ SECTIONS
 stext = .;
 .text : { *(.init) *(.text) *(.text.*) }
 . = ALIGN(4K);
-.data : { *(.data) }
+.data : {
+  *(.data)
+  exception_table_start = .;
+  *(.data.ex)
+ exception_table_end = .;
+ }
 . = ALIGN(16);
 .rodata : { *(.rodata) }
 . = ALIGN(16);
diff --git a/kvm/test/lib/x86/idt.h b/kvm/test/lib/x86/idt.h
new file mode 100644
index 000..6babcb4
--- /dev/null
+++ b/kvm/test/lib/x86/idt.h
@@ -0,0 +1,19 @@
+#ifndef __IDT_TEST__
+#define __IDT_TEST__
+
+void setup_idt(void);
+
+#define ASM_TRY(catch)  \
+movl $0, %%gs:4 \n\t  \
+.pushsection .data.ex \n\t\
+.quad f,  catch \n\t\
+.popsection \n\t  \
+:
+
+#define UD_VECTOR   6
+#define GP_VECTOR   13
+
+unsigned exception_vector(void);
+unsigned exception_error_code(void);
+
+#endif
diff --git a/kvm/test/x86/idt.c b/kvm/test/x86/idt.c
new file mode 100644
index 000..999b3f0
--- /dev/null
+++ b/kvm/test/x86/idt.c
@@ -0,0 +1,150 @@
+#include idt.h
+#include libcflat.h
+
+typedef struct {
+unsigned short offset0;
+unsigned short selector;
+unsigned short ist : 3;
+unsigned short : 5;
+unsigned short type : 4;
+unsigned short : 1;
+unsigned short dpl : 2;
+unsigned short p : 1;
+unsigned short offset1;
+unsigned offset2;
+unsigned reserved;
+} idt_entry_t;
+
+static idt_entry_t idt[256];
+
+typedef struct {
+unsigned short limit;
+unsigned long linear_addr;
+} __attribute__((packed)) descriptor_table_t;
+
+void lidt(idt_entry_t *idt, int nentries)
+{
+descriptor_table_t dt;
+
+dt.limit = nentries * sizeof(*idt) - 1;
+dt.linear_addr = (unsigned long)idt;
+asm volatile (lidt %0 : : m(dt));
+}
+
+unsigned short read_cs()
+{
+unsigned short r;
+
+asm volatile (mov %%cs, %0 : =r(r));
+return r;
+}
+
+void memset(void *a, unsigned char v, int n)
+{
+unsigned char *x = a;
+
+while (n--)
+   *x++ = v;
+}
+
+void set_idt_entry(idt_entry_t *e, void *addr, int dpl)
+{
+memset(e, 0, sizeof *e);
+e-offset0 = (unsigned long)addr;
+e-selector = read_cs();
+e-ist = 0;
+e-type = 14;
+e-dpl = dpl;
+e-p = 1;
+e-offset1 = (unsigned long)addr  16;
+e-offset2 = (unsigned long)addr  32;
+}
+
+struct ex_regs {
+unsigned long rax, rcx, rdx, rbx;
+unsigned long dummy, rbp, rsi, rdi;
+unsigned long r8, r9, r10, r11;
+unsigned long r12, r13, r14, r15;
+unsigned long vector;
+unsigned long error_code;
+unsigned long rip;
+unsigned long cs;
+unsigned long rflags;
+};
+
+struct ex_record {
+unsigned long rip;
+unsigned long handler;
+};
+
+extern struct ex_record exception_table_start, exception_table_end;
+
+void do_handle_exception(struct ex_regs *regs)
+{
+struct ex_record *ex;
+unsigned ex_val;
+
+ex_val = regs-vector | (regs-error_code  16);
+
+asm(mov %0, %%gs:4 : : r(ex_val));
+
+for (ex = exception_table_start; ex != exception_table_end; ++ex) {
+if (ex-rip == regs-rip) {
+regs-rip = ex-handler;
+return;
+}
+}
+printf(unhandled excecption\n);
+exit(7);
+}
+
+asm (.pushsection .text \n\t
+ ud_fault: \n\t
+ pushq $0 \n\t
+ pushq $6 \n\t
+ jmp handle_exception \n\t
+
+ gp_fault: \n\t
+ pushq $13 \n\t
+ jmp handle_exception \n\t
+
+ handle_exception: \n\t
+ push %r15; push %r14; push %r13; push %r12 \n\t
+ push %r11; push %r10; push %r9; push %r8 \n\t
+ push %rdi; push %rsi; push %rbp; sub $8, %rsp \n\t
+ push %rbx; push %rdx;

Re: [PATCH 1/2] Add 'serial' attribute to virtio-blk devices

2010-06-21 Thread john cooper

Rusty Russell wrote:
 On Sat, 19 Jun 2010 04:08:02 am Ryan Harper wrote:
 Create a new attribute for virtio-blk devices that will fetch the serial 
 number
 of the block device.  This attribute can be used by udev to create disk/by-id
 symlinks for devices that don't have a UUID (filesystem) associated with 
 them.

 ATA_IDENTIFY strings are special in that they can be up to 20 chars long
 and aren't required to be NULL-terminated.  The buffer is also zero-padded
 meaning that if the serial is 19 chars or less that we get a NULL terminated
 string.  When copying this value into a string buffer, we must be careful to
 copy up to the NULL (if it is present) and only 20 if it is longer and not to
 attempt to NULL terminate; this isn't needed.

 Signed-off-by: Ryan Harper ry...@us.ibm.com
 Signed-off-by: john cooper john.coo...@redhat.com
 ---
  drivers/block/virtio_blk.c |   32 
  1 files changed, 32 insertions(+), 0 deletions(-)

 diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
 index 258bc2a..f1ef26f 100644
 --- a/drivers/block/virtio_blk.c
 +++ b/drivers/block/virtio_blk.c
 @@ -281,6 +281,31 @@ static int index_to_minor(int index)
  return index  PART_BITS;
  }
  
 +/* Copy serial number from *s to *d.  Copy operation terminates on either
 + * encountering a nul in *s or after n bytes have been copied, whichever
 + * occurs first.  *d is not forcibly nul terminated.  Return # of bytes 
 copied.
 + */
 +static inline int serial_sysfs(char *d, char *s, int n)
 +{
 +char *di = d;
 +
 +while (*s  n--)
 +*d++ = *s++;
 +return d - di;
 +}
 +
 +static ssize_t virtblk_serial_show(struct device *dev,
 +struct device_attribute *attr, char *buf)
 +{
 +struct gendisk *disk = dev_to_disk(dev);
 +char id_str[VIRTIO_BLK_ID_BYTES];
 +
 +if (IS_ERR(virtblk_get_id(disk, id_str)))
 +return 0;
 
 0?  Really?  That doesn't seem very informative.

Propagating a prospective error from virtblk_get_id() should
be possible.  Unsure if doing so is more useful from the
user's perspective compared to just a nul id string.

 +return serial_sysfs(buf, id_str, min(VIRTIO_BLK_ID_BYTES, PAGE_SIZE));
 
 How about something like this:
 
   BUILD_BUG_ON(PAGE_SIZE  VIRTIO_BLK_ID_BYTES + 1);

Agreed, that's a better wrench in the gearworks.
Note padding buf[] by 1 isn't necessary as indicated
below.

   /* id_str is not necessarily nul-terminated! */
   buf[VIRTIO_BLK_ID_BYTES] = '\0';
   return virtblk_get_id(disk, buf);

The /sys file is rendered according to the length
returned from this function and the trailing nul
is not interpreted in this context.  In fact if a
nul is added and included in the byte count of the
string it will appear in the /sys file.

Thanks,

-john


-- 
john.coo...@redhat.com
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/2] KVM: Keep slot ID in memory slot structure

2010-06-21 Thread Avi Kivity

May be used for distinguishing between internal and user slots, or for sorting
slots in size order.

Signed-off-by: Avi Kivity a...@redhat.com
---
 include/linux/kvm_host.h |1 +
 virt/kvm/kvm_main.c  |1 +
 2 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2d96555..d84bf40 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -124,6 +124,7 @@ struct kvm_memory_slot {
} *lpage_info[KVM_NR_PAGE_SIZES - 1];
unsigned long userspace_addr;
int user_alloc;
+   int id;
 };
 
 static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot 
*memslot)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 84a0906..add43a3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -570,6 +570,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 
new = old = *memslot;
 
+   new.id = mem-slot;
new.base_gfn = base_gfn;
new.npages = npages;
new.flags = mem-flags;
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 2/2] KVM: Prevent internal slots from being COWed

2010-06-21 Thread Avi Kivity

If a process with a memory slot is COWed, the page will change its address
(despite having an elevated reference count).  This breaks internal memory
slots which have their physical addresses loaded into vmcs registers (see
the APIC access memory slot).

Signed-off-by: Avi Kivity a...@redhat.com
---
 arch/x86/kvm/x86.c |5 +
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 33156a3..d9a33e6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5633,6 +5633,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
int user_alloc)
 {
int npages = memslot-npages;
+   int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
+
+   /* Prevent internal slot pages from being moved by fork()/COW. */
+   if (memslot-id = KVM_MEMORY_SLOTS)
+   map_flags = MAP_SHARED | MAP_ANONYMOUS;
 
/*To keep backward compatibility with older userspace,
 *x86 needs to hanlde !user_alloc case.
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 0/2] Fix failures caused by fork() interaction with internal slots

2010-06-21 Thread Avi Kivity

fork() has a WONTFIX bug where a page with an elevated reference count will
be COWed such that the page address changes even in the process which has
taken the reference.  This interacts badly with internal memory slots
that install pages in vmcs registers, such as the APIC access page.

This patchset disables fork() for these slots.

Avi Kivity (2):
  KVM: Keep slot ID in memory slot structure
  KVM: Prevent internal slots from being COWed

 arch/x86/kvm/x86.c   |5 +
 include/linux/kvm_host.h |1 +
 virt/kvm/kvm_main.c  |1 +
 3 files changed, 7 insertions(+), 0 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Update .gitignore

2010-06-21 Thread Avi Kivity

On 06/21/2010 08:24 AM, Hidetoshi Seto wrote:
 I think some people have noticed that:

   
 $ ./configure
 $ make
 $ git status
 # On branch master
 # Untracked files:
 #   (use git add file... to include in what will be committed)
 #
 #   QMP/qmp-commands.txt
 #   libdis-user/
 #   libdis/
 #   pc-bios/optionrom/vapic.bin
 nothing added to commit but untracked files present (use git add to track)
 
 Please consider applying this patch to qemu-kvm.git.

   

This is equally applicable to qemu.git, so please send it to the qemu
mailing list, qemu-de...@nongnu.org.

-- 
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: kvm-s390: Dont exit SIE on SIGP sense running

2010-06-21 Thread Avi Kivity


On 06/18/2010 12:16 AM, Christian Borntraeger wrote:

Avi, Marcello,

Newer (guest) kernels use sigp sense running in their spinlock
implementation to check if the other cpu is running before yielding
the processor. This revealed some wrong guest settings, causing
unnecessary exits for every sigp sense running.
   


Applied, thanks.


  void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
  {
VCPU_EVENT(vcpu, 3, %s, free cpu);
+   clear_bit(63 - vcpu-vcpu_id, (unsigned long 
*)vcpu-kvm-arch.sca-mcn);
if (vcpu-kvm-arch.sca-cpu[vcpu-vcpu_id].sda ==
(__u64) vcpu-arch.sie_block)
vcpu-kvm-arch.sca-cpu[vcpu-vcpu_id].sda = 0;
   


Unrelated, do these VCPU_EVENTs want to become ftrace tracepoints?

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 0/2] July 2010 feature removal

2010-06-21 Thread Avi Kivity

As advertised, two features are scheduled for removal now: aliases and
kernel-allocated memory regions.  Remove them.

Avi Kivity (2):
  KVM: Remove memory alias support
  KVM: Remove kernel-allocated memory regions

 Documentation/feature-removal-schedule.txt |   21 
 Documentation/kvm/api.txt  |   36 +---
 arch/ia64/kvm/kvm-ia64.c   |5 -
 arch/powerpc/kvm/powerpc.c |5 -
 arch/s390/kvm/kvm-s390.c   |5 -
 arch/x86/include/asm/kvm_host.h|   21 
 arch/x86/kvm/mmu.c |   17 +---
 arch/x86/kvm/paging_tmpl.h |3 +-
 arch/x86/kvm/x86.c |  141 
 arch/x86/kvm/x86.h |7 --
 include/linux/kvm.h|1 +
 include/linux/kvm_host.h   |6 -
 virt/kvm/kvm_main.c|   18 +---
 13 files changed, 12 insertions(+), 274 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 2/2] KVM: Remove kernel-allocated memory regions

2010-06-21 Thread Avi Kivity

Equivalent (and better) functionality is provided by user-allocated memory
regions.

Signed-off-by: Avi Kivity a...@redhat.com
---
 Documentation/feature-removal-schedule.txt |   10 --
 Documentation/kvm/api.txt  |   24 +---
 arch/x86/kvm/x86.c |   16 
 3 files changed, 1 insertions(+), 49 deletions(-)

diff --git a/Documentation/feature-removal-schedule.txt 
b/Documentation/feature-removal-schedule.txt
index e0ffe8d..c8bc454 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -548,16 +548,6 @@ Who:   John Stultz johns...@us.ibm.com
 
 
 
-What:  KVM kernel-allocated memory slots
-When:  July 2010
-Why:   Since 2.6.25, kvm supports user-allocated memory slots, which are
-   much more flexible than kernel-allocated slots.  All current userspace
-   supports the newer interface and this code can be removed with no
-   impact.
-Who:   Avi Kivity a...@redhat.com
-
-
-
 What:  KVM paravirt mmu host support
 When:  January 2011
 Why:   The paravirt mmu host support is slower than non-paravirt mmu, both
diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index 7e41594..d9b00f1 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -160,29 +160,7 @@ Type: vm ioctl
 Parameters: struct kvm_memory_region (in)
 Returns: 0 on success, -1 on error
 
-struct kvm_memory_region {
-   __u32 slot;
-   __u32 flags;
-   __u64 guest_phys_addr;
-   __u64 memory_size; /* bytes */
-};
-
-/* for kvm_memory_region::flags */
-#define KVM_MEM_LOG_DIRTY_PAGES  1UL
-
-This ioctl allows the user to create or modify a guest physical memory
-slot.  When changing an existing slot, it may be moved in the guest
-physical memory space, or its flags may be modified.  It may not be
-resized.  Slots may not overlap.
-
-The flags field supports just one flag, KVM_MEM_LOG_DIRTY_PAGES, which
-instructs kvm to keep track of writes to memory within the slot.  See
-the KVM_GET_DIRTY_LOG ioctl.
-
-It is recommended to use the KVM_SET_USER_MEMORY_REGION ioctl instead
-of this API, if available.  This newer API allows placing guest memory
-at specified locations in the host address space, yielding better
-control and easy access.
+This ioctl is obsolete and has been removed.
 
 4.6 KVM_CREATE_VCPU
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7d7558e..a962307 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2967,22 +2967,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out;
break;
}
-   case KVM_SET_MEMORY_REGION: {
-   struct kvm_memory_region kvm_mem;
-   struct kvm_userspace_memory_region kvm_userspace_mem;
-
-   r = -EFAULT;
-   if (copy_from_user(kvm_mem, argp, sizeof kvm_mem))
-   goto out;
-   kvm_userspace_mem.slot = kvm_mem.slot;
-   kvm_userspace_mem.flags = kvm_mem.flags;
-   kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
-   kvm_userspace_mem.memory_size = kvm_mem.memory_size;
-   r = kvm_vm_ioctl_set_memory_region(kvm, kvm_userspace_mem, 0);
-   if (r)
-   goto out;
-   break;
-   }
case KVM_SET_NR_MMU_PAGES:
r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
if (r)
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/2] KVM: Remove memory alias support

2010-06-21 Thread Avi Kivity

As advertised in feature-removal-schedule.txt.  Equivalent support is provided
by overlapping memory regions.

Signed-off-by: Avi Kivity a...@redhat.com
---
 Documentation/feature-removal-schedule.txt |   11 ---
 Documentation/kvm/api.txt  |   12 +---
 arch/ia64/kvm/kvm-ia64.c   |5 -
 arch/powerpc/kvm/powerpc.c |5 -
 arch/s390/kvm/kvm-s390.c   |5 -
 arch/x86/include/asm/kvm_host.h|   21 -
 arch/x86/kvm/mmu.c |   17 +---
 arch/x86/kvm/paging_tmpl.h |3 +-
 arch/x86/kvm/x86.c |  125 
 arch/x86/kvm/x86.h |7 --
 include/linux/kvm.h|1 +
 include/linux/kvm_host.h   |6 --
 virt/kvm/kvm_main.c|   18 +---
 13 files changed, 11 insertions(+), 225 deletions(-)

diff --git a/Documentation/feature-removal-schedule.txt 
b/Documentation/feature-removal-schedule.txt
index c268783..e0ffe8d 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -538,17 +538,6 @@ Who:   Jan Kiszka jan.kis...@web.de
 
 
 
-What:  KVM memory aliases support
-When:  July 2010
-Why:   Memory aliasing support is used for speeding up guest vga access
-   through the vga windows.
-
-   Modern userspace no longer uses this feature, so it's just bitrotted
-   code and can be removed with no impact.
-Who:   Avi Kivity a...@redhat.com
-
-
-
 What:  xtime, wall_to_monotonic
 When:  2.6.36+
 Files: kernel/time/timekeeping.c include/linux/time.h
diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index ffba03f..7e41594 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -226,17 +226,7 @@ Type: vm ioctl
 Parameters: struct kvm_memory_alias (in)
 Returns: 0 (success), -1 (error)
 
-struct kvm_memory_alias {
-   __u32 slot;  /* this has a different namespace than memory slots */
-   __u32 flags;
-   __u64 guest_phys_addr;
-   __u64 memory_size;
-   __u64 target_phys_addr;
-};
-
-Defines a guest physical address space region as an alias to another
-region.  Useful for aliased address, for example the VGA low memory
-window. Should not be used with userspace memory.
+This ioctl is obsolete and has been removed.
 
 4.9 KVM_RUN
 
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 91760e8..bd510be 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1946,11 +1946,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return vcpu-arch.timer_fired;
 }
 
-gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
-{
-   return gfn;
-}
-
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
return (vcpu-arch.mp_state == KVM_MP_STATE_RUNNABLE) ||
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index b5ebdfb..72a4ad8 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -36,11 +36,6 @@
 #define CREATE_TRACE_POINTS
 #include trace.h
 
-gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
-{
-   return gfn;
-}
-
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
return !(v-arch.msr  MSR_WE) || !!(v-arch.pending_exceptions);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index da1508d..47acdb1 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -723,11 +723,6 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
 {
 }
 
-gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
-{
-   return gfn;
-}
-
 static int __init kvm_s390_init(void)
 {
int ret;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2ec2e27..a57cdea 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -69,8 +69,6 @@
 
 #define IOPL_SHIFT 12
 
-#define KVM_ALIAS_SLOTS 4
-
 #define KVM_PERMILLE_MMU_PAGES 20
 #define KVM_MIN_ALLOC_MMU_PAGES 64
 #define KVM_MMU_HASH_SHIFT 10
@@ -362,24 +360,7 @@ struct kvm_vcpu_arch {
u64 hv_vapic;
 };
 
-struct kvm_mem_alias {
-   gfn_t base_gfn;
-   unsigned long npages;
-   gfn_t target_gfn;
-#define KVM_ALIAS_INVALID 1UL
-   unsigned long flags;
-};
-
-#define KVM_ARCH_HAS_UNALIAS_INSTANTIATION
-
-struct kvm_mem_aliases {
-   struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
-   int naliases;
-};
-
 struct kvm_arch {
-   struct kvm_mem_aliases *aliases;
-
unsigned int n_free_mmu_pages;
unsigned int n_requested_mmu_pages;
unsigned int n_alloc_mmu_pages;
@@ -655,8 +636,6 @@ void kvm_disable_tdp(void);
 int complete_pio(struct kvm_vcpu *vcpu);
 bool kvm_check_iopl(struct kvm_vcpu *vcpu);
 
-struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn);
-
 static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
 {
struct page

[PATCH] do not enter vcpu again if it was stopped during IO

2010-06-21 Thread Gleb Natapov

To prevent reentering vcpu after IO completion it is not enough
to set env-stopped since it is checked only in main loop but control
will not get there until next non-IO exit since kvm_run() will reenter
vcpu to complete IO instruction. Solve this by sending self-signal to
request exit after IO instruction completion.

Signed-off-by: Gleb Natapov g...@redhat.com
diff --git a/qemu-kvm.c b/qemu-kvm.c
index be1dac2..4f7cf6d 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -603,6 +603,10 @@ int kvm_run(CPUState *env)
 r = pre_kvm_run(kvm, env);
 if (r)
 return r;
+if (env-exit_request) {
+env-exit_request = 0;
+pthread_kill(env-kvm_cpu_state.thread, SIG_IPI);
+}
 r = ioctl(fd, KVM_RUN, 0);
 
 if (r == -1  errno != EINTR  errno != EAGAIN) {
diff --git a/vl.c b/vl.c
index 9e9c176..dcfab13 100644
--- a/vl.c
+++ b/vl.c
@@ -1817,6 +1817,7 @@ void qemu_system_reset_request(void)
 }
 if (cpu_single_env) {
 cpu_single_env-stopped = 1;
+cpu_exit(cpu_single_env);
 }
 qemu_notify_event();
 }
--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] QEMU: Update .gitignore

2010-06-21 Thread Hidetoshi Seto

(2010/06/21 17:19), Avi Kivity wrote:
 On 06/21/2010 08:24 AM, Hidetoshi Seto wrote:
 I think some people have noticed that:

 $ ./configure
 $ make
 $ git status
 # On branch master
 # Untracked files:
 #   (use git add file... to include in what will be committed)
 #
 #   QMP/qmp-commands.txt
 #   libdis-user/
 #   libdis/
 #   pc-bios/optionrom/vapic.bin
 nothing added to commit but untracked files present (use git add to track)
 
 Please consider applying this patch to qemu-kvm.git.
 
 This is equally applicable to qemu.git, so please send it to the qemu
 mailing list, qemu-de...@nongnu.org.

Thanks for your advice, Avi.

Now this mail is sent to qemu ML, w/ above quotes as short history.
Could someone pick this up?

Thanks,
H.Seto

=
Subject: [PATCH] QEMU: Update .gitignore

Add some files/directories to .gitignore

  - vapic.bin
  A generated binary file.
  - libdis/ and libdis-user/
  These are directories generated by ./configure.
  - QMP/qmp-commands.txt
  A generated text.

Signed-off-by: Hidetoshi Seto seto.hideto...@jp.fujitsu.com
---
 .gitignore |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore
index 2d7f439..fa4f241 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,8 @@ config-target.*
 libhw32
 libhw64
 libuser
+libdis
+libdis-user
 qemu-doc.html
 qemu-tech.html
 qemu-doc.info
@@ -26,6 +28,7 @@ qemu-img-cmds.texi
 qemu-img-cmds.h
 qemu-io
 qemu-monitor.texi
+QMP/qmp-commands.txt
 .gdbinit
 *.a
 *.aux
@@ -50,4 +53,5 @@ pc-bios/optionrom/linuxboot.bin
 pc-bios/optionrom/multiboot.bin
 pc-bios/optionrom/multiboot.raw
 pc-bios/optionrom/extboot.bin
+pc-bios/optionrom/vapic.bin
 .stgit-*
-- 1.7.0 

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH V2 1/5] para virt interface of perf to support kvm guest os statistics collection in guest os

2010-06-21 Thread Zhang, Yanmin

Here is the version 2.

ChangeLog since V1: Mostly changes based on Avi's suggestions.
1) Use an id to identify the perf_event between host and guest;
2) Change lots of code to deal with a malicious guest os;
3) Add a perf_event number limitation per guest os instance;
4) Support guest os on the top of another guest os scenario. But
I didn't test it yet as there is no environment. The design is to
add 2 pointers in struct perf_event. One is used by host and the
other is used by guest.
5) Fix the bug to support 'perf stat'. The key is sync count data
back to guest when guest tries to disable the perf_event at host
side.
6) Add a clear ABI of PV perf.

I don't implement live migration feature.

Avi,
Is live migration necessary on pv perf support?


Based on Ingo's idea, I implement a para virt interface for perf to support
statistics collection in guest os. That means we could run tool perf in guest
os directly.

Great thanks to Peter Zijlstra. He is really the architect and gave me 
architecture
design suggestions. I also want to thank Yangsheng and LinMing for their 
generous
help.

The design is:

1) Add a kvm_pmu whose callbacks mostly just call hypercalls to vmexit to host 
kernel;
2) Create a host perf_event per guest perf_event;
3) Host kernel syncs perf_event count/overflows data changes to guest perf_event
when processing perf_event overflows after NMI arrives. Host kernel injects an NMI 
to guest
kernel if a guest event overflows.
4) Guest kernel goes through all enabled events on current cpu and outputs data 
when they
overflow.
5) No change in user space.

Below is an example.

#perf top
--
   PerfTop:7954 irqs/sec  kernel:79.5%  exact:  0.0% [1000Hz cycles],  
(all, 8 CPUs)
--

 samples  pcnt function DSO
 ___ _  
_

 5315.00  4.9% copy_user_generic_string 
/lib/modules/2.6.35-rc1-tip-guestperf/build/vmlinux  
 3342.00  3.1% add_preempt_count
/lib/modules/2.6.35-rc1-tip-guestperf/build/vmlinux  
 3338.00  3.1% sub_preempt_count
/lib/modules/2.6.35-rc1-tip-guestperf/build/vmlinux  
 2454.00  2.3% pvclock_clocksource_read 
/lib/modules/2.6.35-rc1-tip-guestperf/build/vmlinux  
 2434.00  2.3% tcp_sendmsg  
/lib/modules/2.6.35-rc1-tip-guestperf/build/vmlinux  
 2090.00  1.9% child_run
/bm/tmp/benchmarks/run_bmtbench/dbench/dbench-3.03/tbench
 2081.00  1.9% debug_smp_processor_id   
/lib/modules/2.6.35-rc1-tip-guestperf/build/vmlinux  
 2003.00  1.9% __GI_strstr  /lib64/libc-2.11.so 
 
 1999.00  1.9% __strchr_sse2/lib64/libc-2.11.so 
 
 1983.00  1.8% tcp_ack  
/lib/modules/2.6.35-rc1-tip-guestperf/build/vmlinux  
 1800.00  1.7% tcp_transmit_skb 
/lib/modules/2.6.35-rc1-tip-guestperf/build/vmlinux  
 1727.00  1.6% schedule 
/lib/modules/2.6.35-rc1-tip-guestperf/build/vmlinux  
 1706.00  1.6% __libc_recv  /lib64/libc-2.11.so 
 
 1702.00  1.6% __GI_memchr  /lib64/libc-2.11.so 
 
 1580.00  1.5% tcp_recvmsg  
/lib/modules/2.6.35-rc1-tip-guestperf/build/vmlinux  

The patch is against tip/master tree of June 20th.

Signed-off-by: Zhang Yanmin yanmin_zh...@linux.intel.com

---

--- linux-2.6_tip0620/Documentation/kvm/paravirt-perf.txt   1970-01-01 
08:00:00.0 +0800
+++ linux-2.6_tip0620perfkvm/Documentation/kvm/paravirt-perf.txt
2010-06-21 15:21:39.312999849 +0800
@@ -0,0 +1,133 @@
+The x86 kvm paravirt perf event interface
+===
+
+This paravirt interface is responsible for supporting guest os perf event
+collections. If guest os supports this interface, users could run command
+perf in guest os directly.
+
+Design
+
+
+Guest os calls a series of hypercalls to communicate with host kernel to
+create/enable/disable/close perf events. Host kernel notifies guest os
+by injecting an NMI to guest os when an event overflows. Guest os needs to
+go through all its active events to check if they overflow, and output
+performance statistics if they do.
+
+ABI
+=
+
+1) Detect if host kernel supports paravirt perf interface:
+#define KVM_FEATURE_PV_PERF   4
+Host kernel defines above cpuid bit. Guest os calls

[PATCH V2 2/5] para virt interface of perf to support kvm guest os statistics collection in guest os

2010-06-21 Thread Zhang, Yanmin

The 2nd patch is to change the definition of perf_event to facilitate
perf attr copy when a hypercall happens.

Signed-off-by: Zhang Yanmin yanmin_zh...@linux.intel.com

---

--- linux-2.6_tip0620/include/linux/perf_event.h2010-06-21 
15:19:52.821999849 +0800
+++ linux-2.6_tip0620perfkvm/include/linux/perf_event.h 2010-06-21 
16:53:49.283999849 +0800
@@ -188,7 +188,10 @@ struct perf_event_attr {
__u64   sample_type;
__u64   read_format;
 
-   __u64   disabled   :  1, /* off by default*/
+   union {
+   __u64   flags;
+   struct {
+   __u64   disabled   :  1, /* off by default*/
inherit:  1, /* children inherit it   */
pinned :  1, /* must always be on PMU */
exclusive  :  1, /* only group on PMU */
@@ -217,6 +220,8 @@ struct perf_event_attr {
mmap_data  :  1, /* non-exec mmap data*/
 
__reserved_1   : 46;
+   };
+   };
 
union {
__u32   wakeup_events;/* wakeup every n events */
@@ -465,12 +470,6 @@ enum perf_callchain_context {
 # include asm/local64.h
 #endif
 
-struct perf_guest_info_callbacks {
-   int (*is_in_guest) (void);
-   int (*is_user_mode) (void);
-   unsigned long (*get_guest_ip) (void);
-};
-
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 #include asm/hw_breakpoint.h
 #endif
@@ -753,6 +752,20 @@ struct perf_event {
 
perf_overflow_handler_t overflow_handler;
 
+   /*
+* pointers used by kvm perf paravirt interface.
+*
+* 1) Used in host kernel and points to host_perf_shadow which
+* has information about guest perf_event
+*/
+   void*host_perf_shadow;
+   /*
+* 2) Used in guest kernel and points to guest_perf_shadow which
+* is used as a communication area with host kernel. Host kernel
+* copies overflow data to it when an event overflows.
+*/
+   void*guest_perf_shadow;
+
 #ifdef CONFIG_EVENT_TRACING
struct ftrace_event_call*tp_event;
struct event_filter *filter;
@@ -838,6 +851,16 @@ struct perf_output_handle {
int sample;
 };
 
+struct perf_guest_info_callbacks {
+   /* Support collect guest statistics from host side */
+   int (*is_in_guest) (void);
+   int (*is_user_mode) (void);
+   unsigned long (*get_guest_ip) (void);
+
+   /* Support paravirt interface */
+   void (*copy_event_to_shadow) (struct perf_event *event, int overflows);
+};
+
 #ifdef CONFIG_PERF_EVENTS
 
 /*
@@ -871,6 +894,10 @@ perf_event_create_kernel_counter(struct 
perf_overflow_handler_t callback);
 extern u64 perf_event_read_value(struct perf_event *event,
 u64 *enabled, u64 *running);
+extern void perf_event_output(struct perf_event *event, int nmi,
+   struct perf_sample_data *data, struct pt_regs *regs);
+void perf_event_attach(struct perf_event *event);
+void perf_event_detach(struct perf_event *event);
 
 struct perf_sample_data {
u64 type;
@@ -1023,6 +1050,14 @@ perf_event_task_sched_in(struct task_str
 static inline void
 perf_event_task_sched_out(struct task_struct *task,
struct task_struct *next)   { }
+
+static inline void
+perf_event_output(struct perf_event *event, int nmi,
+   struct perf_sample_data *data, struct pt_regs *regs){ }
+
+static inline void perf_event_attach(struct perf_event *event) { }
+static inline void perf_event_detach(struct perf_event *event) { }
+
 static inline void
 perf_event_task_tick(struct task_struct *task) { }
 static inline int perf_event_init_task(struct task_struct *child)  { 
return 0; }
--- linux-2.6_tip0620/kernel/watchdog.c 2010-06-21 15:20:48.517999849 +0800
+++ linux-2.6_tip0620perfkvm/kernel/watchdog.c  2010-06-21 15:21:39.315999849 
+0800
@@ -197,8 +197,6 @@ static struct perf_event_attr wd_hw_attr
.type   = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
.size   = sizeof(struct perf_event_attr),
-   .pinned = 1,
-   .disabled   = 1,
 };
 
 /* Callback function for perf event subsystem */
@@ -361,6 +359,8 @@ static int watchdog_nmi_enable(int cpu)
/* Try to register using hardware perf events */
wd_attr = wd_hw_attr;
wd_attr-sample_period = hw_nmi_get_sample_period();
+   wd_attr-pinned = 1;
+   wd_attr-disabled = 1;
event = perf_event_create_kernel_counter(wd_attr, cpu, -1,

[PATCH V2 3/5] para virt interface of perf to support kvm guest os statistics collection in guest os

2010-06-21 Thread Zhang, Yanmin

The 3rd patch is to implement para virt perf at host kernel.

Signed-off-by: Zhang Yanmin yanmin_zh...@linux.intel.com

---

--- linux-2.6_tip0620/arch/x86/include/asm/kvm_para.h   2010-06-21 
15:19:38.992999849 +0800
+++ linux-2.6_tip0620perfkvm/arch/x86/include/asm/kvm_para.h2010-06-21 
15:21:39.308999849 +0800
@@ -2,6 +2,7 @@
 #define _ASM_X86_KVM_PARA_H
 
 #include linux/types.h
+#include linux/list.h
 #include asm/hyperv.h
 
 /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx.  It
@@ -19,7 +20,8 @@
 /* This indicates that the new set of kvmclock msrs
  * are available. The use of 0x11 and 0x12 is deprecated
  */
-#define KVM_FEATURE_CLOCKSOURCE23
+#define KVM_FEATURE_CLOCKSOURCE2   3
+#define KVM_FEATURE_PV_PERF4
 
 /* The last 8 bits are used to indicate how to interpret the flags field
  * in pvclock structure. If no bits are set, all flags are ignored.
@@ -33,7 +35,14 @@
 #define MSR_KVM_WALL_CLOCK_NEW  0x4b564d00
 #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
 
-#define KVM_MAX_MMU_OP_BATCH   32
+#define KVM_MAX_MMU_OP_BATCH   32
+
+/* Operations for KVM_PERF_OP */
+#define KVM_PERF_OP_OPEN   1
+#define KVM_PERF_OP_CLOSE  2
+#define KVM_PERF_OP_ENABLE 3
+#define KVM_PERF_OP_DISABLE4
+#define KVM_PERF_OP_READ   5
 
 /* Operations for KVM_HC_MMU_OP */
 #define KVM_MMU_OP_WRITE_PTE1
@@ -64,6 +73,85 @@ struct kvm_mmu_op_release_pt {
 #ifdef __KERNEL__
 #include asm/processor.h
 
+/*
+ * data communication area about perf_event between
+ * Host kernel and guest kernel
+ */
+struct guest_perf_event {
+   u64 count;
+   atomic_t overflows;
+};
+
+/*
+ * In host kernel, perf_event-host_perf_shadow points to
+ * host_perf_shadow which records some information
+ * about the guest.
+ */
+struct host_perf_shadow {
+   /* guest perf_event id passed from guest os */
+   int id;
+   /*
+* Host kernel saves data into data member counter firstly.
+* kvm will get data from this counter and calls kvm functions
+* to copy or add data back to guest os before entering guest os
+* next time
+*/
+   struct guest_perf_event counter;
+   /* guest_event_addr is gpa_t pointing to guest os guest_perf_event*/
+   __u64 guest_event_addr;
+
+   /*
+* Link to  of kvm.kvm_arch.shadow_hash_table
+*/
+   struct list_head shadow_entry;
+   struct kvm_vcpu *vcpu;
+
+   struct perf_event *host_event;
+   /*
+* Below counter is to prevent malicious guest os to try to
+* close/enable event at the same time.
+*/
+   atomic_t ref_counter;
+};
+
+/*
+ * In guest kernel, perf_event-guest_shadow points to
+ * guest_perf_shadow which records some information
+ * about the guest.
+ */
+struct guest_perf_shadow {
+   /* guest perf_event id passed from guest os */
+   int id;
+   /*
+* Host kernel kvm saves data into data member counter
+*/
+   struct guest_perf_event counter;
+};
+
+/*
+ * guest_perf_attr is used when guest calls hypercall to
+ * open a new perf_event at host side. Mostly, it's a copy of
+ * perf_event_attr and deletes something not used by host kernel.
+ */
+struct guest_perf_attr {
+   __u32   type;
+   __u64   config;
+   __u64   sample_period;
+   __u64   sample_type;
+   __u64   read_format;
+   __u64   flags;
+   __u32   bp_type;
+   __u64   bp_addr;
+   __u64   bp_len;
+};
+
+struct guest_perf_event_param {
+   __u64 attr_addr;
+   __u64 guest_event_addr;
+   /* In case there is an alignment issue, we put id as the last one */
+   int id;
+};
+
 extern void kvmclock_init(void);
 
 
--- linux-2.6_tip0620/arch/x86/include/asm/kvm_host.h   2010-06-21 
15:19:39.01849 +0800
+++ linux-2.6_tip0620perfkvm/arch/x86/include/asm/kvm_host.h2010-06-21 
15:21:39.308999849 +0800
@@ -24,6 +24,7 @@
 #include asm/desc.h
 #include asm/mtrr.h
 #include asm/msr-index.h
+#include asm/perf_event.h
 
 #define KVM_MAX_VCPUS 64
 #define KVM_MEMORY_SLOTS 32
@@ -360,6 +361,18 @@ struct kvm_vcpu_arch {
 
/* fields used by HYPER-V emulation */
u64 hv_vapic;
+
+   /*
+* Fields used by PARAVIRT perf interface:
+*
+* kvm checks overflow_events before entering guest os,
+* and copy data back to guest os.
+* event_mutex is to avoid a race between NMI perf event overflow
+* handler, event close, and enable/disable.
+*/
+   struct mutex event_mutex;
+   int overflows;
+   struct perf_event *overflow_events[X86_PMC_IDX_MAX];
 };
 
 struct kvm_mem_alias {
@@ -377,6 +390,9 @@ struct kvm_mem_aliases {
int naliases;
 };
 
+#define KVM_PARAVIRT_PERF_EVENT_ENTRY_BITS (10)

[PATCH V2 5/5] para virt interface of perf to support kvm guest os statistics collection in guest os

2010-06-21 Thread Zhang, Yanmin

The 5th patch is applied to the latest qemu-kvm tree.

Signed-off-by: Zhang Yanmin yanmin_zh...@linux.intel.com

---

diff -Nraup qemu-kvm_0621/kvm/include/linux/kvm.h 
qemu-kvm_0621_perf/kvm/include/linux/kvm.h
--- qemu-kvm_0621/kvm/include/linux/kvm.h   2010-06-21 11:00:28.0 
+0800
+++ qemu-kvm_0621_perf/kvm/include/linux/kvm.h  2010-06-21 13:23:51.537999849 
+0800
@@ -530,6 +530,7 @@ struct kvm_enable_cap {
 #ifdef __KVM_HAVE_XCRS
 #define KVM_CAP_XCRS 56
 #endif
+#define KVM_CAP_PV_PERF 57
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff -Nraup qemu-kvm_0621/kvm/include/x86/asm/kvm_para.h 
qemu-kvm_0621_perf/kvm/include/x86/asm/kvm_para.h
--- qemu-kvm_0621/kvm/include/x86/asm/kvm_para.h2010-06-21 
11:00:28.0 +0800
+++ qemu-kvm_0621_perf/kvm/include/x86/asm/kvm_para.h   2010-06-21 
13:27:04.375999849 +0800
@@ -15,6 +15,7 @@
 #define KVM_FEATURE_CLOCKSOURCE0
 #define KVM_FEATURE_NOP_IO_DELAY   1
 #define KVM_FEATURE_MMU_OP 2
+#define KVM_FEATURE_PV_PERF4
 
 #define MSR_KVM_WALL_CLOCK  0x11
 #define MSR_KVM_SYSTEM_TIME 0x12
diff -Nraup qemu-kvm_0621/target-i386/kvm.c qemu-kvm_0621_perf/target-i386/kvm.c
--- qemu-kvm_0621/target-i386/kvm.c 2010-06-21 11:00:29.0 +0800
+++ qemu-kvm_0621_perf/target-i386/kvm.c2010-06-21 13:00:14.136999850 
+0800
@@ -150,6 +150,9 @@ struct kvm_para_features {
 #ifdef KVM_CAP_PV_MMU
 { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP },
 #endif
+#ifdef KVM_CAP_PV_PERF
+{ KVM_CAP_PV_PERF, KVM_FEATURE_PV_PERF },
+#endif
 { -1, -1 }
 };


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH V2 4/5] para virt interface of perf to support kvm guest os statistics collection in guest os

2010-06-21 Thread Zhang, Yanmin

The 4th patch is to implement para virt perf at guest side.

Signed-off-by: Zhang Yanmin yanmin_zh...@linux.intel.com

---

--- linux-2.6_tip0620/arch/x86/Kconfig  2010-06-21 15:19:39.180999849 +0800
+++ linux-2.6_tip0620perfkvm/arch/x86/Kconfig   2010-06-21 15:21:39.30849 
+0800
@@ -552,6 +552,14 @@ config KVM_GUEST
  This option enables various optimizations for running under the KVM
  hypervisor.
 
+config KVM_PERF
+   bool KVM Guest perf support
+   select PARAVIRT
+   select PERF_EVENT
+   ---help---
+ This option enables various optimizations for running perf in
+ guest os under the KVM hypervisor.
+
 source arch/x86/lguest/Kconfig
 
 config PARAVIRT
--- linux-2.6_tip0620/arch/x86/kernel/cpu/perf_event.c  2010-06-21 
15:19:39.964999849 +0800
+++ linux-2.6_tip0620perfkvm/arch/x86/kernel/cpu/perf_event.c   2010-06-21 
16:44:36.602999849 +0800
@@ -25,6 +25,7 @@
 #include linux/highmem.h
 #include linux/cpu.h
 #include linux/bitops.h
+#include linux/kvm_para.h
 
 #include asm/apic.h
 #include asm/stacktrace.h
@@ -583,10 +584,20 @@ static void x86_pmu_disable_all(void)
}
 }
 
+#ifdef CONFIG_KVM_PERF
+static int kvm_hw_perf_enable(void);
+static int kvm_hw_perf_disable(void);
+#endif
+
 void hw_perf_disable(void)
 {
struct cpu_hw_events *cpuc = __get_cpu_var(cpu_hw_events);
 
+#ifdef CONFIG_KVM_PERF
+   if (!kvm_hw_perf_disable())
+   return;
+#endif
+
if (!x86_pmu_initialized())
return;
 
@@ -810,6 +821,11 @@ void hw_perf_enable(void)
struct hw_perf_event *hwc;
int i, added = cpuc-n_added;
 
+#ifdef CONFIG_KVM_PERF
+   if (!kvm_hw_perf_enable())
+   return;
+#endif
+
if (!x86_pmu_initialized())
return;
 
@@ -1264,6 +1280,7 @@ x86_get_event_constraints(struct cpu_hw_
 #include perf_event_intel_lbr.c
 #include perf_event_intel_ds.c
 #include perf_event_intel.c
+#include perf_event_kvm.c
 
 static int __cpuinit
 x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
@@ -1317,6 +1334,11 @@ void __init init_hw_perf_events(void)
 
pr_info(Performance Events: );
 
+#ifdef CONFIG_KVM_PERF
+   if (!kvm_init_hw_perf_events())
+   return;
+#endif
+
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
err = intel_pmu_init();
@@ -1541,6 +1563,13 @@ const struct pmu *hw_perf_event_init(str
const struct pmu *tmp;
int err;
 
+#ifdef CONFIG_KVM_PERF
+   if (kvm_para_available()) {
+   tmp = kvm_hw_perf_event_init(event);
+   return tmp;
+   }
+#endif
+
err = __hw_perf_event_init(event);
if (!err) {
/*
--- linux-2.6_tip0620/arch/x86/kernel/cpu/perf_event_kvm.c  1970-01-01 
08:00:00.0 +0800
+++ linux-2.6_tip0620perfkvm/arch/x86/kernel/cpu/perf_event_kvm.c   
2010-06-21 16:44:56.735999849 +0800
@@ -0,0 +1,426 @@
+/*
+ * Performance events
+ *
+ * Copyright (C) 2010 Intel Corporation
+ * Zhang Yanmin yanmin.zh...@intel.com
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+
+#ifdef CONFIG_KVM_PERF
+
+static atomic_t guest_perf_id; /*Global id counter per guest os*/
+
+static inline int get_new_perf_event_id(void)
+{
+   return atomic_inc_return(guest_perf_id);
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+static bool kvm_reserve_pmc_hardware(void)
+{
+   if (nmi_watchdog == NMI_LOCAL_APIC)
+   disable_lapic_nmi_watchdog();
+
+   return true;
+}
+
+static void kvm_release_pmc_hardware(void)
+{
+   if (nmi_watchdog == NMI_LOCAL_APIC)
+   enable_lapic_nmi_watchdog();
+}
+
+#else
+
+static bool kvm_reserve_pmc_hardware(void) { return true; }
+static void kvm_release_pmc_hardware(void) {}
+
+#endif
+
+static void kvm_hw_perf_event_destroy(struct perf_event *event)
+{
+   struct guest_perf_shadow *shadow = event-guest_perf_shadow;
+
+   BUG_ON(!shadow);
+   kvm_hypercall2(KVM_PERF_OP, KVM_PERF_OP_CLOSE, shadow-id);
+
+   kfree(shadow);
+   event-guest_perf_shadow = NULL;
+
+   if (atomic_dec_and_mutex_lock(active_events, pmc_reserve_mutex)) {
+   kvm_release_pmc_hardware();
+   mutex_unlock(pmc_reserve_mutex);
+   }
+}
+
+/* The guest might also run as a host */
+static int check_ontop_guest_overflow(struct perf_event *event, int overflows)
+{
+   struct host_perf_shadow *host_shadow = event-host_perf_shadow;
+   if (!host_shadow)
+   return 0;
+
+   if (perf_guest_cbs)
+   perf_guest_cbs-copy_event_to_shadow(event, overflows);
+
+   return 1;
+}
+
+static int
+check_event_overflow(struct perf_event *event, struct pt_regs *regs)
+{
+   struct perf_sample_data data;
+   struct guest_perf_shadow *guest_shadow = event-guest_perf_shadow;
+   s32 overflows;
+   int i;
+   int handled = 0;
+
+   local64_set(event-count,

[PATCH] KVM Test: Fix invalid literal bug in ioquit

2010-06-21 Thread Feng Yang

Sometimes check_cmd cannot finish within the set timeout.
Then o="", so int(o) will cause ValueError:
invalid literal for int() with base 10: ''
So change the test to check the return status instead.

Signed-off-by: Feng Yang fy...@redhat.com
---
 client/tests/kvm/tests/ioquit.py   |6 +++---
 client/tests/kvm/tests_base.cfg.sample |2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/client/tests/kvm/tests/ioquit.py b/client/tests/kvm/tests/ioquit.py
index 389a867..8126139 100644
--- a/client/tests/kvm/tests/ioquit.py
+++ b/client/tests/kvm/tests/ioquit.py
@@ -23,13 +23,13 @@ def run_ioquit(test, params, env):
 (s, o) = session.get_command_status_output(bg_cmd, timeout=60)
 check_cmd = params.get(check_cmd)
 (s, o) = session2.get_command_status_output(check_cmd, timeout=60)
-if int(o) = 0:
+if s:
 raise error.TestError(Fail to add IO workload for Guest OS)
 
 logging.info(Sleep for a while)
 time.sleep(random.randrange(30,100))
-(s, o) = session2.get_command_status_output(check_cmd, timeout=300)
-if int(o) = 0:
+(s, o) = session2.get_command_status_output(check_cmd, timeout=60)
+if s:
 logging.info(IO workload finished before the VM was killed)
 logging.info(Kill the virtual machine)
 vm.process.close()
diff --git a/client/tests/kvm/tests_base.cfg.sample 
b/client/tests/kvm/tests_base.cfg.sample
index ce88235..0fd5543 100644
--- a/client/tests/kvm/tests_base.cfg.sample
+++ b/client/tests/kvm/tests_base.cfg.sample
@@ -411,7 +411,7 @@ variants:
 - ioquit:
 type = ioquit
 background_cmd = for i in 1 2 3 4; do (nohup dd if=/dev/urandom 
of=/tmp/file bs=102400 count=1000 ) done
-check_cmd = ps -a |grep dd |wc -l
+check_cmd = ps -a |grep dd
 login_timeout = 360
 
 - qemu_img:
-- 
1.5.5.6

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: kvm-s390: Dont exit SIE on SIGP sense running

2010-06-21 Thread Christian Borntraeger

Am Montag 21 Juni 2010, 10:27:50 schrieb Avi Kivity:
[...]
  VCPU_EVENT(vcpu, 3, %s, free cpu);
  +   clear_bit(63 - vcpu-vcpu_id, (unsigned long 
  *)vcpu-kvm-arch.sca-mcn);
  if (vcpu-kvm-arch.sca-cpu[vcpu-vcpu_id].sda ==
  (__u64) vcpu-arch.sie_block)
  vcpu-kvm-arch.sca-cpu[vcpu-vcpu_id].sda = 0;
 
 
 Unrelated, do these VCPU_EVENTs want to become ftrace tracepoints?
 
 

Hmm, don't know. Currently this calls into an s390 debug tracing facility
(arch/s390/kernel/debug.c) which is heavily used by our service folks.
There are commands for crash and lcrash to show these s390 debug traces
from a dump.

Maybe it's worth investigating whether we should change some of these events to
have both ftrace tracepoints and the debug traces.

Christian
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: kvm-s390: Dont exit SIE on SIGP sense running

2010-06-21 Thread Arnd Bergmann

On Monday 21 June 2010, Christian Borntraeger wrote:
 Hmm, dont know. Currently this calls into a s390 debug tracing facility
 (arch/s390/kernel/debug.c) which is heavily used by our service folks.
 There are commands for crash and lcrash to show these s390 debug traces
 from a dump.
 
 Maybe its worth to investigate if we should change some of these events to
 have both ftrace-tracepoints and the debug traces.

I think that it would be worthwhile to convert the entire s390 debug
code to become tracepoints, either one by one or making it a subclass
with the existing interfaces.

Arnd
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] KVM Test: Fix invalid literal bug in ioquit

2010-06-21 Thread Michael Goldish

On 06/21/2010 01:07 PM, Feng Yang wrote:
 Sometime check_cmd could not finish in setting time.
 Then o=, so int(o) will cause ValueError:
 invalid literal for int() with base 10: ''
 So change to check return status.
 
 Signed-off-by: Feng Yang fy...@redhat.com
 ---
  client/tests/kvm/tests/ioquit.py   |6 +++---
  client/tests/kvm/tests_base.cfg.sample |2 +-
  2 files changed, 4 insertions(+), 4 deletions(-)
 
 diff --git a/client/tests/kvm/tests/ioquit.py 
 b/client/tests/kvm/tests/ioquit.py
 index 389a867..8126139 100644
 --- a/client/tests/kvm/tests/ioquit.py
 +++ b/client/tests/kvm/tests/ioquit.py
 @@ -23,13 +23,13 @@ def run_ioquit(test, params, env):
  (s, o) = session.get_command_status_output(bg_cmd, timeout=60)
  check_cmd = params.get(check_cmd)
  (s, o) = session2.get_command_status_output(check_cmd, timeout=60)
 -if int(o) = 0:
 +if s:
  raise error.TestError(Fail to add IO workload for Guest OS)

Please use 'if s != 0' because in case of a timeout s is None.

  logging.info(Sleep for a while)
  time.sleep(random.randrange(30,100))
 -(s, o) = session2.get_command_status_output(check_cmd, timeout=300)
 -if int(o) = 0:
 +(s, o) = session2.get_command_status_output(check_cmd, timeout=60)
 +if s:

Same here.

  logging.info(IO workload finished before the VM was killed)
  logging.info(Kill the virtual machine)
  vm.process.close()
 diff --git a/client/tests/kvm/tests_base.cfg.sample 
 b/client/tests/kvm/tests_base.cfg.sample
 index ce88235..0fd5543 100644
 --- a/client/tests/kvm/tests_base.cfg.sample
 +++ b/client/tests/kvm/tests_base.cfg.sample
 @@ -411,7 +411,7 @@ variants:
  - ioquit:
  type = ioquit
  background_cmd = for i in 1 2 3 4; do (nohup dd if=/dev/urandom 
 of=/tmp/file bs=102400 count=1000 ) done
 -check_cmd = ps -a |grep dd |wc -l
 +check_cmd = ps -a |grep dd
  login_timeout = 360
  
  - qemu_img:

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[ kvm-Bugs-1858940 ] Kernel panic - not syncing: IO-APIC + timer doesn't work.

2010-06-21 Thread SourceForge.net

Bugs item #1858940, was opened at 2007-12-27 15:08
Message generated for change (Comment added) made by jessorensen
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1858940&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Closed
Resolution: Fixed
Priority: 5
Private: No
Submitted By: Kai Londenberg (kai_londenberg)
Assigned to: Nobody/Anonymous (nobody)
Summary: Kernel panic - not syncing: IO-APIC + timer doesn't work. 

Initial Comment:

The problem:

When booting a Guest with -smp 2 option (and without -no-acpi, since that's 
essential for SMP), I get a Kernel panic.

My Setup:

Host:

x86_64 (64-bit) Ubuntu 7.10 with a custom-built 2.6.23.12 kernel.
AMD Dual CPU with Virtualization extensions.

Guest:

Ubuntu 6.06 - Server -  with pre-compiled 2.6.15.29 server Kernel, with SMP 
Support.


I logged the kernel output via serial console redirection into a file.









--

Comment By: Jes Sorensen (jessorensen)
Date: 2010-06-21 13:32

Message:
Ubuntu 6.06 x86_64 boots fine with -smp 2 on an AMD system with a recent
KVM / qemu-kvm combo.

Closing


--

Comment By: TJ (tjworld)
Date: 2008-01-30 21:40

Message:
Logged In: YES 
user_id=1048563
Originator: NO

I'm experiencing the same problem on Ubuntu Gutsy 7.10 x86_64
(2.6.22-14-generic).

I have working guest images of Ubuntu Gutsy 7.10 x86 with kvm-51 but
hadn't used them in a while. Tried to boot them earlier using the same
launch scripts as usual and the process hung with the CPU looping at 100%
usage.

I then downloaded, built, and installed kvm-60 thinking it was some
strange issue that hadn't manifested previously.

The same problem occurs with kvm-60.

I then booted the images with modified grub settings to watch the kernel
log and saw it fails to find a timer as reported here.

I eventually found that by adding -no-acpi to the launch command-line
they would boot successfully.

I'm not sure what changed in the interim but obviously something did.

This is the working launch options:

qemuctl -qemu vdeq kvm -name Gutsy-Desktop -boot c -m 512 -hda
/home/all/VirtualMachines/Ubuntu-Gutsy-Desktop-x86.ovl -k en-gb -net
nic,model=rtl8139,macaddr=56:44:45:30:30:31,vlan=0 -soundhw es1370 -usb
-net vde,sock=/var/run/kvm0.ctl,vlan=0 -no-acpi

--

Comment By: Technologov (technologov)
Date: 2007-12-27 16:19

Message:
Logged In: YES 
user_id=1839746
Originator: NO

1. If you use KVM from 2.6.23 kernel, then I strongly recommend you to try
the newest KVM-58, as it has some fixes for AMD.

2. What bitness ? Guest OS is 32 or 64-bit ?

3. ACPI is essential only for Windows SMP. On Linux, however, SMP works,
and turning ACPI on or off doesn't affect it.

-Alexey Technologov

--

Comment By: Kai Londenberg (kai_londenberg)
Date: 2007-12-27 15:11

Message:
Logged In: YES 
user_id=1299941
Originator: YES

Using kvm 58, started with -smp 2 option.

--

Comment By: Kai Londenberg (kai_londenberg)
Date: 2007-12-27 15:09

Message:
Logged In: YES 
user_id=1299941
Originator: YES

File Added: boot.log

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1858940&group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH V2 1/5] para virt interface of perf to support kvm guest os statistics collection in guest os

2010-06-21 Thread Avi Kivity


On 06/21/2010 12:31 PM, Zhang, Yanmin wrote:

Here is the version 2.

ChangeLog since V1: Mostly changes based on Avi's suggestions.
1) Use an id to identify the perf_event between host and guest;
2) Change lots of code to deal with a malicious guest os;
3) Add a perf_event number limitation per guest os instance;
4) Support guest os on the top of another guest os scenario. But
I didn't test it yet as there is no environment. The design is to
add 2 pointers in struct perf_event. One is used by host and the
other is used by guest.
5) Fix the bug to support 'perf stat'. The key is to sync count data
back to guest when guest tries to disable the perf_event at host
side.
6) Add a clear ABI of PV perf.

   


Please use meaningful subject lines for individual patches.


I don't implement live migration feature.

Avi,
Is live migration necessary on pv perf support?
   


Yes.


--- linux-2.6_tip0620/Documentation/kvm/paravirt-perf.txt   1970-01-01 
08:00:00.0 +0800
+++ linux-2.6_tip0620perfkvm/Documentation/kvm/paravirt-perf.txt
2010-06-21 15:21:39.312999849 +0800
@@ -0,0 +1,133 @@
+The x86 kvm paravirt perf event interface
+===
+
+This paravirt interface is responsible for supporting guest os perf event
+collections. If guest os supports this interface, users could run command
+perf in guest os directly.
+
+Design
+
+
+Guest os calls a series of hypercalls to communicate with host kernel to
+create/enable/disable/close perf events. Host kernel notifies guest os
+by injecting an NMI to guest os when an event overflows. Guest os needs to
+go through all its active events to check if they overflow, and output
+performance statistics if they do.
+
+ABI
+=
+
+1) Detect if host kernel supports paravirt perf interface:
+#define KVM_FEATURE_PV_PERF   4
+Host kernel defines above cpuid bit. Guest os calls cpuid to check if host
+os returns this bit. If it does, it means host kernel supports paravirt perf
+interface.
+
+2) Open a new event at host side:
+kvm_hypercall3(KVM_PERF_OP, KVM_PERF_OP_OPEN, param_addr_low32bit,
+param_addr_high32bit);
+
+#define KVM_PERF_OP3
+/* Operations for KVM_PERF_OP */
+#define KVM_PERF_OP_OPEN1
+#define KVM_PERF_OP_CLOSE   2
+#define KVM_PERF_OP_ENABLE  3
+#define KVM_PERF_OP_DISABLE 4
+#define KVM_PERF_OP_READ5
   



+/*
+ * guest_perf_attr is used when guest calls hypercall to
+ * open a new perf_event at host side. Mostly, it's a copy of
+ * perf_event_attr and deletes something not used by host kernel.
+ */
+struct guest_perf_attr {
+__u32   type;
   


Need padding here, otherwise the structure is different on 32-bit and 
64-bit guests.



+__u64   config;
+__u64   sample_period;
+__u64   sample_type;
+__u64   read_format;
+__u64   flags;
   


and here.


+__u32   bp_type;
+__u64   bp_addr;
+__u64   bp_len;
   


Do we actually support breakpoints on the guest?  Note the hardware 
breakpoints are also usable by the guest, so if the host uses them, we 
won't be able to emulate them correctly.  We can let the guest do
breakpoint perf monitoring itself and drop this feature.



+};
   


What about documentation for individual fields?  Esp. type, config, and 
flags, but also the others.



+/*
+ * data communication area about perf_event between
+ * Host kernel and guest kernel
+ */
+struct guest_perf_event {
+u64 count;
+atomic_t overflows;
   


Please use __u64 and __u32, assume guests don't have Linux internal 
types (though of course the first guest _is_ Linux).


Add padding to 64-bit.


+};
+struct guest_perf_event_param {
+__u64 attr_addr;
+__u64 guest_event_addr;
+/* In case there is an alignment issue, we put id as the last one */
+int id;
   


Add explicit padding to be sure.

Also makes sense to add a flags field for future expansion.


+};
+
+param_addr_low32bit and param_addr_high32bit compose a u64 integer which means
+the physical address of parameter struct guest_perf_event_param.
+struct guest_perf_event_param consists of 3 members. attr_addr has the
+physical address of parameter struct guest_perf_attr. guest_event_addr has the
+physical address of a parameter whose type is struct guest_perf_event which
+has to be aligned with 4 bytes.
+guest os need allocate an exclusive id per event in this guest os instance, 
and save it to
+guest_perf_event_param-id. Later on, the id is the only method to notify host
+kernel about on what event guest os wants host kernel to operate.
   


Need a way to expose the maximum number of events available to the 
guest.  I suggest exposing it

Re: [PATCH V2 2/5] para virt interface of perf to support kvm guest os statistics collection in guest os

2010-06-21 Thread Avi Kivity


On 06/21/2010 12:31 PM, Zhang, Yanmin wrote:

The 2nd patch is to change the definition of perf_event to facilitate
perf attr copy when a hypercall happens.

Signed-off-by: Zhang Yanmin yanmin_zh...@linux.intel.com

---

--- linux-2.6_tip0620/include/linux/perf_event.h2010-06-21 
15:19:52.821999849 +0800
+++ linux-2.6_tip0620perfkvm/include/linux/perf_event.h 2010-06-21 
16:53:49.283999849 +0800
@@ -188,7 +188,10 @@ struct perf_event_attr {
__u64   sample_type;
__u64   read_format;

   


Assuming these flags are available to the guest?


-   __u64   disabled   :  1, /* off by default*/
+   union {
+   __u64   flags;
+   struct {
+   __u64   disabled   :  1, /* off by default*/
inherit:  1, /* children inherit it   */
   


inherit is meaningless for a guest.


pinned :  1, /* must always be on PMU */
   


We cannot allow a guest to pin a counter.

The other flags are also problematic.  I'd like to see virt-specific 
flags (probably we'll only need kernel/user and nested_hv for nested 
virtualization).


Something that is worrying is that we don't expose group information.  
perf will multiplex the events for us, but there will be a loss in accuracy.



  #ifdef CONFIG_HAVE_HW_BREAKPOINT
  #includeasm/hw_breakpoint.h
  #endif
@@ -753,6 +752,20 @@ struct perf_event {

perf_overflow_handler_t overflow_handler;

+   /*
+* pointers used by kvm perf paravirt interface.
+*
+* 1) Used in host kernel and points to host_perf_shadow which
+* has information about guest perf_event
+*/
+   void*host_perf_shadow;
   


Can we have real types instead of void pointers?


+   /*
+* 2) Used in guest kernel and points to guest_perf_shadow which
+* is used as a communication area with host kernel. Host kernel
+* copies overflow data to it when an event overflows.
+*/
+   void*guest_perf_shadow;
   


It's strange to see both guest and host parts in the same patch.  
Splitting to separate patches will really help review.



@@ -1626,9 +1629,22 @@ void perf_event_task_tick(struct task_st
if (ctx  ctx-nr_events  ctx-nr_events != ctx-nr_active)
rotate = 1;

-   perf_ctx_adjust_freq(cpuctx-ctx);
-   if (ctx)
-   perf_ctx_adjust_freq(ctx);
+#ifdef CONFIG_KVM_PERF
+   if (kvm_para_available()) {
+   /*
+* perf_ctx_adjust_freq causes lots of pmu-read which would
+* trigger too many vmexit to host kernel. We disable it
+* under para virt situation
+*/
+   adjust_freq = 0;
+   }
+#endif
   


Perhaps we can have a batch read interface which will read many counters 
at once.  This would reduce the number of exits.  Also adjust the 
frequency less frequently.



+
+   if (adjust_freq) {
+   perf_ctx_adjust_freq(cpuctx-ctx);
+   if (ctx)
+   perf_ctx_adjust_freq(ctx);
+   }

   


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH V2 3/5] para virt interface of perf to support kvm guest os statistics collection in guest os

2010-06-21 Thread Avi Kivity


On 06/21/2010 12:31 PM, Zhang, Yanmin wrote:

The 3rd patch is to implement para virt perf at host kernel.


@@ -64,6 +73,85 @@ struct kvm_mmu_op_release_pt {
  #ifdef __KERNEL__
  #includeasm/processor.h


+/*
+ * In host kernel, perf_event-host_perf_shadow points to
+ * host_perf_shadow which records some information
+ * about the guest.
+ */
+struct host_perf_shadow {
+   /* guest perf_event id passed from guest os */
+   int id;
+   /*
+* Host kernel saves data into data member counter firstly.
+* kvm will get data from this counter and calls kvm functions
+* to copy or add data back to guest os before entering guest os
+* next time
+*/
+   struct guest_perf_event counter;
+   /* guest_event_addr is gpa_t pointing to guest os guest_perf_event*/
+   __u64 guest_event_addr;
   


So just use gpa_t as the type.


+
+   /*
+* Link to  of kvm.kvm_arch.shadow_hash_table
+*/
+   struct list_head shadow_entry;
+   struct kvm_vcpu *vcpu;
+
+   struct perf_event *host_event;
+   /*
+* Below counter is to prevent malicious guest os to try to
+* close/enable event at the same time.
+*/
+   atomic_t ref_counter;
   


If events are made per-vcpu (like real hardware), races become impossible.


+};
   


Please move this structure to include/linux/kvm_host.h.  No need to spam 
kvm_para.h.  Note it's not x86 specific (though you can leave arch 
enabling to arch maintainers).



+
+/*
+ * In guest kernel, perf_event-guest_shadow points to
+ * guest_perf_shadow which records some information
+ * about the guest.
+ */
+struct guest_perf_shadow {
+   /* guest perf_event id passed from guest os */
+   int id;
+   /*
+* Host kernel kvm saves data into data member counter
+*/
+   struct guest_perf_event counter;
+};
   


Don't ordinary perf structures already have a counter ID which we can reuse?


+
+/*
+ * guest_perf_attr is used when guest calls hypercall to
+ * open a new perf_event at host side. Mostly, it's a copy of
+ * perf_event_attr and deletes something not used by host kernel.
+ */
+struct guest_perf_attr {
+   __u32   type;
+   __u64   config;
+   __u64   sample_period;
+   __u64   sample_type;
+   __u64   read_format;
+   __u64   flags;
+   __u32   bp_type;
+   __u64   bp_addr;
+   __u64   bp_len;
+};
   


This is really not a guest or host structure, it's part of the 
interface.  So please rename it (and similar) kvm_pv_perf_*.



@@ -24,6 +24,7 @@
  #includeasm/desc.h
  #includeasm/mtrr.h
  #includeasm/msr-index.h
+#includeasm/perf_event.h

  #define KVM_MAX_VCPUS 64
  #define KVM_MEMORY_SLOTS 32
@@ -360,6 +361,18 @@ struct kvm_vcpu_arch {

/* fields used by HYPER-V emulation */
u64 hv_vapic;
+
+   /*
+* Fields used by PARAVIRT perf interface:
+*
+* kvm checks overflow_events before entering guest os,
+* and copy data back to guest os.
+* event_mutex is to avoid a race between NMI perf event overflow
+* handler, event close, and enable/disable.
+*/
+   struct mutex event_mutex;
   


No race can exist.  The host NMI handler cannot take any mutex so it 
must be immune to races.  The guest NMI handlers and callbacks are all 
serialized by the guest itself.



+   int overflows;
+   struct perf_event *overflow_events[X86_PMC_IDX_MAX];
  };
   


KVM_PV_PERF_MAX_EVENTS (which needs to be exposed to the guest via cpuid).



  struct kvm_mem_alias {
@@ -377,6 +390,9 @@ struct kvm_mem_aliases {
int naliases;
  };

+#define KVM_PARAVIRT_PERF_EVENT_ENTRY_BITS (10)
+#define KVM_PARAVIRT_PERF_EVENT_ENTRY_NUM  
(1KVM_PARAVIRT_PERF_EVENT_ENTRY_BITS)
   


What are these?


+
  struct kvm_arch {
struct kvm_mem_aliases *aliases;

@@ -415,6 +431,15 @@ struct kvm_arch {
/* fields used by HYPER-V emulation */
u64 hv_guest_os_id;
u64 hv_hypercall;
+
+   /*
+* fields used by PARAVIRT perf interface:
+* Used to organize all host perf_events representing guest
+* perf_event on a specific kvm instance
+*/
+   atomic_t kvm_pv_event_num;
+   spinlock_t shadow_lock;
+   struct list_head *shadow_hash_table;
   


Need to be per-vcpu.  Also wrap in a kvm_vcpu_perf structure, the names 
are very generic.


Why do we need the hash table?  Use the index directly?


  /*
   * hypercalls use architecture specific
--- linux-2.6_tip0620/arch/x86/kvm/vmx.c2010-06-21 15:19:39.322999849 
+0800
+++ linux-2.6_tip0620perfkvm/arch/x86/kvm/vmx.c 2010-06-21 15:21:39.310999849 
+0800
@@ -3647,6 +3647,7 @@ static int vmx_handle_exit(struct kvm_vc
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exit_reason =

Re: [PATCH V2 5/5] para virt interface of perf to support kvm guest os statistics collection in guest os

2010-06-21 Thread Avi Kivity


On 06/21/2010 12:31 PM, Zhang, Yanmin wrote:

The 5th patch is applied to the latest qemu-kvm tree.

--- qemu-kvm_0621/target-i386/kvm.c 2010-06-21 11:00:29.0 +0800
+++ qemu-kvm_0621_perf/target-i386/kvm.c2010-06-21 13:00:14.136999850 
+0800
@@ -150,6 +150,9 @@ struct kvm_para_features {
  #ifdef KVM_CAP_PV_MMU
  { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP },
  #endif
+#ifdef KVM_CAP_PV_PERF
+{ KVM_CAP_PV_PERF, KVM_FEATURE_PV_PERF },
+#endif
  { -1, -1 }
  };

   



Not really necessary any more - if you expose the cpuid bit via 
KVM_GET_SUPPORTED_CPUID2 then 'qemu -cpu host' will automatically enable it.


On the other hand, do update target-i386/cpuid.c:kvm_feature_name so 
people can enable the feature using qemu -cpu ...,+kvmperf.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH 1/2] Add 'serial' attribute to virtio-blk devices

2010-06-21 Thread Christoph Hellwig

On Fri, Jun 18, 2010 at 01:38:02PM -0500, Ryan Harper wrote:
 Create a new attribute for virtio-blk devices that will fetch the serial 
 number
 of the block device.  This attribute can be used by udev to create disk/by-id
 symlinks for devices that don't have a UUID (filesystem) associated with them.
 
 ATA_IDENTIFY strings are special in that they can be up to 20 chars long
 and aren't required to be NULL-terminated.  The buffer is also zero-padded
 meaning that if the serial is 19 chars or less that we get a NULL terminated
 string.  When copying this value into a string buffer, we must be careful to
 copy up to the NULL (if it is present) and only 20 if it is longer and not to
 attempt to NULL terminate; this isn't needed.

Why is this virtio-blk specific?  In a later mail you mention you want
to use it for udev.  So please export this from scsi/libata as well and
we have one proper interface that we can use for all devices.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[ kvm-Bugs-1841658 ] OpenSolaris 64bit panic with kvm-54

2010-06-21 Thread SourceForge.net

Bugs item #1841658, was opened at 2007-11-30 13:11
Message generated for change (Comment added) made by jessorensen
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1841658&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Resolution: None
Priority: 3
Private: No
Submitted By: Carlo Marcelo Arenas Belon (carenas)
Assigned to: Nobody/Anonymous (nobody)
Summary: OpenSolaris 64bit panic with kvm-54

Initial Comment:
Wouldn't mark it as a regression per-se as vanilla kvm-53 wouldn't work 
(because of the need for IDE patches to get it to run/install), but vanilla 
kvm-54 or kvm-54 + the same patches added to kvm-53 and including pre-kvm-55 
patches like 71be592a14aa8d127315b2c47bf83cc0d810a341 wouldn't work.

The panic is observed in kvm-54 (--no-kvm runs ok, and --no-kvm-irqchip doesn't 
help) while running nexenta OpenSolaris alpha 7 or beta 1 (other OpenSolaris 
distributions most likely affected as well) and with the following trace :

panic[cpu0]/thread=fffec2de2260: BAD TRAP: type=e (#pf Page fault) 
rp=ff0001735f30 addr=0 occurred in module unix due to a NULL pointer 
dereference

dbus: #pf Page fault
Bad kernel fault at addr=0x0
pid=278, pc=0xfb83c189, sp=0xff0001736028, eflags=0x10246
cr0: 80050033pg,wp,ne,et,mp,pe cr4: 6b8xmme,fxsr,pge,pae,pse,de
cr2: 0 cr3: 7dc4000 cr8: 0
rdi:0 rsi: fffec0025630 rdx: fffec2de2260
rcx:1  r8: fffec0025630  r9:3
rax:0 rbx:0 rbp: ff0001736080
r10:1 r11: fffec1ad31e0 r12:0
r13: fffec0025680 r14: c0025488 r15:0
fsb:0 gsb: fbc26ef0  ds:   4b
 es:   4b  fs:0  gs:  1c3
trp:e err:0 rip: fb83c189
 cs:   30 rfl:10246 rsp: ff0001736028
 ss:   38

ff0001735e10 unix:die+c8 ()
ff0001735f20 unix:trap+135b ()
ff0001735f30 unix:cmntrap+e9 ()
ff0001736080 unix:mutex_exit+9 ()
ff00017360c0 genunix:kmem_alloc+88 ()
ff0001736110 zfs:zio_push_transform+3a ()
ff0001736190 zfs:zio_create+256 ()
ff0001736240 zfs:zio_vdev_child_io+97 ()
ff0001736320 zfs:vdev_cache_read+182 ()
ff0001736370 zfs:vdev_disk_io_start+41 ()
ff0001736390 zfs:vdev_io_start+1d ()
ff00017363d0 zfs:zio_vdev_io_start+123 ()
ff00017363f0 zfs:zio_next_stage_async+bb ()
ff0001736410 zfs:zio_nowait+11 ()
ff0001736450 zfs:vdev_mirror_io_start+18f ()
ff0001736490 zfs:zio_vdev_io_start+131 ()
ff00017364b0 zfs:zio_next_stage+b3 ()
ff00017364e0 zfs:zio_ready+10e ()
ff0001736500 zfs:zio_next_stage+b3 ()
ff0001736550 zfs:zio_wait_for_children+5d ()
ff0001736570 zfs:zio_wait_children_ready+20 ()
ff0001736590 zfs:zio_next_stage_async+bb ()
ff00017365b0 zfs:zio_nowait+11 ()
ff0001736660 zfs:arc_read+4e8 ()
ff0001736700 zfs:dbuf_read_impl+129 ()
ff0001736760 zfs:dbuf_read+c5 ()
ff0001736810 zfs:dmu_buf_hold_array_by_dnode+1c4 ()
ff00017368a0 zfs:dmu_buf_hold_array+74 ()
ff0001736930 zfs:dmu_read_uio+4d ()
ff00017369c0 zfs:zfs_read+15e ()
ff0001736a30 genunix:fop_read+69 ()
ff0001736af0 genunix:vn_rdwr+161 ()
ff0001736c70 genunix:gexec+11c ()
ff0001736e90 genunix:exec_common+41d ()
ff0001736ec0 genunix:exece+1b ()
ff0001736f10 unix:brand_sys_sysenter+1f2 ()

while running in a Gentoo Linux 2007.0 host with Intel(R) Core(TM)2 CPU 6320.

32bit OpenSolaris works fine

--

Comment By: Jes Sorensen (jessorensen)
Date: 2010-06-21 14:55

Message:
Hi,

I pulled down the iso image you mentioned, and it seems to boot fine for
me here. I was able to run the install to a local disk image and boot it
again afterwards. This is using a 64 bit guest CPU on a Fedora 12 system.

What flags are you using to launch it when you see the crash? Are you
running on an Intel or an AMD system and did you specify SMP by any
chance?

Cheers,
Jes


--

Comment By: Stefan Neufeind (neufeind)
Date: 2010-06-18 22:08

Message:
simply from the OpenSolaris-website:
http://hub.opensolaris.org/bin/view/Main/downloads
Release 2009.06 for x86, installing it with x86_64-processor in KVM

--

Comment By: Jes Sorensen (jessorensen)
Date: 2010-06-18 17:22

Message:
Interesting, can you give me a pointer to where I can download that ISO
image from?

Thanks,
Jes

[PATCH] KVM: PPC: Make BAT only guest segments work

2010-06-21 Thread Alexander Graf

When a guest sets its SR entry to invalid, we may still find a
corresponding entry in a BAT. So we need to make sure we're not
faulting on invalid SR entries, but instead just claim them to be
BAT resolved.

This resolves breakage experienced when using libogc based guests.

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/kvm/book3s_32_mmu.c |8 
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index 0b10503..3292d76 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -354,10 +354,10 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct 
kvm_vcpu *vcpu, ulong esid,
*vsid = VSID_REAL_DR | gvsid;
break;
case MSR_DR|MSR_IR:
-   if (!sr->valid)
-   return -1;
-
-   *vsid = sr->vsid;
+   if (sr->valid)
+   *vsid = sr->vsid;
+   else
+   *vsid = VSID_BAT | gvsid;
break;
default:
BUG();
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] KVM: PPC: Use kernel hash function

2010-06-21 Thread Alexander Graf

The linux kernel already provides a hash function. Let's reuse that
instead of reinventing the wheel!

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/kvm/book3s_32_mmu_host.c |   10 ++
 arch/powerpc/kvm/book3s_64_mmu_host.c |   11 ++-
 2 files changed, 4 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c 
b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 0bb6600..37779a5 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/kvm_host.h>
+#include <linux/hash.h>
 
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
@@ -182,14 +183,7 @@ static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu 
*vcpu)
  * a hash, so we don't waste cycles on looping */
 static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid)
 {
-   return (u16)(((gvsid  (SID_MAP_BITS * 7))  SID_MAP_MASK) ^
-((gvsid  (SID_MAP_BITS * 6))  SID_MAP_MASK) ^
-((gvsid  (SID_MAP_BITS * 5))  SID_MAP_MASK) ^
-((gvsid  (SID_MAP_BITS * 4))  SID_MAP_MASK) ^
-((gvsid  (SID_MAP_BITS * 3))  SID_MAP_MASK) ^
-((gvsid  (SID_MAP_BITS * 2))  SID_MAP_MASK) ^
-((gvsid  (SID_MAP_BITS * 1))  SID_MAP_MASK) ^
-((gvsid  (SID_MAP_BITS * 0))  SID_MAP_MASK));
+   return hash_64(gvsid, SID_MAP_BITS);
 }
 
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c 
b/arch/powerpc/kvm/book3s_64_mmu_host.c
index e4b5744..5d0e281 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -20,6 +20,7 @@
  */
 
 #include <linux/kvm_host.h>
+#include <linux/hash.h>
 
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
@@ -164,17 +165,9 @@ static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu 
*vcpu)
  * a hash, so we don't waste cycles on looping */
 static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid)
 {
-   return (u16)(((gvsid  (SID_MAP_BITS * 7))  SID_MAP_MASK) ^
-((gvsid  (SID_MAP_BITS * 6))  SID_MAP_MASK) ^
-((gvsid  (SID_MAP_BITS * 5))  SID_MAP_MASK) ^
-((gvsid  (SID_MAP_BITS * 4))  SID_MAP_MASK) ^
-((gvsid  (SID_MAP_BITS * 3))  SID_MAP_MASK) ^
-((gvsid  (SID_MAP_BITS * 2))  SID_MAP_MASK) ^
-((gvsid  (SID_MAP_BITS * 1))  SID_MAP_MASK) ^
-((gvsid  (SID_MAP_BITS * 0))  SID_MAP_MASK));
+   return hash_64(gvsid, SID_MAP_BITS);
 }
 
-
 static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
 {
struct kvmppc_sid_map *map;
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] KVM: PPC: Remove obsolete kvmppc_mmu_find_pte

2010-06-21 Thread Alexander Graf

Initially we had to search for pte entries to invalidate them. Since
the logic has improved since then, we can just get rid of the search
function.

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/include/asm/kvm_book3s.h |1 -
 arch/powerpc/kvm/book3s_32_mmu_host.c |   20 
 arch/powerpc/kvm/book3s_64_mmu_host.c |   20 
 3 files changed, 0 insertions(+), 41 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 6f74d93..4e99559 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -115,7 +115,6 @@ extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu 
*vcpu);
 extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte);
 extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr);
 extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu);
-extern struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, 
bool data);
 extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, 
bool data);
 extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, 
bool data);
 extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int 
vec);
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c 
b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 37779a5..904f5ac 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -151,26 +151,6 @@ void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong 
pa_start, ulong pa_end)
}
 }
 
-struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool 
data)
-{
-   int i;
-   u64 guest_vp;
-
-   guest_vp = vcpu-arch.mmu.ea_to_vp(vcpu, ea, false);
-   for (i=0; ivcpu-arch.hpte_cache_offset; i++) {
-   struct hpte_cache *pte;
-
-   pte = vcpu-arch.hpte_cache[i];
-   if (!pte-host_va)
-   continue;
-
-   if (pte-pte.vpage == guest_vp)
-   return pte-pte;
-   }
-
-   return NULL;
-}
-
 static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
 {
if (vcpu-arch.hpte_cache_offset == HPTEG_CACHE_NUM)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c 
b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 5d0e281..4ccdde1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -133,26 +133,6 @@ void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong 
pa_start, ulong pa_end)
}
 }
 
-struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool 
data)
-{
-   int i;
-   u64 guest_vp;
-
-   guest_vp = vcpu-arch.mmu.ea_to_vp(vcpu, ea, false);
-   for (i=0; ivcpu-arch.hpte_cache_offset; i++) {
-   struct hpte_cache *pte;
-
-   pte = vcpu-arch.hpte_cache[i];
-   if (!pte-host_va)
-   continue;
-
-   if (pte-pte.vpage == guest_vp)
-   return pte-pte;
-   }
-
-   return NULL;
-}
-
 static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
 {
if (vcpu-arch.hpte_cache_offset == HPTEG_CACHE_NUM)
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/2] KVM: PPC: Add generic hpte management functions

2010-06-21 Thread Alexander Graf

Currently the shadow paging code keeps an array of entries it knows about.
Whenever the guest invalidates an entry, we loop through that entry,
trying to invalidate matching parts.

While this is a really simple implementation, it is probably the most
ineffective one possible. So instead, let's keep an array of lists around
that are indexed by a hash. This way each PTE can be added by 4 list_add,
removed by 4 list_del invocations and the search only needs to loop through
entries that share the same hash.

This patch implements said lookup and exports generic functions that both
the 32-bit and 64-bit backend can use.

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/kvm/book3s_mmu_hpte.c |  287 
 1 files changed, 287 insertions(+), 0 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_mmu_hpte.c

diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c 
b/arch/powerpc/kvm/book3s_mmu_hpte.c
new file mode 100644
index 000..8ee0f1e
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -0,0 +1,287 @@
+/*
+ * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ * Alexander Graf ag...@suse.de
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/hash.h>
+#include <linux/slab.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/machdep.h>
+#include <asm/mmu_context.h>
+#include <asm/hw_irq.h>
+
+#define PTE_SIZE   12
+
+/* #define DEBUG_MMU */
+/* #define DEBUG_SLB */
+
+#ifdef DEBUG_MMU
+#define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__)
+#else
+#define dprintk_mmu(a, ...) do { } while(0)
+#endif
+
+#ifdef DEBUG_SLB
+#define dprintk_slb(a, ...) printk(KERN_INFO a, __VA_ARGS__)
+#else
+#define dprintk_slb(a, ...) do { } while(0)
+#endif
+
+static inline u64 kvmppc_mmu_hash_pte(u64 eaddr) {
+   return hash_64(eaddr >> PTE_SIZE, HPTEG_HASH_BITS);
+}
+
+static inline u64 kvmppc_mmu_hash_vpte(u64 vpage) {
+   return hash_64(vpage  0xfULL, HPTEG_HASH_BITS);
+}
+
+static inline u64 kvmppc_mmu_hash_vpte_long(u64 vpage) {
+   return hash_64((vpage  0xff000ULL)  12, HPTEG_HASH_BITS);
+}
+
+void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
+{
+   u64 index;
+
+   /* Add to ePTE list */
+   index = kvmppc_mmu_hash_pte(pte-pte.eaddr);
+   list_add(pte-list_pte, vcpu-arch.hpte_hash_pte[index]);
+
+   /* Add to vPTE list */
+   index = kvmppc_mmu_hash_vpte(pte-pte.vpage);
+   list_add(pte-list_vpte, vcpu-arch.hpte_hash_vpte[index]);
+
+   /* Add to vPTE_long list */
+   index = kvmppc_mmu_hash_vpte_long(pte-pte.vpage);
+   list_add(pte-list_vpte_long, vcpu-arch.hpte_hash_vpte_long[index]);
+
+   /* Add to all list */
+   list_add(pte-list_all, vcpu-arch.hpte_all);
+}
+
+static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
+{
+   dprintk_mmu(KVM: Flushing SPT: 0x%lx (0x%llx) - 0x%llx\n,
+   pte-pte.eaddr, pte-pte.vpage, pte-host_va);
+
+   /* Different for 32 and 64 bit */
+   kvmppc_mmu_invalidate_pte(vcpu, pte);
+
+   if (pte-pte.may_write)
+   kvm_release_pfn_dirty(pte-pfn);
+   else
+   kvm_release_pfn_clean(pte-pfn);
+
+   list_del(pte-list_pte);
+   list_del(pte-list_vpte);
+   list_del(pte-list_vpte_long);
+   list_del(pte-list_all);
+
+   kmem_cache_free(vcpu-arch.hpte_cache, pte);
+}
+
+static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu)
+{
+   struct hpte_cache *pte, *tmp;
+
+   list_for_each_entry_safe(pte, tmp, vcpu-arch.hpte_all, list_all) {
+   /* Jump over the helper entry */
+   if (pte-list_all == vcpu-arch.hpte_all)
+   continue;
+
+   invalidate_pte(vcpu, pte);
+   }
+}
+
+void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask)
+{
+   u64 i;
+
+   dprintk_mmu(KVM: Flushing %d Shadow PTEs: 0x%lx  0x%lx\n,
+   vcpu-arch.hpte_cache_count, guest_ea, ea_mask);
+
+   switch (ea_mask) {
+   case ~0xfffUL:
+   {
+   struct list_head *list;
+   struct hpte_cache *pte, *tmp;
+
+   /* Find the list of entries in the map */
+   list =

[PATCH 2/2] KVM: PPC: Make use of hash based Shadow MMU

2010-06-21 Thread Alexander Graf

We just introduced generic functions to handle shadow pages on PPC.
This patch makes the respective backends make use of them, getting
rid of a lot of duplicate code along the way.

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/include/asm/kvm_book3s.h |7 ++
 arch/powerpc/include/asm/kvm_host.h   |   16 -
 arch/powerpc/kvm/Makefile |2 +
 arch/powerpc/kvm/book3s_32_mmu_host.c |  104 +++-
 arch/powerpc/kvm/book3s_64_mmu_host.c |   98 ++
 5 files changed, 39 insertions(+), 188 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 4e99559..a96e405 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -115,6 +115,13 @@ extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu 
*vcpu);
 extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte);
 extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr);
 extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu);
+
+extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache 
*pte);
+extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu);
+extern void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu);
+extern int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu);
+extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache 
*pte);
+
 extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, 
bool data);
 extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, 
bool data);
 extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int 
vec);
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 0c9ad86..0e3fc82 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -38,7 +38,9 @@
 #define KVM_NR_PAGE_SIZES  1
 #define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
 
-#define HPTEG_CACHE_NUM 1024
+#define HPTEG_CACHE_NUM         (1 << 15)
+#define HPTEG_HASH_BITS         13
+#define HPTEG_HASH_NUM          (1 << HPTEG_HASH_BITS)
 
 struct kvm;
 struct kvm_run;
@@ -151,6 +153,10 @@ struct kvmppc_mmu {
 };
 
 struct hpte_cache {
+   struct list_head list_all;
+   struct list_head list_pte;
+   struct list_head list_vpte;
+   struct list_head list_vpte_long;
u64 host_va;
u64 pfn;
ulong slot;
@@ -282,8 +288,12 @@ struct kvm_vcpu_arch {
unsigned long pending_exceptions;
 
 #ifdef CONFIG_PPC_BOOK3S
-   struct hpte_cache hpte_cache[HPTEG_CACHE_NUM];
-   int hpte_cache_offset;
+   struct kmem_cache *hpte_cache;
+   struct list_head hpte_hash_pte[HPTEG_HASH_NUM];
+   struct list_head hpte_hash_vpte[HPTEG_HASH_NUM];
+   struct list_head hpte_hash_vpte_long[HPTEG_HASH_NUM];
+   struct list_head hpte_all;
+   int hpte_cache_count;
 #endif
 };
 
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index ff43606..d45c818 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -45,6 +45,7 @@ kvm-book3s_64-objs := \
book3s.o \
book3s_emulate.o \
book3s_interrupts.o \
+   book3s_mmu_hpte.o \
book3s_64_mmu_host.o \
book3s_64_mmu.o \
book3s_32_mmu.o
@@ -57,6 +58,7 @@ kvm-book3s_32-objs := \
book3s.o \
book3s_emulate.o \
book3s_interrupts.o \
+   book3s_mmu_hpte.o \
book3s_32_mmu_host.o \
book3s_32_mmu.o
 kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c 
b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 904f5ac..0b51ef8 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -58,105 +58,19 @@
 static ulong htab;
 static u32 htabmask;
 
-static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
+void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 {
volatile u32 *pteg;
 
-   dprintk_mmu(KVM: Flushing SPTE: 0x%llx (0x%llx) - 0x%llx\n,
-   pte-pte.eaddr, pte-pte.vpage, pte-host_va);
-
+   /* Remove from host HTAB */
pteg = (u32*)pte-slot;
-
pteg[0] = 0;
+
+   /* And make sure it's gone from the TLB too */
asm volatile (sync);
asm volatile (tlbie %0 : : r (pte-pte.eaddr) : memory);
asm volatile (sync);
asm volatile (tlbsync);
-
-   pte-host_va = 0;
-
-   if (pte-pte.may_write)
-   kvm_release_pfn_dirty(pte-pfn);
-   else
-   kvm_release_pfn_clean(pte-pfn);
-}
-
-void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask)
-{
-   int i;
-
-   dprintk_mmu(KVM: Flushing %d Shadow PTEs: 0x%x  0x%x\n,
-   vcpu-arch.hpte_cache_offset, guest_ea, ea_mask);
-

[RFC] Getting specific device from qdev structs

2010-06-21 Thread Eduard - Gabriel Munteanu

Hi,

I'm working on implementing AMD IOMMU emulation in QEMU/KVM and I'm also
creating an API for address translation and access checking. Ideally,
this API should work with different kinds of devices and IOMMUs. These
operations would typically require specific device information to figure
out which IOMMU is responsible and how it refers to the actual device
(bus-device-function number for example).

At the same time, I need to get this from deep within AIO/DMA code, so
adding specific members in those structures doesn't seem to be the best
way.

So I've been looking for a way to obtain things like a PCIDevice from a
more generic structure (say from hw/qdev.h), e.g. DeviceInfo. Is there
something like that already implemented? My searches turned up nothing.

If not, perhaps something like this would be acceptable?

enum DeviceType {
DEV_TYPE_PCI,
DEV_TYPE_ISA,
[...]
};

struct GenericDevice {
enum DeviceType type;
union {
PCIDevice *pci_dev;
ISADevice *isa_dev;
[...]
};
}; /* 
* Embed this in DeviceState for example. Make it
* somehow accessible from AIO/DMA code.
*/

Or some container_of() / DO_UPCAST() magic might do:

struct GenericDevice {
enum DeviceType type;
DeviceState qdev;
}; /* Embed this in PCIDevice and pass a pointer to GenericDevice around. */

struct PCIDevice {
GenericDevice gdev;
[...]
}

int iommu_translate(struct GenericDevice *dev, [other args])
{
PCIDevice *pci_dev;
ISADevice *isa_dev;

switch (dev->type) {
case DEV_TYPE_PCI:
pci_dev = container_of(dev, PCIDevice, gdev);
return iommu_pci_translate(pci_dev, [other args]);
case DEV_TYPE_ISA:
isa_dev = container_of(dev, ISADevice, gdev);
return iommu_isa_translate(isa_dev, [other args]);
[...]
default:
break;
}

[sensible default]
return 0;
}

Note we can't actually do any container_of() magic without recording the
type of the container structure somewhere.

What do you think? I'd appreciate some help here. Perhaps there are
other (simpler) ways I didn't think of.


Thanks,
Eduard

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH V2 3/5] para virt interface of perf to support kvm guest os statistics collection in guest os

2010-06-21 Thread Gleb Natapov

On Mon, Jun 21, 2010 at 05:31:43PM +0800, Zhang, Yanmin wrote:
 The 3rd patch is to implement para virt perf at host kernel.
 
 Signed-off-by: Zhang Yanmin yanmin_zh...@linux.intel.com
 
 ---
 
 --- linux-2.6_tip0620/arch/x86/include/asm/kvm_para.h 2010-06-21 
 15:19:38.992999849 +0800
 +++ linux-2.6_tip0620perfkvm/arch/x86/include/asm/kvm_para.h  2010-06-21 
 15:21:39.308999849 +0800
 @@ -2,6 +2,7 @@
  #define _ASM_X86_KVM_PARA_H
  
  #include linux/types.h
 +#include linux/list.h
  #include asm/hyperv.h
  
  /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx.  It
 @@ -19,7 +20,8 @@
  /* This indicates that the new set of kvmclock msrs
   * are available. The use of 0x11 and 0x12 is deprecated
   */
 -#define KVM_FEATURE_CLOCKSOURCE23
 +#define KVM_FEATURE_CLOCKSOURCE2 3
 +#define KVM_FEATURE_PV_PERF  4
  
  /* The last 8 bits are used to indicate how to interpret the flags field
   * in pvclock structure. If no bits are set, all flags are ignored.
 @@ -33,7 +35,14 @@
  #define MSR_KVM_WALL_CLOCK_NEW  0x4b564d00
  #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
  
 -#define KVM_MAX_MMU_OP_BATCH   32
 +#define KVM_MAX_MMU_OP_BATCH 32
 +
 +/* Operations for KVM_PERF_OP */
 +#define KVM_PERF_OP_OPEN 1
 +#define KVM_PERF_OP_CLOSE2
 +#define KVM_PERF_OP_ENABLE   3
 +#define KVM_PERF_OP_DISABLE  4
 +#define KVM_PERF_OP_READ 5
  
  /* Operations for KVM_HC_MMU_OP */
  #define KVM_MMU_OP_WRITE_PTE1
 @@ -64,6 +73,85 @@ struct kvm_mmu_op_release_pt {
  #ifdef __KERNEL__
  #include asm/processor.h
  
 +/*
 + * data communication area about perf_event between
 + * Host kernel and guest kernel
 + */
 +struct guest_perf_event {
 + u64 count;
 + atomic_t overflows;
 +};
 +
 +/*
 + * In host kernel, perf_event-host_perf_shadow points to
 + * host_perf_shadow which records some information
 + * about the guest.
 + */
 +struct host_perf_shadow {
 + /* guest perf_event id passed from guest os */
 + int id;
 + /*
 +  * Host kernel saves data into data member counter firstly.
 +  * kvm will get data from this counter and calls kvm functions
 +  * to copy or add data back to guest os before entering guest os
 +  * next time
 +  */
 + struct guest_perf_event counter;
 + /* guest_event_addr is gpa_t pointing to guest os guest_perf_event*/
 + __u64 guest_event_addr;
 +
 + /*
 +  * Link to  of kvm.kvm_arch.shadow_hash_table
 +  */
 + struct list_head shadow_entry;
 + struct kvm_vcpu *vcpu;
 +
 + struct perf_event *host_event;
 + /*
 +  * Below counter is to prevent malicious guest os to try to
 +  * close/enable event at the same time.
 +  */
 + atomic_t ref_counter;
 +};
 +
 +/*
 + * In guest kernel, perf_event-guest_shadow points to
 + * guest_perf_shadow which records some information
 + * about the guest.
 + */
 +struct guest_perf_shadow {
 + /* guest perf_event id passed from guest os */
 + int id;
 + /*
 +  * Host kernel kvm saves data into data member counter
 +  */
 + struct guest_perf_event counter;
 +};
 +
 +/*
 + * guest_perf_attr is used when guest calls hypercall to
 + * open a new perf_event at host side. Mostly, it's a copy of
 + * perf_event_attr and deletes something not used by host kernel.
 + */
 +struct guest_perf_attr {
 + __u32   type;
 + __u64   config;
 + __u64   sample_period;
 + __u64   sample_type;
 + __u64   read_format;
 + __u64   flags;
 + __u32   bp_type;
 + __u64   bp_addr;
 + __u64   bp_len;
 +};
 +
 +struct guest_perf_event_param {
 + __u64 attr_addr;
 + __u64 guest_event_addr;
 + /* In case there is an alignment issue, we put id as the last one */
 + int id;
 +};
 +
  extern void kvmclock_init(void);
  
  
 --- linux-2.6_tip0620/arch/x86/include/asm/kvm_host.h 2010-06-21 
 15:19:39.01849 +0800
 +++ linux-2.6_tip0620perfkvm/arch/x86/include/asm/kvm_host.h  2010-06-21 
 15:21:39.308999849 +0800
 @@ -24,6 +24,7 @@
  #include asm/desc.h
  #include asm/mtrr.h
  #include asm/msr-index.h
 +#include asm/perf_event.h
  
  #define KVM_MAX_VCPUS 64
  #define KVM_MEMORY_SLOTS 32
 @@ -360,6 +361,18 @@ struct kvm_vcpu_arch {
  
   /* fields used by HYPER-V emulation */
   u64 hv_vapic;
 +
 + /*
 +  * Fields used by PARAVIRT perf interface:
 +  *
 +  * kvm checks overflow_events before entering guest os,
 +  * and copy data back to guest os.
 +  * event_mutex is to avoid a race between NMI perf event overflow
 +  * handler, event close, and enable/disable.
 +  */
 + struct mutex event_mutex;
 + int overflows;
 + struct perf_event *overflow_events[X86_PMC_IDX_MAX];
  };
  
  struct kvm_mem_alias {
 @@ -377,6 +390,9 @@ struct

Re: [Qemu-devel] [RFC] Getting specific device from qdev structs

2010-06-21 Thread Paul Brook

 So I've been looking for a way to obtain things like a PCIDevice from a
 more generic structure (say from hw/qdev.h),

If you're having to figure out what kind of a device you have then I think 
you're already doing something else wrong. I'd expect the bits of code that 
needs to identify devices to be inherently bus specific.

If you've got some sort of on-cpu IOMMU which is asking "did this come from an 
ISA device, or a PCI device?", then I suspect you've got your abstraction 
layers wrong. This should already have been handled by the pci/isa to cpu 
bridge.

Paul
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC] High availability in KVM

2010-06-21 Thread Luiz Capitulino

On Thu, 17 Jun 2010 12:15:20 +0900
Fernando Luis Vazquez Cao ferna...@oss.ntt.co.jp wrote:

   * qemu-kvm
 
   Currently KVM is only notified about memory errors detected by the
   MCE subsystem. When running on newer x86 hardware, if MCE detects an
   error on user-space it signals the corresponding process with
   SIGBUS. Qemu, upon receiving the signal, checks the problematic
   address which the kernel stored in siginfo and decides whether to
   inject the MCE to the virtual machine.
 
   An obvious limitation is that we would like to be notified about
   other types of error too and, as suggested before, a file-based
   interface that can be sys_poll'ed might be needed for that.  
 
   On a different note, in a HA environment the qemu policy described
   above is not adequate; when a notification of a hardware error that
   our policy determines to be serious arrives the first thing we want
   to do is to put the virtual machine in a quiesced state to avoid
   further wreckage. If we injected the error into the guest we would
   risk a guest panic that might be detectable only by polling or, worse,
   being killed by the kernel, which means that postmortem analysis of
   the guest is not possible. Once we had the guests in a quiesced
   state, where all the buffers have been flushed and the hardware
   sources released, we would have two modes of operation that can be
   used together and complement each other.
 
 - Proactive: A qmp event describing the error (severity, topology,
   etc) is emitted. The HA software would have to register to
   receive hardware error events, possibly using the libvirt
   bindings. Upon receiving the event the HA software would know
   that the guest is in a failover-safe quiesced state so it could
   do without fencing and proceed to the failover stage directly.

This seems to match the BLOCK_IO_ERROR event we have today: when a disk error
happens, an event is emitted and the virtual machine can be automatically
stopped (there's a configuration option for this).

On the other hand, there's a number of ways to do this differently. I think
the first thing to do is to agree on what qemu's behavior is going to be, then
we decide how to expose this info to qmp clients.

 - Passive: Polling resource agents that need to check the state of
   the guest generally use libvirt or a wrapper such as virsh. When
   the state is SHUTOFF or CRASHED the resource agent proceeds to
   the fencing stage, which might be expensive and usually involves
   killing the qemu process. We propose adding a new state that
   indicates the failover-safe state described before. In this
   state the HA software would not need to use fencing techniques
   and since the qemu process is not killed postmortem analysis of
   the virtual machine is still possible.

It wouldn't be polling, I guess. We already have events for most state changes.
So, when the machine stops, reboots, etc.. the client would be notified and
then it could inspect the virtual machine by using query commands.

This method would be preferable in case we also want this information available
in the user Monitor and/or if the event gets too messy because of the amount of
information we want to put in it.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Regarding NAT configuration with KVM

2010-06-21 Thread Charles Duffy

This is the libvirt default network configuration. Please see the 
libvirt documentation and mailing list for support.


As an aside -- you'll probably want to use a bridged configuration 
rather than the NATted one. If you really do want to stick with the NAT 
configuration, you'll need to make sure the routing tables on the other 
hosts guide packets destined for 192.168.122.0/24 back to the VM host.


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [RFC] Getting specific device from qdev structs

2010-06-21 Thread Eduard - Gabriel Munteanu

On Mon, Jun 21, 2010 at 03:07:13PM +0100, Paul Brook wrote:
  So I've been looking for a way to obtain things like a PCIDevice from a
  more generic structure (say from hw/qdev.h),
 
 If you're having to figure out what kind of a device you have then I think 
 you're already doing something else wrong. I'd expect the bits of code that 
 needs to identify devices to be inherently bus specific.
 
 If you've got some sort of on-cpu IOMMU which is asking "did this come from 
 an 
 ISA device, or a PCI device?", then I suspect you've got your abstraction 
 layers wrong. This should already have been handled by the pci/isa to cpu 
 bridge.
 
 Paul

Hi,

Thanks for your reply. This isn't about a specific IOMMU. Let me
describe the situation better:

1. I'm implementing the AMD IOMMU, which is a PCI IOMMU (not in the CPU).
2. Devices need address translation and checking through this IOMMU.
3. But in the future there might be other IOMMU implementations,
possibly for other bus types.

Yes, I could (and have already done to test my code) modify device code
to ask the AMD IOMMU for translation. But we have stuff like AIO, which
isn't really bus-specific and would result in spaghetti code if I add
PCI-specific stuff, then somebody else does the same for other buses and
so on. Moreover, even for PCI, it isn't really straightforward to obtain
the bus-device-function number required to do translation from AIO code
(e.g. I needed to add the devfn or a pointer to the actual PCIDevice to
BMDMAState to get it working for PIIX).

So I considered providing a generic IOMMU translation/checking API that
could be used by all devices and all IOMMUs. Generally getting the
{PCI,ISA,Whatever}Device should be enough, I think.

If the IOMMU can't handle that specific bus, that's no problem, we can
have generic code do identity mapping without any access checking. If
somebody comes along and wants to implement another IOMMU emulation, all
he needs is to provide implementations for those functions.


Eduard

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Question regarding KVM networking

2010-06-21 Thread Kangkook Jee

hi, all 

I have a question regarding KVM's networking stack implementation. 

Does KVM hook into NetFilter to intercept packets destined for the guest, or 
PF_PACKET?

Thanks for your help, in advance

Regards, Kangkook
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [RFC] Getting specific device from qdev structs

2010-06-21 Thread Paul Brook

 Thanks for your reply. This isn't about a specific IOMMU. Let me
 describe the situation better:
 
 1. I'm implementing the AMD IOMMU, which is a PCI IOMMU (not in the CPU).
 2. Devices need address translation and checking through this IOMMU.
 3. But in the future there might be other IOMMU implementations,
 possibly for other bus types.
 
 Yes, I could (and have already done to test my code) modify device code
 to ask the AMD IOMMU for translation. But we have stuff like AIO, which
 isn't really bus-specific and would result in spaghetti code if I add
 PCI-specific stuff, then somebody else does the same for other buses and
 so on. Moreover, even for PCI, it isn't really straightforward to obtain
 the bus-device-function number required to do translation from AIO code
 (e.g. I needed to add the devfn or a pointer to the actual PCIDevice to
 BMDMAState to get it working for PIIX).

A bus-device-function number is inherently PCI specific.
 
 So I considered providing a generic IOMMU translation/checking API that
 could be used by all devices and all IOMMUs. Generally getting the
 {PCI,ISA,Whatever}Device should be enough, I think.

 If the IOMMU can't handle that specific bus, that's no problem, we can
 have generic code do identity mapping without any access checking. If
 somebody comes along and wants to implement another IOMMU emulation, all
 he needs is to provide implementations for those functions.

The actual code to handle address remapping can be bus agnostic. The code to 
create the mappings is inherently bus specific, i.e. the generic code needs to 
ask the bus bridge "how do I translate this access onto your parent bus?".

For example, consider a PCI bridge (Device A) with an IOMMU. On that PCI bus 
resides a PCI-ISA bridge (Device B) that also has an IOMMU. Device C is a bus-
master ISA device[1].

Accesses from device C cause the memory mapping code to walk down the bus 
structure. First the ISA IOMMU translates that into an access from device B. 
Then the PCI IOMMU translates this into a system bus access from device A.

The code to determine each of these mappings is inherently bus specific. That 
code trivially knows how to access bus-specific information from its devices. 
However the framework used to chain these mappings and perform the actual 
transfer should be bus agnostic.

While the IOMMU actually resides in the host bridge, it probably makes most 
sense to associate it with the bus itself. When the host device creates the 
bus it can also create the IOMMU. This should handle both explicit (PCI) and 
implicit (SBUS) slave-side bus interfaces.

Paul

[1] I don't think ISA supports bus-master devices, but ignore that for now.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Question regarding KVM networking

2010-06-21 Thread Charles Duffy


On 06/21/2010 09:51 AM, Kangkook Jee wrote:

Does KVM hook into NetFilter to intercept packets destined for the guest, or 
PF_PACKET?


None of the above.

With -net user, KVM uses techniques derived from SLiRP to simulate a 
network stack with only traditional userspace UNIX socket calls. With 
-net tap, it uses the standard ethertap interface. -net socket and -net 
dump likewise do nothing unconventional.


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] Search the LAPIC's for one that will accept a PIC interrupt.

2010-06-21 Thread Chris Lalancette

Older versions of 32-bit linux have a "Checking 'hlt' instruction"
test where they repeatedly call the 'hlt' instruction, and then
expect a timer interrupt to kick the CPU out of halt.  This happens
before any LAPIC or IOAPIC setup happens, which means that all of
the APIC's are in virtual wire mode at this point.  Unfortunately,
the current implementation of virtual wire mode is hardcoded to
only kick the BSP, so if a crash+kexec occurs on a different
vcpu, it will never get kicked.

This patch makes pic_unlock() do the equivalent of
kvm_irq_delivery_to_apic() for the IOAPIC code.  That is, it runs
through all of the vcpus looking for one that is in virtual wire
mode.  In the normal case where LAPICs and IOAPICs are configured,
this won't be used at all.  In the bootstrap phase of a modern
OS, before the LAPICs and IOAPICs are configured, this will have
exactly the same behavior as today; VCPU0 is always looked at
first, so it will always get out of the loop after the first
iteration.  This will only go through the loop more than once
during a kexec/kdump, in which case it will only do it a few times
until the kexec'ed kernel programs the LAPIC and IOAPIC.

Signed-off-by: Chris Lalancette clala...@redhat.com
---
 arch/x86/kvm/i8259.c |   17 +
 1 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 2c73f44..85ecabc 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -44,16 +44,25 @@ static void pic_unlock(struct kvm_pic *s)
__releases(s-lock)
 {
bool wakeup = s-wakeup_needed;
-   struct kvm_vcpu *vcpu;
+   struct kvm_vcpu *vcpu, *found = NULL;
+   int i;
 
s-wakeup_needed = false;
 
raw_spin_unlock(s-lock);
 
if (wakeup) {
-   vcpu = s-kvm-bsp_vcpu;
-   if (vcpu)
-   kvm_vcpu_kick(vcpu);
+   kvm_for_each_vcpu(i, vcpu, s-kvm) {
+   if (kvm_apic_accept_pic_intr(vcpu)) {
+   found = vcpu;
+   break;
+   }
+   }
+
+   if (!found)
+   found = s-kvm-bsp_vcpu;
+
+   kvm_vcpu_kick(found);
}
 }
 
-- 
1.6.6.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[ kvm-Bugs-1899961 ] NIC not working properly with WS2008 RC1 x64

2010-06-21 Thread SourceForge.net

Bugs item #1899961, was opened at 2008-02-22 22:19
Message generated for change (Comment added) made by jessorensen
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1899961&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Closed
Resolution: Fixed
Priority: 5
Private: No
Submitted By: Daniel (danielhs)
Assigned to: Nobody/Anonymous (nobody)
Summary: NIC not working properly with WS2008 RC1 x64

Initial Comment:
Using KVM-60 with Windows Server 2008 x64, the NIC is not working properly.

Can't do any kind of network activities.

The windows network monitor indicates that a lot of data has been received, but 
none sent.

Link to windows server 2008 rc1 standard edition

http://www.microsoft.com/downloads/details.aspx?FamilyId=B8144EBA-9EFD-475F-9DD3-A264A00BF5A1&displaylang=en

I'm using the install from Ubuntu Hardy.

I've had no problems networking using Windows XP x64 under same KVM/Ubuntu 
install.

--

Comment By: Jes Sorensen (jessorensen)
Date: 2010-06-21 17:43

Message:
Tested with 2008 x64 R1 in here using the e1000 driver - it all seems to
work now.

closing


--

Comment By: Daniel (danielhs)
Date: 2008-03-12 15:58

Message:
Logged In: YES 
user_id=1609821
Originator: YES

Yes.  I believe that I *was* using KVM-60 at the time.  But,
as I said before, all I did was use the install that was on the Ubuntu
Hardy repository.  So I'm not sure which version it was.  I think it was
kvm-60.

Either way, I'm not using that version at this point.

Daniel

--

Comment By: Technologov (technologov)
Date: 2008-03-12 14:41

Message:
Logged In: YES 
user_id=1839746
Originator: NO

Unreproducible. I have it all working.

Are you sure you're working on KVM-60?

Try:
# modinfo kvm-intel

-Technologov

--

Comment By: Daniel (danielhs)
Date: 2008-02-23 22:54

Message:
Logged In: YES 
user_id=1609821
Originator: YES

Looks like new e1000 driver in kvm-61 fixes this problem entirely.

Might want to add a note or something like that to make others aware

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1899961&group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/2] Add 'serial' attribute to virtio-blk devices

2010-06-21 Thread Ryan Harper

* john cooper john.coo...@redhat.com [2010-06-21 01:11]:
 Rusty Russell wrote:
  On Sat, 19 Jun 2010 04:08:02 am Ryan Harper wrote:
  Create a new attribute for virtio-blk devices that will fetch the serial 
  number
  of the block device.  This attribute can be used by udev to create 
  disk/by-id
  symlinks for devices that don't have a UUID (filesystem) associated with 
  them.
 
  ATA_IDENTIFY strings are special in that they can be up to 20 chars long
  and aren't required to be NULL-terminated.  The buffer is also zero-padded
  meaning that if the serial is 19 chars or less that we get a NULL 
  terminated
  string.  When copying this value into a string buffer, we must be careful 
  to
  copy up to the NULL (if it is present) and only 20 if it is longer and not to
  attempt to NULL terminate; this isn't needed.
 
  Signed-off-by: Ryan Harper ry...@us.ibm.com
  Signed-off-by: john cooper john.coo...@redhat.com
  ---
   drivers/block/virtio_blk.c |   32 
   1 files changed, 32 insertions(+), 0 deletions(-)
 
  diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
  index 258bc2a..f1ef26f 100644
  --- a/drivers/block/virtio_blk.c
  +++ b/drivers/block/virtio_blk.c
  @@ -281,6 +281,31 @@ static int index_to_minor(int index)
 return index  PART_BITS;
   }
   
  +/* Copy serial number from *s to *d.  Copy operation terminates on either
  + * encountering a nul in *s or after n bytes have been copied, whichever
  + * occurs first.  *d is not forcibly nul terminated.  Return # of bytes 
  copied.
  + */
  +static inline int serial_sysfs(char *d, char *s, int n)
  +{
  +  char *di = d;
  +
  +  while (*s  n--)
  +  *d++ = *s++;
  +  return d - di;
  +}
  +
  +static ssize_t virtblk_serial_show(struct device *dev,
  +  struct device_attribute *attr, char *buf)
  +{
  +  struct gendisk *disk = dev_to_disk(dev);
  +  char id_str[VIRTIO_BLK_ID_BYTES];
  +
  +  if (IS_ERR(virtblk_get_id(disk, id_str)))
  +  return 0;
  
  0?  Really?  That doesn't seem very informative.
 
 Propagating a prospective error from virtblk_get_id() should
 be possible.  Unsure if doing so is more useful from the
 user's perspective compared to just a nul id string.

I'm not sure we can do anything else here; maybe printk a warning?

Documentation/filesystems/sysfs.txt says that showing attributes should
always return the number of chars put into the buffer; so when there is
an error; zero is the right value to return since we're not filling the
buffer.
 
  +  return serial_sysfs(buf, id_str, min(VIRTIO_BLK_ID_BYTES, PAGE_SIZE));
  
  How about something like this:
  
  BUILD_BUG_ON(PAGE_SIZE  VIRTIO_BLK_ID_BYTES + 1);
 
 Agreed, that's a better wrench in the gearworks.
 Note padding buf[] by 1 isn't necessary as indicated
 below.

Yep; that's a good one to take.

 
  /* id_str is not necessarily nul-terminated! */
  buf[VIRTIO_BLK_ID_BYTES] = '\0';
  return virtblk_get_id(disk, buf);
 
 The /sys file is rendered according to the length
 returned from this function and the trailing nul
 is not interpreted in this context.  In fact if a
 nul is added and included in the byte count of the
 string it will appear in the /sys file.

Yeah; I like the simplicity; but we do need to know how long the string
is so we can return that value. 

 
 Thanks,
 
 -john
 
 
 -- 
 john.coo...@redhat.com

-- 
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
ry...@us.ibm.com
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH 1/2] Add 'serial' attribute to virtio-blk devices

2010-06-21 Thread Ryan Harper

* Christoph Hellwig h...@lst.de [2010-06-21 07:46]:
 On Fri, Jun 18, 2010 at 01:38:02PM -0500, Ryan Harper wrote:
  Create a new attribute for virtio-blk devices that will fetch the serial 
  number
  of the block device.  This attribute can be used by udev to create 
  disk/by-id
  symlinks for devices that don't have a UUID (filesystem) associated with 
  them.
  
  ATA_IDENTIFY strings are special in that they can be up to 20 chars long
  and aren't required to be NULL-terminated.  The buffer is also zero-padded
  meaning that if the serial is 19 chars or less that we get a NULL terminated
  string.  When copying this value into a string buffer, we must be careful to
  copy up to the NULL (if it is present) and only 20 if it is longer and not to
  attempt to NULL terminate; this isn't needed.
 
 Why is this virtio-blk specific?  In a later mail you mention you want
 to use it for udev.  So please export this from scsi/libata as well and
 we have one proper interface that we can use for all devices.

ATA and SCSI devices are already supported via ata_id and scsi_id
commands included in udev.  Qemu implements the drive serial part for
them and udev creates proper disk/by-id links.  This patch is about
filling the gap for virtio-blk devices which cannot work with ata_id and
scsi_id.


-- 
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
ry...@us.ibm.com
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/2] Add 'serial' attribute to virtio-blk devices

2010-06-21 Thread john cooper

Ryan Harper wrote:
 * john cooper john.coo...@redhat.com [2010-06-21 01:11]:
 Rusty Russell wrote:
 On Sat, 19 Jun 2010 04:08:02 am Ryan Harper wrote:
 Create a new attribute for virtio-blk devices that will fetch the serial 
 number
 of the block device.  This attribute can be used by udev to create 
 disk/by-id
 symlinks for devices that don't have a UUID (filesystem) associated with 
 them.

 ATA_IDENTIFY strings are special in that they can be up to 20 chars long
 and aren't required to be NULL-terminated.  The buffer is also zero-padded
 meaning that if the serial is 19 chars or less that we get a NULL 
 terminated
 string.  When copying this value into a string buffer, we must be careful 
 to
 copy up to the NULL (if it is present) and only 20 if it is longer and not to
 attempt to NULL terminate; this isn't needed.

 Signed-off-by: Ryan Harper ry...@us.ibm.com
 Signed-off-by: john cooper john.coo...@redhat.com
 ---
  drivers/block/virtio_blk.c |   32 
  1 files changed, 32 insertions(+), 0 deletions(-)

 diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
 index 258bc2a..f1ef26f 100644
 --- a/drivers/block/virtio_blk.c
 +++ b/drivers/block/virtio_blk.c
 @@ -281,6 +281,31 @@ static int index_to_minor(int index)
return index  PART_BITS;
  }
  
 +/* Copy serial number from *s to *d.  Copy operation terminates on either
 + * encountering a nul in *s or after n bytes have been copied, whichever
 + * occurs first.  *d is not forcibly nul terminated.  Return # of bytes 
 copied.
 + */
 +static inline int serial_sysfs(char *d, char *s, int n)
 +{
 +  char *di = d;
 +
 +  while (*s  n--)
 +  *d++ = *s++;
 +  return d - di;
 +}
 +
 +static ssize_t virtblk_serial_show(struct device *dev,
 +  struct device_attribute *attr, char *buf)
 +{
 +  struct gendisk *disk = dev_to_disk(dev);
 +  char id_str[VIRTIO_BLK_ID_BYTES];
 +
 +  if (IS_ERR(virtblk_get_id(disk, id_str)))
 +  return 0;
 0?  Really?  That doesn't seem very informative.
 Propagating a prospective error from virtblk_get_id() should
 be possible.  Unsure if doing so is more useful from the
 user's perspective compared to just a nul id string.
 
 I'm not sure we can do anything else here; maybe printk a warning?
 
 Documentation/filesystems/sysfs.txt says that showing attributes should
 always return the number of chars put into the buffer; so when there is
 an error; zero is the right value to return since we're not filling the
 buffer.

So we return a nul string in the case the qemu user
didn't specify an id string and also in the case a
legacy qemu doesn't support retrieval of an id string.
Not too much difference and if needed going forward the
error return can be elaborated.

 /* id_str is not necessarily nul-terminated! */
 buf[VIRTIO_BLK_ID_BYTES] = '\0';
 return virtblk_get_id(disk, buf);
 The /sys file is rendered according to the length
 returned from this function and the trailing nul
 is not interpreted in this context.  In fact if a
 nul is added and included in the byte count of the
 string it will appear in the /sys file.
 
 Yeah; I like the simplicity; but we do need to know how long the string
 is so we can return that value. 

Which we're getting from serial_sysfs() without
having to accommodate an unused nul.  I'd hazard the
primary reason the sysfs calling code keys off a
return of byte count vs. traversing the string itself
is due to the called function almost always having the
byte count available.

-john

-- 
john.coo...@redhat.com
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/4] Preparation for PIC-APIC rewiring

2010-06-21 Thread Marcelo Tosatti


Looks good.

On Sun, Jun 20, 2010 at 07:21:54PM +0300, Avi Kivity wrote:
 These four patches were part of an optimization patchset I've been neglecting,
 but are equally useful as preparation for fixing the PIC-APIC wiring issues.
 
 Avi Kivity (4):
   KVM: i8259: reduce excessive abstraction for pic_irq_request()
   KVM: i8259: simplify pic_irq_request() calling sequence
   KVM: Add mini-API for vcpu-requests
   KVM: Reduce atomic operations on vcpu-requests
 
  arch/x86/kvm/i8259.c |   18 ++
  arch/x86/kvm/irq.h   |4 
  arch/x86/kvm/lapic.c |2 +-
  arch/x86/kvm/mmu.c   |6 +++---
  arch/x86/kvm/svm.c   |2 +-
  arch/x86/kvm/timer.c |2 +-
  arch/x86/kvm/vmx.c   |2 +-
  arch/x86/kvm/x86.c   |   27 +--
  include/linux/kvm_host.h |   20 
  virt/kvm/kvm_main.c  |4 ++--
  10 files changed, 48 insertions(+), 39 deletions(-)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/2] KVM: Prevent internal slots from being COWed

2010-06-21 Thread Marcelo Tosatti

On Mon, Jun 21, 2010 at 11:18:13AM +0300, Avi Kivity wrote:
 If a process with a memory slot is COWed, the page will change its address
 (despite having an elevated reference count).  This breaks internal memory
 slots which have their physical addresses loaded into vmcs registers (see
 the APIC access memory slot).
 
 Signed-off-by: Avi Kivity a...@redhat.com
 ---
  arch/x86/kvm/x86.c |5 +
  1 files changed, 5 insertions(+), 0 deletions(-)
 
 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
 index 33156a3..d9a33e6 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
 @@ -5633,6 +5633,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
   int user_alloc)
  {
   int npages = memslot-npages;
 + int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
 +
 + /* Prevent internal slot pages from being moved by fork()/COW. */
 + if (memslot-id = KVM_MEMORY_SLOTS)
 + map_flags = MAP_SHARED | MAP_ANONYMOUS;
  
   /*To keep backward compatibility with older userspace,
*x86 needs to hanlde !user_alloc case.

Forgot to use map_flags below.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/2] KVM: Remove memory alias support

2010-06-21 Thread Marcelo Tosatti

On Mon, Jun 21, 2010 at 11:51:58AM +0300, Avi Kivity wrote:
 As advertised in feature-removal-schedule.txt.  Equivalent support is provided
 by overlapping memory regions.
 
 Signed-off-by: Avi Kivity a...@redhat.com
 ---
  Documentation/feature-removal-schedule.txt |   11 ---
  Documentation/kvm/api.txt  |   12 +---
  arch/ia64/kvm/kvm-ia64.c   |5 -
  arch/powerpc/kvm/powerpc.c |5 -
  arch/s390/kvm/kvm-s390.c   |5 -
  arch/x86/include/asm/kvm_host.h|   21 -
  arch/x86/kvm/mmu.c |   17 +---
  arch/x86/kvm/paging_tmpl.h |3 +-
  arch/x86/kvm/x86.c |  125 
 
  arch/x86/kvm/x86.h |7 --
  include/linux/kvm.h|1 +
  include/linux/kvm_host.h   |6 --
  virt/kvm/kvm_main.c|   18 +---
  13 files changed, 11 insertions(+), 225 deletions(-)
 

 diff --git a/include/linux/kvm.h b/include/linux/kvm.h
 index 6fd40f5..ea294e6 100644
 --- a/include/linux/kvm.h
 +++ b/include/linux/kvm.h
 @@ -619,6 +619,7 @@ struct kvm_clock_data {
   */
  #define KVM_CREATE_VCPU   _IO(KVMIO,   0x41)
  #define KVM_GET_DIRTY_LOG _IOW(KVMIO,  0x42, struct kvm_dirty_log)
 +/* KVM_SET_MEMORY_REGION is obsolete: */

SET_MEMORY_ALIAS?

  #define KVM_SET_MEMORY_ALIAS  _IOW(KVMIO,  0x43, struct kvm_memory_alias)
  #define KVM_SET_NR_MMU_PAGES  _IO(KVMIO,   0x44)
  #define KVM_GET_NR_MMU_PAGES  _IO(KVMIO,   0x45)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] do not enter vcpu again if it was stopped during IO

2010-06-21 Thread Marcelo Tosatti

On Mon, Jun 21, 2010 at 12:01:52PM +0300, Gleb Natapov wrote:
 To prevent reentering vcpu after IO completion it is not enough
 to set env-stopped since it is checked only in main loop but control
 will not get there until next non-IO exit since kvm_run() will reenter
 vcpu to complete IO instruction. Solve this by sending self-signal to
 request exit after IO instruction completion.
 
 Signed-off-by: Gleb Natapov g...@redhat.com
 diff --git a/qemu-kvm.c b/qemu-kvm.c
 index be1dac2..4f7cf6d 100644
 --- a/qemu-kvm.c
 +++ b/qemu-kvm.c
 @@ -603,6 +603,10 @@ int kvm_run(CPUState *env)
  r = pre_kvm_run(kvm, env);
  if (r)
  return r;
 +if (env-exit_request) {
 +env-exit_request = 0;
 +pthread_kill(env-kvm_cpu_state.thread, SIG_IPI);
 +}
  r = ioctl(fd, KVM_RUN, 0);

Can't you check for env-stopped instead?

  
  if (r == -1  errno != EINTR  errno != EAGAIN) {
 diff --git a/vl.c b/vl.c
 index 9e9c176..dcfab13 100644
 --- a/vl.c
 +++ b/vl.c
 @@ -1817,6 +1817,7 @@ void qemu_system_reset_request(void)
  }
  if (cpu_single_env) {
  cpu_single_env-stopped = 1;
 +cpu_exit(cpu_single_env);
  }
  qemu_notify_event();
  }
 --
   Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/2] Add 'serial' attribute to virtio-blk devices

2010-06-21 Thread Rusty Russell

On Tue, 22 Jun 2010 02:13:21 am Ryan Harper wrote:
 * john cooper john.coo...@redhat.com [2010-06-21 01:11]:
  Rusty Russell wrote:
   On Sat, 19 Jun 2010 04:08:02 am Ryan Harper wrote:
   Create a new attribute for virtio-blk devices that will fetch the serial 
   number
   of the block device.  This attribute can be used by udev to create 
   disk/by-id
   symlinks for devices that don't have a UUID (filesystem) associated with 
   them.
  
   ATA_IDENTIFY strings are special in that they can be up to 20 chars long
   and aren't required to be NULL-terminated.  The buffer is also 
   zero-padded
   meaning that if the serial is 19 chars or less that we get a NULL 
   terminated
   string.  When copying this value into a string buffer, we must be 
   careful to
   copy up to the NULL (if it is present) and only 20 if it is longer and not 
   to
   attempt to NULL terminate; this isn't needed.
  
   Signed-off-by: Ryan Harper ry...@us.ibm.com
   Signed-off-by: john cooper john.coo...@redhat.com
   ---
drivers/block/virtio_blk.c |   32 
1 files changed, 32 insertions(+), 0 deletions(-)
  
   diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
   index 258bc2a..f1ef26f 100644
   --- a/drivers/block/virtio_blk.c
   +++ b/drivers/block/virtio_blk.c
   @@ -281,6 +281,31 @@ static int index_to_minor(int index)
return index  PART_BITS;
}

   +/* Copy serial number from *s to *d.  Copy operation terminates on 
   either
   + * encountering a nul in *s or after n bytes have been copied, whichever
   + * occurs first.  *d is not forcibly nul terminated.  Return # of bytes 
   copied.
   + */
   +static inline int serial_sysfs(char *d, char *s, int n)
   +{
   +char *di = d;
   +
   +while (*s  n--)
   +*d++ = *s++;
   +return d - di;
   +}
   +
   +static ssize_t virtblk_serial_show(struct device *dev,
   +struct device_attribute *attr, char 
   *buf)
   +{
   +struct gendisk *disk = dev_to_disk(dev);
   +char id_str[VIRTIO_BLK_ID_BYTES];
   +
   +if (IS_ERR(virtblk_get_id(disk, id_str)))
   +return 0;
   
   0?  Really?  That doesn't seem very informative.
  
  Propagating a prospective error from virtblk_get_id() should
  be possible.  Unsure if doing so is more useful from the
  user's perspective compared to just a nul id string.
 
 I'm not sure we can do anything else here; maybe printk a warning?
 
 Documentation/filesystems/sysfs.txt says that showing attributes should
 always return the number of chars put into the buffer; so when there is
 an error; zero is the right value to return since we're not filling the
 buffer.

Ideally, the file shouldn't be set up if we don't have an ID.  But we never
did add a feature bit for this :(

At a glance, we'll get -EIO if the host doesn't support it (or any other
transport error).  -ENOMEM if we run out of memory.

printk is dumb, but it's nice to differentiate "host didn't supply one" vs
"something went wrong".  How about return 0 on -EIO?  Whatever is easiest
for udev is best here.

 /* id_str is not necessarily nul-terminated! */
 buf[VIRTIO_BLK_ID_BYTES] = '\0';
 return virtblk_get_id(disk, buf);
  
  The /sys file is rendered according to the length
  returned from this function and the trailing nul
  is not interpreted in this context.  In fact if a
  nul is added and included in the byte count of the
  string it will appear in the /sys file.
 
 Yeah; I like the simplicity; but we do need to know how long the string
 is so we can return that value. 

So we're looking at something like:

/* id_str is not necessarily nul-terminated! */
buf[VIRTIO_BLK_ID_BYTES] = '\0';
err = virtblk_get_id(disk, buf);
if (!err)
return strlen(buf);
if (err == -EIO) /* Unsupported?  Make it empty. */ 
return 0;
return err;

Then, please *test*!

Thanks,
Rusty.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] KVM: Remove redundant change of return value

2010-06-21 Thread akong

From: Amos Kong ak...@redhat.com

In the following situation, assigning zero to 'r' is redundant; just remove the assignment.

r = foo();
if (r)
goto out;
r = 0;
...

Signed-off-by: Amos Kong ak...@redhat.com
---
 arch/x86/kvm/x86.c |7 ---
 1 files changed, 0 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 33156a3..a23bfa0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2477,7 +2477,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
if (r)
goto out;
-   r = 0;
break;
}
case KVM_INTERRUPT: {
@@ -2489,14 +2488,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_interrupt(vcpu, irq);
if (r)
goto out;
-   r = 0;
break;
}
case KVM_NMI: {
r = kvm_vcpu_ioctl_nmi(vcpu);
if (r)
goto out;
-   r = 0;
break;
}
case KVM_SET_CPUID: {
@@ -3227,7 +3224,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_set_irqchip(kvm, chip);
if (r)
goto set_irqchip_out;
-   r = 0;
set_irqchip_out:
kfree(chip);
if (r)
@@ -3260,7 +3256,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_set_pit(kvm, u.ps);
if (r)
goto out;
-   r = 0;
break;
}
case KVM_GET_PIT2: {
@@ -3286,7 +3281,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_set_pit2(kvm, u.ps2);
if (r)
goto out;
-   r = 0;
break;
}
case KVM_REINJECT_CONTROL: {
@@ -3297,7 +3291,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_reinject(kvm, control);
if (r)
goto out;
-   r = 0;
break;
}
case KVM_XEN_HVM_CONFIG: {
-- 
1.7.0.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[KVM-AUTOTEST PATCH] KVM test: kvm_preprocessing.py: test for vm.is_alive() instead of vm.is_dead()

2010-06-21 Thread Michael Goldish

vm.is_alive() verifies that the monitor is responsive, which is required for
taking screendumps.

Signed-off-by: Michael Goldish mgold...@redhat.com
---
 client/tests/kvm/kvm_preprocessing.py |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/client/tests/kvm/kvm_preprocessing.py 
b/client/tests/kvm/kvm_preprocessing.py
index 1ed4ec2..ee279bd 100644
--- a/client/tests/kvm/kvm_preprocessing.py
+++ b/client/tests/kvm/kvm_preprocessing.py
@@ -406,7 +406,7 @@ def _take_screendumps(test, params, env):
 
 while True:
 for vm in kvm_utils.env_get_all_vms(env):
-if vm.is_dead():
+if not vm.is_alive():
 continue
 try:
 vm.monitor.screendump(temp_filename)
-- 
1.5.4.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[KVM-AUTOTEST PATCH] KVM test: concentrate image and cdrom filename prefixes in tests.cfg.sample

2010-06-21 Thread Michael Goldish

Don't prefix image_name and cdrom at the end of tests_base.cfg.sample.
Instead, do it all in tests.cfg.sample, to make it clearer to users editing
the file.

Signed-off-by: Michael Goldish mgold...@redhat.com
---
 client/tests/kvm/tests.cfg.sample  |   10 --
 client/tests/kvm/tests_base.cfg.sample |2 --
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/client/tests/kvm/tests.cfg.sample 
b/client/tests/kvm/tests.cfg.sample
index a55a320..b303686 100644
--- a/client/tests/kvm/tests.cfg.sample
+++ b/client/tests/kvm/tests.cfg.sample
@@ -4,19 +4,17 @@
 include tests_base.cfg
 include cdkeys.cfg
 
-# Modify/comment the following lines if you wish to modify
-# the paths of the image files, ISO files, step files or qemu binaries.
+# Modify/comment the following lines if you wish to modify the paths of the
+# image files, ISO files or qemu binaries.
 #
 # As for the defaults:
 # * qemu and qemu-img are expected to be found under /usr/bin/qemu-kvm and
 #   /usr/bin/qemu-img respectively.
 # * All image files are expected under /tmp/kvm_autotest_root/images/
 # * All iso files are expected under /tmp/kvm_autotest_root/isos/
-# * All step files are expected under /tmp/kvm_autotest_root/steps/
 qemu_img_binary = /usr/bin/qemu-img
-image_name.* ?= /tmp/kvm_autotest_root/
-cdrom.* ?= /tmp/kvm_autotest_root/
-steps ?= /tmp/kvm_autotest_root/
+image_name.* ?= /tmp/kvm_autotest_root/images/
+cdrom.* ?= /tmp/kvm_autotest_root/isos/
 
 # Here are the test sets variants. The variant 'qemu_kvm_windows_quick' is
 # fully commented, the following ones have comments only on noteworthy points
diff --git a/client/tests/kvm/tests_base.cfg.sample 
b/client/tests/kvm/tests_base.cfg.sample
index ec61a5e..2c78cfc 100644
--- a/client/tests/kvm/tests_base.cfg.sample
+++ b/client/tests/kvm/tests_base.cfg.sample
@@ -1447,6 +1447,4 @@ variants:
 devices_requested = 7
 
 
-image_name.* ?= images/
-cdrom.* ?= isos/
 steps ?= steps/
-- 
1.5.4.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[KVM-AUTOTEST PATCH] KVM test: scan_results.py: fix handling of empty result list

2010-06-21 Thread Michael Goldish

If there are no test results, max() tries to operate on an empty sequence
and throws an exception.

Signed-off-by: Michael Goldish mgold...@redhat.com
---
 client/tests/kvm/scan_results.py |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/client/tests/kvm/scan_results.py b/client/tests/kvm/scan_results.py
index f7073e4..a339a85 100755
--- a/client/tests/kvm/scan_results.py
+++ b/client/tests/kvm/scan_results.py
@@ -74,7 +74,7 @@ def main(resfiles):
 continue
 results = parse_results(text)
 result_lists.append((resfile, results))
-name_width = max(name_width, max(len(r[0]) for r in results))
+name_width = max([name_width] + [len(r[0]) for r in results])
 
 print_result((Test, Status, Seconds, Info), name_width)
 print_result((, --, ---, ), name_width)
-- 
1.5.4.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC] High availability in KVM

2010-06-21 Thread Takuya Yoshikawa


(2010/06/21 23:19), Luiz Capitulino wrote:

   On a different note, in a HA environment the qemu policy described
   above is not adequate; when a notification of a hardware error that
   our policy determines to be serious arrives the first thing we want
   to do is to put the virtual machine in a quiesced state to avoid
   further wreckage. If we injected the error into the guest we would
   risk a guest panic that might be detectable only by polling or, worse,
   being killed by the kernel, which means that postmortem analysis of
   the guest is not possible. Once we had the guests in a quiesced
   state, where all the buffers have been flushed and the hardware
   sources released, we would have two modes of operation that can be
   used together and complement each other.

 - Proactive: A qmp event describing the error (severity, topology,
   etc) is emitted. The HA software would have to register to
   receive hardware error events, possibly using the libvirt
   bindings. Upon receiving the event the HA software would know
   that the guest is in a failover-safe quiesced state so it could
   do without fencing and proceed to the failover stage directly.


This seems to match the BLOCK_IO_ERROR event we have today: when a disk error
happens, an event is emitted and the virtual machine can be automatically
stopped (there's a configuration option for this).

On the other hand, there's a number of ways to do this differently. I think
the first thing to do is to agree on what qemu's behavior is going to be, then
we decide how to expose this info to qmp clients.


I would like to support qemu/KVM bugs too in the same framework.

Even though there are some debugging ways, the easiest and most reliable one 
would
be using the frozen state of the guest at the moment the bug happened.


We've already experienced some qemu crashes which seemed to be caused by a KVM
emulation failure in our test environment. Although we could guess what happened
by checking some messages like the exit reason, the guest state might have been
more helpful.

So what I want to get is:

 - new qemu/KVM mode in which guests are automatically stopped in a 
failover-safe
   state if qemu/KVM becomes impossible to continue,

 - new interface between qemu and HA to handle the failover-safe state,

Although I personally don't mind whether the interface is event based or polling
based, one important problem from the HA's point of view would be:

 * how to treat errors which can be caused in different layers uniformly.

E.g. if the problem is caused by guest side, qemu may normally exit without 
sending
any events to HA. So an interface for polling may be helpful even when we 
choose event
driven one.


Takuya





 - Passive: Polling resource agents that need to check the state of
   the guest generally use libvirt or a wrapper such as virsh. When
   the state is SHUTOFF or CRASHED the resource agent proceeds to
   the fencing stage, which might be expensive and usually involves
   killing the qemu process. We propose adding a new state that
   indicates the failover-safe state described before. In this
   state the HA software would not need to use fencing techniques
   and since the qemu process is not killed postmortem analysis of
   the virtual machine is still possible.


It wouldn't be polling, I guess. We already have events for most state changes.
So, when the machine stops, reboots, etc.. the client would be notified and
then it could inspect the virtual machine by using query commands.

This method would be preferable in case we also want this information available
in the user Monitor and/or if the event gets too messy because of the amount of
information we want to put in it.


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH V2 1/5] ara virt interface of perf to support kvm guest os statistics collection in guest os

2010-06-21 Thread Zhang, Yanmin

On Mon, 2010-06-21 at 14:45 +0300, Avi Kivity wrote:
 On 06/21/2010 12:31 PM, Zhang, Yanmin wrote:
  Here is the version 2.
 
  ChangeLog since V1: Mostly changes based on Avi's suggestions.
  1) Use a id to identify the perf_event between host and guest;
  2) Changes lots of codes to deal with malicious guest os;
  3) Add a perf_event number limitation per guest os instance;
  4) Support guest os on the top of another guest os scenario. But
  I didn't test it yet as there is no environment. The design is to
  add 2 pointers in struct perf_event. One is used by host and the
  other is used by guest.
  5) Fix the bug to support 'perf stat'. The key is sync count data
  back to guest when guest tries to disable the perf_event at host
  side.
  6) Add a clear ABI of PV perf.
 
 
 
 Please use meaningful subject lines for individual patches.
Yes, I should. I rushed to send the patches out yesterday afternoon as I need
to take company shuttle back home.
 
  I don't implement live migration feature.
 
  Avi,
  Is live migration necessary on pv perf support?
 
 
 Yes.
Ok. With the PV perf interface, host perf saves all counter info into the
perf_event structure. To support live migration, we need to save all host
perf_event structures, or at least perf_event->count and perf_event->attr,
and then recreate the host perf_event after migration.

I check qemu-kvm codes and it seems most live migration is to save cpu states.
So it seems it's hard for perf pv interface to match current live migration. 
Any suggestion?

 
  --- linux-2.6_tip0620/Documentation/kvm/paravirt-perf.txt   1970-01-01 
  08:00:00.0 +0800
  +++ linux-2.6_tip0620perfkvm/Documentation/kvm/paravirt-perf.txt
  2010-06-21 15:21:39.312999849 +0800
  @@ -0,0 +1,133 @@
  +The x86 kvm paravirt perf event interface
  +===
  +
  +This paravirt interface is responsible for supporting guest os perf event
  +collections. If guest os supports this interface, users could run command
  +perf in guest os directly.
  +
  +Design
  +
  +
  +Guest os calls a series of hypercalls to communicate with host kernel to
  +create/enable/disable/close perf events. Host kernel notifies guest os
  +by injecting an NMI to guest os when an event overflows. Guest os needs to
  +go through all its active events to check if they overflow, and output
  +performance statistics if they do.
  +
  +ABI
  +=
  +
  +1) Detect if host kernel supports paravirt perf interface:
  +#define KVM_FEATURE_PV_PERF   4
  +Host kernel defines above cpuid bit. Guest os calls cpuid to check if host
  +os returns this bit. If it does, it means host kernel supports paravirt perf
  +interface.
  +
  +2) Open a new event at host side:
  +kvm_hypercall3(KVM_PERF_OP, KVM_PERF_OP_OPEN, param_addr_low32bit,
  +param_addr_high32bit);
  +
  +#define KVM_PERF_OP3
  +/* Operations for KVM_PERF_OP */
  +#define KVM_PERF_OP_OPEN1
  +#define KVM_PERF_OP_CLOSE   2
  +#define KVM_PERF_OP_ENABLE  3
  +#define KVM_PERF_OP_DISABLE 4
  +#define KVM_PERF_OP_READ5
 
 
  +/*
  + * guest_perf_attr is used when guest calls hypercall to
  + * open a new perf_event at host side. Mostly, it's a copy of
  + * perf_event_attr and deletes something not used by host kernel.
  + */
  +struct guest_perf_attr {
  +__u32   type;
 
 
 Need padding here, otherwise the structure is different on 32-bit and 
 64-bit guests.
Ok. I will change it.

 
  +__u64   config;
  +__u64   sample_period;
  +__u64   sample_type;
  +__u64   read_format;
  +__u64   flags;
 
 
 and here.
I will rearrange the whole structure.

 
  +__u32   bp_type;
  +__u64   bp_addr;
  +__u64   bp_len;
 
 
 Do we actually support breakpoints on the guest?  Note the hardware 
 breakpoints are also usable by the guest, so if the host uses them, we 
 won't be able to emulate them correctly.
   We can let the guest to 
 breakpoint perf monitoring itself and drop this feature.
Ok, I will disable breakpoint feature of pv interface.

 
  +};
 
 
 What about documentation for individual fields?  Esp. type, config, and 
 flags, but also the others.
They are really perf implementation specific. Even perf_event definition
has no document but code comments. I will add simple explanation around
the new structure definition.

 
  +/*
  + * data communication area about perf_event between
  + * Host kernel and guest kernel
  + */
  +struct guest_perf_event {
  +u64 count;
  +atomic_t overflows;
 
 
 Please use __u64 and __u32, assume guests don't have Linux internal 
 types (though of course the first guest _is_ Linux).
This structure is used by both host and

[PATCH 01/13] KVM test: kvm_utils.py: add a primitive logging mechanism for kvm_subprocess

2010-06-21 Thread Lucas Meneghel Rodrigues

From: Michael Goldish mgold...@redhat.com

Add log_line() which logs a single line to a given file.  The file's path is
given relative to a certain base dir.
Add set_log_dir() which sets the base dir.

This is useful for logging the output of kvm_subprocess.  kvm_subprocess can
take a callback function, which it calls with each line of output it gets from
the running subprocess.  Redirecting kvm_subprocess's output to the regular log
files is done by passing it logging.debug or logging.info.  However, in order
to log to other files, we'd have to pass kvm_subprocess a custom logger method,
e.g. our_custom_logger.debug.  Unfortunately, such methods (called
instancemethods) cannot be pickled, and kvm_subprocess relies on pickling.
This patch offers an easy yet somewhat dirty solution to the problem.

Signed-off-by: Michael Goldish mgold...@redhat.com
---
 client/tests/kvm/kvm_utils.py |   37 +
 1 files changed, 37 insertions(+), 0 deletions(-)

diff --git a/client/tests/kvm/kvm_utils.py b/client/tests/kvm/kvm_utils.py
index 040124b..367e1e5 100644
--- a/client/tests/kvm/kvm_utils.py
+++ b/client/tests/kvm/kvm_utils.py
@@ -733,6 +733,43 @@ def find_free_ports(start_port, end_port, count):
 return ports
 
 
+# An easy way to log lines to files when the logging system can't be used
+
+_open_log_files = {}
+_log_file_dir = /tmp
+
+
+def log_line(filename, line):
+
+Write a line to a file.  '\n' is appended to the line.
+
+@param filename: Path of file to write to, either absolute or relative to
+the dir set by set_log_file_dir().
+@param line: Line to write.
+
+global _open_log_files, _log_file_dir
+if filename not in _open_log_files:
+path = get_path(_log_file_dir, filename)
+try:
+os.makedirs(os.path.dirname(path))
+except OSError:
+pass
+_open_log_files[filename] = open(path, w)
+timestr = time.strftime(%Y-%m-%d %H:%M:%S)
+_open_log_files[filename].write(%s: %s\n % (timestr, line))
+_open_log_files[filename].flush()
+
+
+def set_log_file_dir(dir):
+
+Set the base directory for log files created by log_line().
+
+@param dir: Directory for log files.
+
+global _log_file_dir
+_log_file_dir = dir
+
+
 # The following are miscellaneous utility functions.
 
 def get_path(base_path, user_path):
-- 
1.7.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 02/13] KVM test: add the auto_close option to all kvm_subprocess classes

2010-06-21 Thread Lucas Meneghel Rodrigues

From: Michael Goldish mgold...@redhat.com

Allow all kvm_subprocess classes (kvm_spawn, kvm_tail, kvm_expect,
kvm_shell_session) to close automatically if auto_close is given and True.
By default auto_close is False for all classes except kvm_shell_session,
for which it is True by default.

Signed-off-by: Michael Goldish mgold...@redhat.com
---
 client/tests/kvm/kvm_subprocess.py |   50 +---
 1 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/client/tests/kvm/kvm_subprocess.py 
b/client/tests/kvm/kvm_subprocess.py
index 2d70146..73edc5d 100755
--- a/client/tests/kvm/kvm_subprocess.py
+++ b/client/tests/kvm/kvm_subprocess.py
@@ -285,7 +285,8 @@ class kvm_spawn:
 resumes _tail() if needed.
 
 
-def __init__(self, command=None, id=None, echo=False, linesep=\n):
+def __init__(self, command=None, id=None, auto_close=False, echo=False,
+ linesep=\n):
 
 Initialize the class and run command as a child process.
 
@@ -293,6 +294,8 @@ class kvm_spawn:
 server.
 @param id: ID of an already running server, if accessing a running
 server, or None if starting a new one.
+@param auto_close: If True, close() the instance automatically when its
+reference count drops to zero (default False).
 @param echo: Boolean indicating whether echo should be initially
 enabled for the pseudo terminal running the subprocess.  This
 parameter has an effect only when starting a new server.
@@ -316,6 +319,7 @@ class kvm_spawn:
   self.id)
 
 # Remember some attributes
+self.auto_close = auto_close
 self.echo = echo
 self.linesep = linesep
 
@@ -378,7 +382,12 @@ class kvm_spawn:
 def __getinitargs__(self):
 # Save some information when pickling -- will be passed to the
 # constructor upon unpickling
-return (None, self.id, self.echo, self.linesep)
+return (None, self.id, self.auto_close, self.echo, self.linesep)
+
+
+def __del__(self):
+if self.auto_close:
+self.close()
 
 
 def _add_reader(self, reader):
@@ -554,10 +563,9 @@ class kvm_tail(kvm_spawn):
 When this class is unpickled, it automatically resumes reporting output.
 
 
-def __init__(self, command=None, id=None, echo=False, linesep=\n,
- termination_func=None, termination_params=(),
- output_func=None, output_params=(),
- output_prefix=):
+def __init__(self, command=None, id=None, auto_close=False, echo=False,
+ linesep=\n, termination_func=None, termination_params=(),
+ output_func=None, output_params=(), output_prefix=):
 
 Initialize the class and run command as a child process.
 
@@ -565,6 +573,8 @@ class kvm_tail(kvm_spawn):
 server.
 @param id: ID of an already running server, if accessing a running
 server, or None if starting a new one.
+@param auto_close: If True, close() the instance automatically when its
+reference count drops to zero (default False).
 @param echo: Boolean indicating whether echo should be initially
 enabled for the pseudo terminal running the subprocess.  This
 parameter has an effect only when starting a new server.
@@ -587,7 +597,7 @@ class kvm_tail(kvm_spawn):
 self._add_close_hook(kvm_tail._join_thread)
 
 # Init the superclass
-kvm_spawn.__init__(self, command, id, echo, linesep)
+kvm_spawn.__init__(self, command, id, auto_close, echo, linesep)
 
 # Remember some attributes
 self.termination_func = termination_func
@@ -751,10 +761,9 @@ class kvm_expect(kvm_tail):
 It also provides all of kvm_tail's functionality.
 
 
-def __init__(self, command=None, id=None, echo=False, linesep=\n,
- termination_func=None, termination_params=(),
- output_func=None, output_params=(),
- output_prefix=):
+def __init__(self, command=None, id=None, auto_close=False, echo=False,
+ linesep=\n, termination_func=None, termination_params=(),
+ output_func=None, output_params=(), output_prefix=):
 
 Initialize the class and run command as a child process.
 
@@ -762,6 +771,8 @@ class kvm_expect(kvm_tail):
 server.
 @param id: ID of an already running server, if accessing a running
 server, or None if starting a new one.
+@param auto_close: If True, close() the instance automatically when its
+reference count drops to zero (default False).
 @param echo: Boolean indicating whether echo should be initially
 enabled for the pseudo terminal running the subprocess.  This

[PATCH 03/13] KVM test: restructure remote_login() and remote_scp()

2010-06-21 Thread Lucas Meneghel Rodrigues

- Add _remote_login() and _remote_scp() which, instead
  of taking a command line, take an existing session
  and operate on it.  This is useful for logging into
  existing always-open sessions, such as serial console
  sessions.

- Merge ssh/telnet/netcat into remote_login().

Signed-off-by: Michael Goldish mgold...@redhat.com
---
 client/tests/kvm/kvm_utils.py |  209 +++--
 client/tests/kvm/kvm_vm.py|   11 +--
 2 files changed, 100 insertions(+), 120 deletions(-)

diff --git a/client/tests/kvm/kvm_utils.py b/client/tests/kvm/kvm_utils.py
index 367e1e5..d4edbaa 100644
--- a/client/tests/kvm/kvm_utils.py
+++ b/client/tests/kvm/kvm_utils.py
@@ -451,143 +451,183 @@ def check_kvm_source_dir(source_dir):
 # The following are functions used for SSH, SCP and Telnet communication with
 # guests.
 
-def remote_login(command, password, prompt, linesep=\n, timeout=10):
+def _remote_login(session, password, prompt, timeout=10):
 
-Log into a remote host (guest) using SSH or Telnet. Run the given command
-using kvm_spawn and provide answers to the questions asked. If timeout
-expires while waiting for output from the child (e.g. a password prompt
-or a shell prompt) -- fail.
+Log into a remote host (guest) using SSH or Telnet.  Wait for questions
+and provide answers.  If timeout expires while waiting for output from the
+child (e.g. a password prompt or a shell prompt) -- fail.
 
 @brief: Log into a remote host (guest) using SSH or Telnet.
 
-@param command: The command to execute (e.g. ssh r...@localhost)
+@param session: A kvm_expect or kvm_shell_session instance to operate on
 @param password: The password to send in reply to a password prompt
 @param prompt: The shell prompt that indicates a successful login
-@param linesep: The line separator to send instead of \\n
-(sometimes \\r\\n is required)
 @param timeout: The maximal time duration (in seconds) to wait for each
 step of the login procedure (i.e. the Are you sure prompt, the
 password prompt, the shell prompt, etc)
 
-@return Return the kvm_spawn object on success and None on failure.
+@return: True on success and False otherwise.
 
-sub = kvm_subprocess.kvm_shell_session(command,
-   linesep=linesep,
-   prompt=prompt)
-
 password_prompt_count = 0
 
-logging.debug(Trying to login with command '%s' % command)
-
 while True:
-(match, text) = sub.read_until_last_line_matches(
+(match, text) = session.read_until_last_line_matches(
 [r[Aa]re you sure, r[Pp]assword:\s*$, r^\s*[Ll]ogin:\s*$,
  r[Cc]onnection.*closed, r[Cc]onnection.*refused,
  r[Pp]lease wait, prompt],
  timeout=timeout, internal_timeout=0.5)
 if match == 0:  # Are you sure you want to continue connecting
 logging.debug(Got 'Are you sure...'; sending 'yes')
-sub.sendline(yes)
+session.sendline(yes)
 continue
 elif match == 1:  # password:
 if password_prompt_count == 0:
 logging.debug(Got password prompt; sending '%s' % password)
-sub.sendline(password)
+session.sendline(password)
 password_prompt_count += 1
 continue
 else:
 logging.debug(Got password prompt again)
-sub.close()
-return None
+return False
 elif match == 2:  # login:
 logging.debug(Got unexpected login prompt)
-sub.close()
-return None
+return False
 elif match == 3:  # Connection closed
 logging.debug(Got 'Connection closed')
-sub.close()
-return None
+return False
 elif match == 4:  # Connection refused
 logging.debug(Got 'Connection refused')
-sub.close()
-return None
+return False
 elif match == 5:  # Please wait
 logging.debug(Got 'Please wait')
 timeout = 30
 continue
 elif match == 6:  # prompt
 logging.debug(Got shell prompt -- logged in)
-return sub
+return session
 else:  # match == None
 logging.debug(Timeout elapsed or process terminated)
-sub.close()
-return None
+return False
 
 
-def remote_scp(command, password, transfer_timeout=600, login_timeout=10):
+def _remote_scp(session, password, transfer_timeout=600, login_timeout=10):
 
-Run the given command using kvm_spawn and provide answers to the questions
-asked. If transfer_timeout expires while waiting for the transfer to
-complete, fail. If login_timeout expires while waiting for output from

[PATCH 04/13] KVM test: send username in remote_login()

2010-06-21 Thread Lucas Meneghel Rodrigues

In order to let the serial console work, we must let
remote_login() send the username when it meets the username prompt. This
patch makes the login fail if it meets the username prompt twice.

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Michael Goldish mgold...@redhat.com
---
 client/tests/kvm/kvm_utils.py |   16 
 1 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/client/tests/kvm/kvm_utils.py b/client/tests/kvm/kvm_utils.py
index d4edbaa..0c40b55 100644
--- a/client/tests/kvm/kvm_utils.py
+++ b/client/tests/kvm/kvm_utils.py
@@ -451,7 +451,7 @@ def check_kvm_source_dir(source_dir):
 # The following are functions used for SSH, SCP and Telnet communication with
 # guests.
 
-def _remote_login(session, password, prompt, timeout=10):
+def _remote_login(session, username, password, prompt, timeout=10):
 
 Log into a remote host (guest) using SSH or Telnet.  Wait for questions
 and provide answers.  If timeout expires while waiting for output from the
@@ -460,6 +460,7 @@ def _remote_login(session, password, prompt, timeout=10):
 @brief: Log into a remote host (guest) using SSH or Telnet.
 
 @param session: A kvm_expect or kvm_shell_session instance to operate on
+@param username: The username to send in reply to a login prompt
 @param password: The password to send in reply to a password prompt
 @param prompt: The shell prompt that indicates a successful login
 @param timeout: The maximal time duration (in seconds) to wait for each
@@ -469,6 +470,7 @@ def _remote_login(session, password, prompt, timeout=10):
 @return: True on success and False otherwise.
 
 password_prompt_count = 0
+login_prompt_count = 0
 
 while True:
 (match, text) = session.read_until_last_line_matches(
@@ -490,8 +492,14 @@ def _remote_login(session, password, prompt, timeout=10):
 logging.debug(Got password prompt again)
 return False
 elif match == 2:  # login:
-logging.debug(Got unexpected login prompt)
-return False
+if login_prompt_count == 0:
+logging.debug(Got username prompt; sending '%s' % username)
+session.sendline(username)
+login_prompt_count += 1
+continue
+else:
+logging.debug(Got username prompt again)
+return False
 elif match == 3:  # Connection closed
 logging.debug(Got 'Connection closed')
 return False
@@ -596,7 +604,7 @@ def remote_login(client, host, port, username, password, 
prompt, linesep=\n,
 logging.debug(Trying to login with command '%s' % cmd)
 session = kvm_subprocess.kvm_shell_session(cmd, linesep=linesep,
prompt=prompt)
-if _remote_login(session, password, prompt, timeout):
+if _remote_login(session, username, password, prompt, timeout):
 return session
 else:
 session.close()
-- 
1.7.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 05/13] KVM test: remote_login(): make the login re suitable for serial console

2010-06-21 Thread Lucas Meneghel Rodrigues

From: Michael Goldish mgold...@redhat.com

Current matching re ^\s*[Ll]ogin:\s*$ is not suitable for the serial
console, so change it to [Ll]ogin:\s*$.

Signed-off-by: Jason Wang jasow...@redhat.com

Signed-off-by: Michael Goldish mgold...@redhat.com
---
 client/tests/kvm/kvm_utils.py |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/client/tests/kvm/kvm_utils.py b/client/tests/kvm/kvm_utils.py
index 0c40b55..354450c 100644
--- a/client/tests/kvm/kvm_utils.py
+++ b/client/tests/kvm/kvm_utils.py
@@ -474,7 +474,7 @@ def _remote_login(session, username, password, prompt, 
timeout=10):
 
 while True:
 (match, text) = session.read_until_last_line_matches(
-[r[Aa]re you sure, r[Pp]assword:\s*$, r^\s*[Ll]ogin:\s*$,
+[r[Aa]re you sure, r[Pp]assword:\s*$, r[Ll]ogin:\s*$,
  r[Cc]onnection.*closed, r[Cc]onnection.*refused,
  r[Pp]lease wait, prompt],
  timeout=timeout, internal_timeout=0.5)
-- 
1.7.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 06/13] KVM test: kvm_vm.py: redirect the serial console to a unix socket

2010-06-21 Thread Lucas Meneghel Rodrigues

From: Michael Goldish mgold...@redhat.com

Based on Jason Wang's patch.

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Michael Goldish mgold...@redhat.com
---
 client/tests/kvm/kvm_vm.py |   13 +
 1 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/client/tests/kvm/kvm_vm.py b/client/tests/kvm/kvm_vm.py
index 225f26a..1edecb9 100755
--- a/client/tests/kvm/kvm_vm.py
+++ b/client/tests/kvm/kvm_vm.py
@@ -206,6 +206,9 @@ class VM:
 def add_qmp_monitor(help, filename):
 return  -qmp unix:'%s',server,nowait % filename
 
+def add_serial(help, filename):
+return  -serial unix:'%s',server,nowait % filename
+
 def add_mem(help, mem):
 return  -m %s % mem
 
@@ -314,6 +317,9 @@ class VM:
 else:
 qemu_cmd += add_human_monitor(help, monitor_filename)
 
+# Add serial console redirection
+qemu_cmd += add_serial(help, self.get_serial_console_filename())
+
 for image_name in kvm_utils.get_sub_dict_names(params, images):
 image_params = kvm_utils.get_sub_dict(params, image_name)
 if image_params.get(boot_drive) == no:
@@ -774,6 +780,13 @@ class VM:
 kvm_utils.get_sub_dict_names(self.params, monitors)]
 
 
+def get_serial_console_filename(self):
+
+Return the serial console filename.
+
+return /tmp/serial-%s % self.instance
+
+
 def get_testlog_filename(self):
 
 Return the testlog filename.
-- 
1.7.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 07/13] KVM test: kvm_vm.py: log serial console output and allow serial login

2010-06-21 Thread Lucas Meneghel Rodrigues

From: Michael Goldish mgold...@redhat.com

Log serial console output to files in the debug dir.

Also, add VM.serial_login() which attempts to log in via the serial console.

Note:

- Tests must NOT close() a serial console session, because it needs to remain
  open for the following tests to use.

- Instead, tests must use session.sendline(exit) for serial console sessions:

session = vm.serial_login()
try:
...
finally:
session.sendline(exit)

- Only one serial console session per VM is available at a time.
  Calling serial_login() twice for the same VM, without an exit between the
  calls, will probably fail and/or do bad stuff.

Signed-off-by: Michael Goldish mgold...@redhat.com
---
 client/tests/kvm/kvm_vm.py |   41 +
 1 files changed, 41 insertions(+), 0 deletions(-)

diff --git a/client/tests/kvm/kvm_vm.py b/client/tests/kvm/kvm_vm.py
index 1edecb9..71df892 100755
--- a/client/tests/kvm/kvm_vm.py
+++ b/client/tests/kvm/kvm_vm.py
@@ -106,6 +106,7 @@ class VM:
 @param address_cache: A dict that maps MAC addresses to IP addresses
 
 self.process = None
+self.serial_console = None
 self.redirs = {}
 self.vnc_port = 5900
 self.uuid = None
@@ -634,6 +635,15 @@ class VM:
 return False
 
 logging.debug(VM appears to be alive with PID %s, self.get_pid())
+
+# Establish a session with the serial console -- requires a version
+# of netcat that supports -U
+self.serial_console = kvm_subprocess.kvm_shell_session(
+nc -U %s % self.get_serial_console_filename(),
+auto_close=False,
+output_func=kvm_utils.log_line,
+output_params=(serial-%s.log % name,))
+
 return True
 
 finally:
@@ -707,6 +717,8 @@ class VM:
 self.pci_assignable.release_devs()
 if self.process:
 self.process.close()
+if self.serial_console:
+self.serial_console.close()
 for f in ([self.get_testlog_filename()] +
   self.get_monitor_filenames()):
 try:
@@ -975,6 +987,35 @@ class VM:
  remote_path, local_path, timeout)
 
 
+def serial_login(self, timeout=10):
+
+Log into the guest via the serial console.
+If timeout expires while waiting for output from the guest (e.g. a
+password prompt or a shell prompt) -- fail.
+
+@param timeout: Time (seconds) before giving up logging into the guest.
+@return: kvm_spawn object on success and None on failure.
+
+username = self.params.get(username, )
+password = self.params.get(password, )
+prompt = self.params.get(shell_prompt, [\#\$])
+linesep = eval('%s' % self.params.get(shell_linesep, r\n))
+status_test_command = self.params.get(status_test_command, )
+
+if self.serial_console:
+self.serial_console.set_linesep(linesep)
+self.serial_console.set_status_test_command(status_test_command)
+else:
+return None
+
+# Make sure we get a login prompt
+self.serial_console.sendline()
+
+if kvm_utils._remote_login(self.serial_console, username, password,
+   prompt, timeout):
+return self.serial_console
+
+
 def send_key(self, keystr):
 
 Send a key event to the VM.
-- 
1.7.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 08/13] KVM test: kvm.py: set log file dir for kvm_subprocess logging

2010-06-21 Thread Lucas Meneghel Rodrigues

From: Michael Goldish mgold...@redhat.com

Signed-off-by: Michael Goldish mgold...@redhat.com
---
 client/tests/kvm/kvm.py |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/client/tests/kvm/kvm.py b/client/tests/kvm/kvm.py
index bab1e6f..0799cff 100644
--- a/client/tests/kvm/kvm.py
+++ b/client/tests/kvm/kvm.py
@@ -31,6 +31,10 @@ class kvm(test.test):
 logging.debug(%s = %s, key, params[key])
 self.write_test_keyval({key: params[key]})
 
+# Set the log file dir for the logging mechanism used by kvm_subprocess
+# (this must be done before unpickling env)
+kvm_utils.set_log_file_dir(self.debugdir)
+
 # Open the environment file
 logging.info(Unpickling env. You may see some harmless error 
  messages.)
-- 
1.7.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 09/13] KVM test: kvm_subprocess.py: don't sanitize text before passing it to callbacks

2010-06-21 Thread Lucas Meneghel Rodrigues

From: Michael Goldish mgold...@redhat.com

Converting the text to utf-8 seems to cause trouble when converting back (e.g.
when writing to files).  The logging system seems to be fine with unsanitized
text, so let's not sanitize it.

Signed-off-by: Michael Goldish mgold...@redhat.com
---
 client/tests/kvm/kvm_subprocess.py |6 ++
 1 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/client/tests/kvm/kvm_subprocess.py 
b/client/tests/kvm/kvm_subprocess.py
index 73edc5d..93a8429 100755
--- a/client/tests/kvm/kvm_subprocess.py
+++ b/client/tests/kvm/kvm_subprocess.py
@@ -688,9 +688,7 @@ class kvm_tail(kvm_spawn):
 def print_line(text):
 # Pre-pend prefix and remove trailing whitespace
 text = self.output_prefix + text.rstrip()
-# Sanitize text
-text = text.decode(utf-8, replace)
-# Pass it to output_func
+# Pass text to output_func
 try:
 params = self.output_params + (text,)
 self.output_func(*params)
@@ -888,7 +886,7 @@ class kvm_expect(kvm_tail):
 if str.endswith(\n):
 str = str[:-1]
 for line in str.split(\n):
-print_func(line.decode(utf-8, replace))
+print_func(line)
 data += newdata
 
 done = False
-- 
1.7.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 10/13] KVM test: log output of all shell sessions and SCP transfers

2010-06-21 Thread Lucas Meneghel Rodrigues

From: Michael Goldish mgold...@redhat.com

Use kvm_utils.log_line() to log the output of all shell sessions and SCP
transfers.

Signed-off-by: Michael Goldish mgold...@redhat.com
---
 client/tests/kvm/kvm_utils.py |   33 ++---
 client/tests/kvm/kvm_vm.py|   15 ---
 2 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/client/tests/kvm/kvm_utils.py b/client/tests/kvm/kvm_utils.py
index 354450c..12508b6 100644
--- a/client/tests/kvm/kvm_utils.py
+++ b/client/tests/kvm/kvm_utils.py
@@ -572,7 +572,7 @@ def _remote_scp(session, password, transfer_timeout=600, 
login_timeout=10):
 
 
 def remote_login(client, host, port, username, password, prompt, linesep=\n,
- timeout=10):
+ log_filename=None, timeout=10):
 
 Log into a remote host (guest) using SSH/Telnet/Netcat.
 
@@ -584,6 +584,7 @@ def remote_login(client, host, port, username, password, 
prompt, linesep=\n,
 @param prompt: Shell prompt (regular expression)
 @param linesep: The line separator to use when sending lines
 (e.g. '\\n' or '\\r\\n')
+@param log_filename: If specified, log all output to this file
 @param timeout: The maximal time duration (in seconds) to wait for
 each step of the login procedure (i.e. the Are you sure prompt
 or the password prompt)
@@ -601,16 +602,21 @@ def remote_login(client, host, port, username, password, 
prompt, linesep=\n,
 else:
 logging.error(Unknown remote shell client: %s % client)
 return
+
 logging.debug(Trying to login with command '%s' % cmd)
 session = kvm_subprocess.kvm_shell_session(cmd, linesep=linesep,
prompt=prompt)
 if _remote_login(session, username, password, prompt, timeout):
+if log_filename:
+session.set_output_func(log_line)
+session.set_output_params((log_filename,))
 return session
 else:
 session.close()
 
 
-def remote_scp(command, password, transfer_timeout=600, login_timeout=10):
+def remote_scp(command, password, log_filename=None, transfer_timeout=600,
+   login_timeout=10):
 
 Transfer file(s) to a remote host (guest) using SCP.
 
@@ -619,6 +625,7 @@ def remote_scp(command, password, transfer_timeout=600, 
login_timeout=10):
 @param command: The command to execute
 (e.g. scp -r foobar r...@localhost:/tmp/).
 @param password: The password to send in reply to a password prompt.
+@param log_filename: If specified, log all output to this file
 @param transfer_timeout: The time duration (in seconds) to wait for the
 transfer to complete.
 @param login_timeout: The maximal time duration (in seconds) to wait for
@@ -629,7 +636,17 @@ def remote_scp(command, password, transfer_timeout=600, 
login_timeout=10):
 
 logging.debug(Trying to SCP with command '%s', timeout %ss,
   command, transfer_timeout)
-session = kvm_subprocess.kvm_expect(command)
+
+if log_filename:
+output_func = log_line
+output_params = (log_filename,)
+else:
+output_func = None
+output_params = ()
+
+session = kvm_subprocess.kvm_expect(command,
+output_func=output_func,
+output_params=output_params)
 try:
 return _remote_scp(session, password, transfer_timeout, login_timeout)
 finally:
@@ -637,7 +654,7 @@ def remote_scp(command, password, transfer_timeout=600, 
login_timeout=10):
 
 
 def scp_to_remote(host, port, username, password, local_path, remote_path,
-  timeout=600):
+  log_filename=None, timeout=600):
 
 Copy files to a remote host (guest).
 
@@ -646,6 +663,7 @@ def scp_to_remote(host, port, username, password, 
local_path, remote_path,
 @param password: Password (if required)
 @param local_path: Path on the local machine where we are copying from
 @param remote_path: Path on the remote machine where we are copying to
+@param log_filename: If specified, log all output to this file
 @param timeout: The time duration (in seconds) to wait for the transfer
 to complete.
 
@@ -654,11 +672,11 @@ def scp_to_remote(host, port, username, password, 
local_path, remote_path,
 command = (scp -v -o UserKnownHostsFile=/dev/null 
-o PreferredAuthentications=password -r -P %s %s %...@%s:%s %
(port, local_path, username, host, remote_path))
-return remote_scp(command, password, timeout)
+return remote_scp(command, password, log_filename, timeout)
 
 
 def scp_from_remote(host, port, username, password, remote_path, local_path,
-timeout=600):
+log_filename=None, timeout=600):
 
 Copy files from a remote host (guest).
 
@@ -667,6 +685,7 @@ def scp_from_remote(host, port, username,

[PATCH 11/13] KVM test: Enable the serial console during unattended installation

2010-06-21 Thread Lucas Meneghel Rodrigues

This patch enables the serial console during unattended installation
for all Linux guests.

Signed-off-by: Jason Wang jasow...@redhat.com
---
 client/tests/kvm/tests_base.cfg.sample |8 
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/client/tests/kvm/tests_base.cfg.sample 
b/client/tests/kvm/tests_base.cfg.sample
index ce88235..ec871df 100644
--- a/client/tests/kvm/tests_base.cfg.sample
+++ b/client/tests/kvm/tests_base.cfg.sample
@@ -527,7 +527,7 @@ variants:
 pxe_initrd = initrd.img
 tftp = images/tftpboot
 extra_params +=  -bootp /pxelinux.0 -boot cn
-kernel_args = ks=floppy nicdelay=60
+kernel_args = ks=floppy nicdelay=60 console=ttyS0,115200 
console=tty0
 
 variants:
 - 8.32:
@@ -688,7 +688,7 @@ variants:
 pxe_initrd = initrd
 tftp = images/tftpboot
 extra_params +=  -bootp /pxelinux.0 -boot cn
-kernel_args = autoyast=floppy
+kernel_args = autoyast=floppy console=ttyS0,115200 
console=tty0
 post_install_delay = 10
 
 variants:
@@ -770,7 +770,7 @@ variants:
 pxe_image = linux
 pxe_initrd = initrd
 extra_params +=  -bootp /pxelinux.0 -boot cn
-kernel_args = autoyast=floppy
+kernel_args = autoyast=floppy console=ttyS0,115200 
console=tty0
 post_install_delay = 10
 
 variants:
@@ -862,7 +862,7 @@ variants:
 pxe_initrd = initrd.img
 tftp = images/tftpboot
 extra_params +=  -bootp /pxelinux.0 -boot cn
-kernel_args = ks=floppy nicdelay=60
+kernel_args = ks=floppy nicdelay=60 console=ttyS0,115200 
console=tty0
 
 variants:
 - 3.9.i386:
-- 
1.7.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 12/13] KVM test: Enable the serial console for all linux guests

2010-06-21 Thread Lucas Meneghel Rodrigues

From: Jason Wang jasow...@redhat.com

Since we can now dump the content of the serial console or open a session
through it, we need to redirect the console to the serial port in the
unattended files to make use of it. The patch also keeps tty0 as a
console, according to the suggestion of Michael Goldish.

Signed-off-by: Jason Wang jasow...@redhat.com
---
 client/tests/kvm/unattended/Fedora-10.ks |2 +-
 client/tests/kvm/unattended/Fedora-11.ks |2 +-
 client/tests/kvm/unattended/Fedora-12.ks |2 +-
 client/tests/kvm/unattended/Fedora-13.ks |2 +-
 client/tests/kvm/unattended/Fedora-8.ks  |2 +-
 client/tests/kvm/unattended/Fedora-9.ks  |2 +-
 client/tests/kvm/unattended/OpenSUSE-11.xml  |1 +
 client/tests/kvm/unattended/RHEL-3-series.ks |2 +-
 client/tests/kvm/unattended/RHEL-4-series.ks |2 +-
 client/tests/kvm/unattended/RHEL-5-series.ks |2 +-
 client/tests/kvm/unattended/SLES-11.xml  |1 +
 11 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/client/tests/kvm/unattended/Fedora-10.ks 
b/client/tests/kvm/unattended/Fedora-10.ks
index 43c236a..03163c3 100644
--- a/client/tests/kvm/unattended/Fedora-10.ks
+++ b/client/tests/kvm/unattended/Fedora-10.ks
@@ -11,7 +11,7 @@ firewall --enabled --ssh
 selinux --enforcing
 timezone --utc America/New_York
 firstboot --disable
-bootloader --location=mbr
+bootloader --location=mbr --append=console=ttyS0,115200 console=tty0
 zerombr
 clearpart --all --initlabel
 autopart
diff --git a/client/tests/kvm/unattended/Fedora-11.ks 
b/client/tests/kvm/unattended/Fedora-11.ks
index bef3af7..443e2c3 100644
--- a/client/tests/kvm/unattended/Fedora-11.ks
+++ b/client/tests/kvm/unattended/Fedora-11.ks
@@ -10,7 +10,7 @@ firewall --enabled --ssh
 selinux --enforcing
 timezone --utc America/New_York
 firstboot --disable
-bootloader --location=mbr
+bootloader --location=mbr --append=console=ttyS0,115200 console=tty0
 zerombr
 
 clearpart --all --initlabel
diff --git a/client/tests/kvm/unattended/Fedora-12.ks 
b/client/tests/kvm/unattended/Fedora-12.ks
index bef3af7..443e2c3 100644
--- a/client/tests/kvm/unattended/Fedora-12.ks
+++ b/client/tests/kvm/unattended/Fedora-12.ks
@@ -10,7 +10,7 @@ firewall --enabled --ssh
 selinux --enforcing
 timezone --utc America/New_York
 firstboot --disable
-bootloader --location=mbr
+bootloader --location=mbr --append=console=ttyS0,115200 console=tty0
 zerombr
 
 clearpart --all --initlabel
diff --git a/client/tests/kvm/unattended/Fedora-13.ks 
b/client/tests/kvm/unattended/Fedora-13.ks
index 0be7d06..ef978e8 100644
--- a/client/tests/kvm/unattended/Fedora-13.ks
+++ b/client/tests/kvm/unattended/Fedora-13.ks
@@ -10,7 +10,7 @@ firewall --enabled --ssh
 selinux --enforcing
 timezone --utc America/New_York
 firstboot --disable
-bootloader --location=mbr
+bootloader --location=mbr --append=console=ttyS0,115200 console=tty0
 zerombr
 
 clearpart --all --initlabel
diff --git a/client/tests/kvm/unattended/Fedora-8.ks 
b/client/tests/kvm/unattended/Fedora-8.ks
index cde85dd..3e9d387 100644
--- a/client/tests/kvm/unattended/Fedora-8.ks
+++ b/client/tests/kvm/unattended/Fedora-8.ks
@@ -11,7 +11,7 @@ firewall --enabled --ssh
 selinux --enforcing
 timezone --utc America/New_York
 firstboot --disable
-bootloader --location=mbr
+bootloader --location=mbr --append=console=ttyS0,115200 console=tty0
 zerombr
 clearpart --all --initlabel
 autopart
diff --git a/client/tests/kvm/unattended/Fedora-9.ks 
b/client/tests/kvm/unattended/Fedora-9.ks
index cde85dd..3e9d387 100644
--- a/client/tests/kvm/unattended/Fedora-9.ks
+++ b/client/tests/kvm/unattended/Fedora-9.ks
@@ -11,7 +11,7 @@ firewall --enabled --ssh
 selinux --enforcing
 timezone --utc America/New_York
 firstboot --disable
-bootloader --location=mbr
+bootloader --location=mbr --append=console=ttyS0,115200 console=tty0
 zerombr
 clearpart --all --initlabel
 autopart
diff --git a/client/tests/kvm/unattended/OpenSUSE-11.xml 
b/client/tests/kvm/unattended/OpenSUSE-11.xml
index 7dd44fa..64140bf 100644
--- a/client/tests/kvm/unattended/OpenSUSE-11.xml
+++ b/client/tests/kvm/unattended/OpenSUSE-11.xml
@@ -50,6 +50,7 @@
 moduleedd/module
   /initrd_module
 /initrd_modules
+appendconsole=ttyS0,115200 console=tty0/append
 loader_typegrub/loader_type
 sections config:type=list/
   /bootloader
diff --git a/client/tests/kvm/unattended/RHEL-3-series.ks 
b/client/tests/kvm/unattended/RHEL-3-series.ks
index 5321118..413890a 100644
--- a/client/tests/kvm/unattended/RHEL-3-series.ks
+++ b/client/tests/kvm/unattended/RHEL-3-series.ks
@@ -10,7 +10,7 @@ rootpw 123456
 firewall --enabled --ssh
 timezone America/New_York
 firstboot --disable
-bootloader --location=mbr
+bootloader --location=mbr --append=console=ttyS0,115200 console=tty0
 clearpart --all --initlabel
 autopart
 reboot
diff --git a/client/tests/kvm/unattended/RHEL-4-series.ks 
b/client/tests/kvm/unattended/RHEL-4-series.ks
index 159998b..213914d 100644
---

[PATCH 13/13] KVM test: Fix a small bug on timedrift test

2010-06-21 Thread Lucas Meneghel Rodrigues

In timedrift.py, a prefix is added to identify the
kvm subprocess instances of the guest load processes.
Now that the ssh sessions are being logged, this
involves changing the kvm subprocess callback
function. When this happens, the wrong parameters
are passed to the logging system, causing an exception
while formatting the message. Let's fix that.

Signed-off-by: Lucas Meneghel Rodrigues l...@redhat.com
Signed-off-by: Michael Goldish mgold...@redhat.com
---
 client/tests/kvm/tests/timedrift.py |5 +
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/client/tests/kvm/tests/timedrift.py 
b/client/tests/kvm/tests/timedrift.py
index 028b74c..a6d3076 100644
--- a/client/tests/kvm/tests/timedrift.py
+++ b/client/tests/kvm/tests/timedrift.py
@@ -90,6 +90,11 @@ def run_timedrift(test, params, env):
 load_session = vm.remote_login()
 if not load_session:
 raise error.TestFail(Could not log into guest)
+# Set output func to None to stop it from being called so we
+# can change the callback function and the parameters it takes
+# with no problems
+load_session.set_output_func(None)
+load_session.set_output_params(())
 load_session.set_output_prefix((guest load %d)  % i)
 load_session.set_output_func(logging.debug)
 guest_load_sessions.append(load_session)
-- 
1.7.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH V2 2/5] para virt interface of perf to support kvm guest os statistics collection in guest os

2010-06-21 Thread Zhang, Yanmin

On Mon, 2010-06-21 at 15:00 +0300, Avi Kivity wrote:
 On 06/21/2010 12:31 PM, Zhang, Yanmin wrote:
  The 2nd patch is to change the definition of perf_event to facilitate
  perf attr copy when a hypercall happens.
 
  Signed-off-by: Zhang Yanminyanmin_zh...@linux.intel.com
 
  ---
 
  --- linux-2.6_tip0620/include/linux/perf_event.h2010-06-21 
  15:19:52.821999849 +0800
  +++ linux-2.6_tip0620perfkvm/include/linux/perf_event.h 2010-06-21 
  16:53:49.283999849 +0800
  @@ -188,7 +188,10 @@ struct perf_event_attr {
  __u64   sample_type;
  __u64   read_format;
 
 
 
 Assuming these flags are available to the guest?
These flags are used by the generic perf code. To match the host kernel, we want
the guest OS to use the same flags.

 
  -   __u64   disabled   :  1, /* off by default*/
  +   union {
  +   __u64   flags;
  +   struct {
  +   __u64   disabled   :  1, /* off by default*/
  inherit:  1, /* children inherit it   */
 
 
 inherit is meaningless for a guest.
Right. The host kernel will reset it to 0 before creating the perf_event for the guest OS.
We can't delete the flag here, as it's used by the generic perf code.
I need to separate the patch a little better. The definitions in
include/linux/perf_event.h
are mostly related to generic perf code, so I'm very careful about changing them.

 
  pinned :  1, /* must always be on PMU */
 
 
 We cannot allow a guest to pin a counter.
Ok. I will reset it in function kvm_pv_perf_op_open.

 
 The other flags are also problematic.  I'd like to see virt-specific 
 flags (probably we'll only need kernel/user and nested_hv for nested 
 virtualization).
How about adding more comments around struct guest_perf_attr->flags saying that
guest OS developers should take a look at include/linux/perf_event.h?
BTW, I will reset more flags to 0 in kvm_pv_perf_op_open.

 
 Something that is worrying is that we don't expose group information.  
 perf will multiplex the events for us, but there will be a loss in accuracy.
 
#ifdef CONFIG_HAVE_HW_BREAKPOINT
#includeasm/hw_breakpoint.h
#endif
  @@ -753,6 +752,20 @@ struct perf_event {
 
  perf_overflow_handler_t overflow_handler;
 
  +   /*
  +* pointers used by kvm perf paravirt interface.
  +*
  +* 1) Used in host kernel and points to host_perf_shadow which
  +* has information about guest perf_event
  +*/
  +   void*host_perf_shadow;
 
 
 Can we have real types instead of void pointers?
I just want the generic perf code to have less dependency on KVM code.

 
  +   /*
  +* 2) Used in guest kernel and points to guest_perf_shadow which
  +* is used as a communication area with host kernel. Host kernel
  +* copies overflow data to it when an event overflows.
  +*/
  +   void*guest_perf_shadow;
 
 
 It's strange to see both guest and host parts in the same patch.  
 Splitting to separate patches will really help review.
It's a little hard to split the patches if they change the same file. Perhaps
I could add more statements before the patch when I send it out.

 
  @@ -1626,9 +1629,22 @@ void perf_event_task_tick(struct task_st
  if (ctx  ctx-nr_events  ctx-nr_events != ctx-nr_active)
  rotate = 1;
 
  -   perf_ctx_adjust_freq(cpuctx-ctx);
  -   if (ctx)
  -   perf_ctx_adjust_freq(ctx);
  +#ifdef CONFIG_KVM_PERF
  +   if (kvm_para_available()) {
  +   /*
  +* perf_ctx_adjust_freq causes lots of pmu-read which would
  +* trigger too many vmexit to host kernel. We disable it
  +* under para virt situation
  +*/
  +   adjust_freq = 0;
  +   }
  +#endif
 
 
 Perhaps we can have a batch read interface which will read many counters 
 at once.
It's a good idea. But that would touch a lot of generic perf code, which would
make it hard
to maintain or to follow future changes.

   This would reduce the number of exits.  Also adjust the 
 frequency less frequently.
Here it depends on process scheduler frequency, CONFIG_HZ. 

 
  +
  +   if (adjust_freq) {
  +   perf_ctx_adjust_freq(cpuctx-ctx);
  +   if (ctx)
  +   perf_ctx_adjust_freq(ctx);
  +   }
 
 
 


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] KVM Test: Fix invalid literal bug in ioquit

2010-06-21 Thread Feng Yang


- Michael Goldish mgold...@redhat.com wrote:

 From: Michael Goldish mgold...@redhat.com
 To: Feng Yang fy...@redhat.com
 Cc: autot...@test.kernel.org, kvm@vger.kernel.org
 Sent: Monday, June 21, 2010 7:19:58 PM GMT +08:00 Beijing / Chongqing / Hong 
 Kong / Urumqi
 Subject: Re: [PATCH] KVM Test: Fix invalid literal bug in ioquit

 On 06/21/2010 01:07 PM, Feng Yang wrote:
  Sometime check_cmd could not finish in setting time.
  Then o=, so int(o) will cause ValueError:
  invalid literal for int() with base 10: ''
  So change to check return status.
  
  Signed-off-by: Feng Yang fy...@redhat.com
  ---
   client/tests/kvm/tests/ioquit.py   |6 +++---
   client/tests/kvm/tests_base.cfg.sample |2 +-
   2 files changed, 4 insertions(+), 4 deletions(-)
  
  diff --git a/client/tests/kvm/tests/ioquit.py
 b/client/tests/kvm/tests/ioquit.py
  index 389a867..8126139 100644
  --- a/client/tests/kvm/tests/ioquit.py
  +++ b/client/tests/kvm/tests/ioquit.py
  @@ -23,13 +23,13 @@ def run_ioquit(test, params, env):
   (s, o) = session.get_command_status_output(bg_cmd,
 timeout=60)
   check_cmd = params.get(check_cmd)
   (s, o) = session2.get_command_status_output(check_cmd,
 timeout=60)
  -if int(o) = 0:
  +if s:
   raise error.TestError(Fail to add IO workload for
 Guest OS)
 
 Please use 'if s != 0' because in case of a timeout s is None.
Hi Michael, thanks for your comments!
But here, I think 'if s:' is better.
A timeout of get_command_status_output(check_cmd, timeout=60) would be caused by
a heavy workload in the guest, which is just what we want,
so we should not raise an error here.

Only when 'if s:' matches can we say 'Fail to add IO workload for Guest OS'.

Thanks!

 
   logging.info(Sleep for a while)
   time.sleep(random.randrange(30,100))
  -(s, o) = session2.get_command_status_output(check_cmd,
 timeout=300)
  -if int(o) = 0:
  +(s, o) = session2.get_command_status_output(check_cmd,
 timeout=60)
  +if s:
 
 Same here.
 
   logging.info(IO workload finished before the VM was
 killed)
   logging.info(Kill the virtual machine)
   vm.process.close()
  diff --git a/client/tests/kvm/tests_base.cfg.sample
 b/client/tests/kvm/tests_base.cfg.sample
  index ce88235..0fd5543 100644
  --- a/client/tests/kvm/tests_base.cfg.sample
  +++ b/client/tests/kvm/tests_base.cfg.sample
  @@ -411,7 +411,7 @@ variants:
   - ioquit:
   type = ioquit
   background_cmd = for i in 1 2 3 4; do (nohup dd
 if=/dev/urandom of=/tmp/file bs=102400 count=1000 ) done
  -check_cmd = ps -a |grep dd |wc -l
  +check_cmd = ps -a |grep dd
   login_timeout = 360
   
   - qemu_img:
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH V2 3/5] para virt interface of perf to support kvm guest os statistics collection in guest os

2010-06-21 Thread Zhang, Yanmin

On Mon, 2010-06-21 at 15:33 +0300, Avi Kivity wrote:
 On 06/21/2010 12:31 PM, Zhang, Yanmin wrote:
  The 3rd patch is to implement para virt perf at host kernel.
 
 
  @@ -64,6 +73,85 @@ struct kvm_mmu_op_release_pt {
#ifdef __KERNEL__
#includeasm/processor.h
 
 
  +/*
  + * In host kernel, perf_event-host_perf_shadow points to
  + * host_perf_shadow which records some information
  + * about the guest.
  + */
  +struct host_perf_shadow {
  +   /* guest perf_event id passed from guest os */
  +   int id;
  +   /*
  +* Host kernel saves data into data member counter firstly.
  +* kvm will get data from this counter and calls kvm functions
  +* to copy or add data back to guest os before entering guest os
  +* next time
  +*/
  +   struct guest_perf_event counter;
  +   /* guest_event_addr is gpa_t pointing to guest os guest_perf_event*/
  +   __u64 guest_event_addr;
 
 
 So just use gpa_t as the type.
host_perf_shadow-guest_event_addr is a copy of 
guest_event_addr-guest_event_addr.
As the latter's type is __u64 as the interface between guest os and host os, I 
use
__u64 as the type of host_perf_shadow-guest_event_addr.

 
  +
  +   /*
  +* Link to  of kvm.kvm_arch.shadow_hash_table
  +*/
  +   struct list_head shadow_entry;
  +   struct kvm_vcpu *vcpu;
  +
  +   struct perf_event *host_event;
  +   /*
  +* Below counter is to prevent malicious guest os to try to
  +* close/enable event at the same time.
  +*/
  +   atomic_t ref_counter;
 
 
 If events are made per-vcpu (like real hardware), races become impossible.
This design is to deal with a task context perf collection in guest os.
Scenario 1:
1) guest os starts to collect statistics of process A on vcpu 0;
2) process A is scheduled to vcpu 1. Then, the perf_event at host side need
to be moved to VCPU 1 's thread. With the per KVM instance design, we needn't
move host_perf_shadow among vcpus.

Scenario 2:
1) guest os creates a perf_event at host side on vcpu 0;
2) malicious guest os calls close to delete the host perf_event on vcpu 1, but
enables the perf_event on vcpu0 at the same time. When close thread runs to get 
the
host_perf_shadow from the list, enable thread also gets it. Then, close thread
deletes the perf_event, and enable thread will cause host kernel panic when 
using
host_perf_shadow.


 
  +};
 
 
 Please move this structure to include/linux/kvm_host.h.  No need to spam 
 kvm_para.h.  Note it's not x86 specific (though you can leave arch 
 enabling to arch maintainers).
Ok. Originally, I wanted to do so, but I'm afraid other archs might not be happy.

 
  +
  +/*
  + * In guest kernel, perf_event-guest_shadow points to
  + * guest_perf_shadow which records some information
  + * about the guest.
  + */
  +struct guest_perf_shadow {
  +   /* guest perf_event id passed from guest os */
  +   int id;
  +   /*
  +* Host kernel kvm saves data into data member counter
  +*/
  +   struct guest_perf_event counter;
  +};
 
 
 Don't ordinary perf structures already have a counter ID which we can reuse?
No. On the other hand, even if generic perf had one, we couldn't use it,
because
a generic perf id (there actually is none) would be host-kernel system-wide, while here
guest_perf_shadow->id is per KVM instance.

 
  +
  +/*
  + * guest_perf_attr is used when guest calls hypercall to
  + * open a new perf_event at host side. Mostly, it's a copy of
  + * perf_event_attr and deletes something not used by host kernel.
  + */
  +struct guest_perf_attr {
  +   __u32   type;
  +   __u64   config;
  +   __u64   sample_period;
  +   __u64   sample_type;
  +   __u64   read_format;
  +   __u64   flags;
  +   __u32   bp_type;
  +   __u64   bp_addr;
  +   __u64   bp_len;
  +};
 
 
 This is really not a guest or host structure, it's part of the 
 interface.  So please rename it (and similar) kvm_pv_perf_*.
Good idea.

 
  @@ -24,6 +24,7 @@
#includeasm/desc.h
#includeasm/mtrr.h
#includeasm/msr-index.h
  +#includeasm/perf_event.h
 
#define KVM_MAX_VCPUS 64
#define KVM_MEMORY_SLOTS 32
  @@ -360,6 +361,18 @@ struct kvm_vcpu_arch {
 
  /* fields used by HYPER-V emulation */
  u64 hv_vapic;
  +
  +   /*
  +* Fields used by PARAVIRT perf interface:
  +*
  +* kvm checks overflow_events before entering guest os,
  +* and copy data back to guest os.
  +* event_mutex is to avoid a race between NMI perf event overflow
  +* handler, event close, and enable/disable.
  +*/
  +   struct mutex event_mutex;
 
 
 No race can exist.  The host NMI handler cannot take any mutex
We use mutex_trylock in the NMI handler. If it can't get the lock, an NMI is
missed,
but the host kernel still updates perf_event->host_perf_shadow.counter,
so the
overflow data will be accumulated.

  so it 
 must be immune to

1 2 >

1 - 100 of 108 matches

Mail list logo