[COMMIT master] device-assignment: Cleanup on exit
From: Alex Williamson alex.william...@redhat.com close() the resource fd when we're done with it. Unregister and munmap the anonymous memory for the MSIX table. Signed-off-by: Alex Williamson alex.william...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/hw/device-assignment.c b/hw/device-assignment.c index 7e53a95..ba02157 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -60,6 +60,8 @@ static void assigned_dev_load_option_rom(AssignedDevice *dev); +static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev); + static uint32_t guest_to_host_ioport(AssignedDevRegion *region, uint32_t addr) { return region->u.r_baseport + (addr - region->e_physbase); @@ -783,10 +785,14 @@ static void free_assigned_device(AssignedDevice *dev) fprintf(stderr, "Failed to unmap assigned device region: %s\n", strerror(errno)); +close(pci_region->resource_fd); } } } +if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) +assigned_dev_unregister_msix_mmio(dev); + if (dev->real_device.config_fd) { close(dev->real_device.config_fd); dev->real_device.config_fd = 0; @@ -1370,6 +1376,21 @@ static int assigned_dev_register_msix_mmio(AssignedDevice *dev) return 0; } +static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev) +{ +if (!dev->msix_table_page) +return; + +cpu_unregister_io_memory(dev->mmio_index); +dev->mmio_index = 0; + +if (munmap(dev->msix_table_page, 0x1000) == -1) { +fprintf(stderr, "error unmapping msix_table_page! %s\n", +strerror(errno)); +} +dev->msix_table_page = NULL; +} + static int assigned_initfn(struct PCIDevice *pci_dev) { AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] test: Add XSAVE unit test
From: Sheng Yang sh...@linux.intel.com Based on IDT test framework. Signed-off-by: Sheng Yang sh...@linux.intel.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/kvm/test/config-x86-common.mak b/kvm/test/config-x86-common.mak index 800b635..0e1ccce 100644 --- a/kvm/test/config-x86-common.mak +++ b/kvm/test/config-x86-common.mak @@ -61,6 +61,8 @@ $(TEST_DIR)/msr.flat: $(cstart.o) $(TEST_DIR)/msr.o $(TEST_DIR)/idt_test.flat: $(cstart.o) $(TEST_DIR)/idt.o $(TEST_DIR)/idt_test.o +$(TEST_DIR)/xsave.flat: $(cstart.o) $(TEST_DIR)/idt.o $(TEST_DIR)/xsave.o + arch_clean: $(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat \ $(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o diff --git a/kvm/test/config-x86_64.mak b/kvm/test/config-x86_64.mak index f9cd121..2da2906 100644 --- a/kvm/test/config-x86_64.mak +++ b/kvm/test/config-x86_64.mak @@ -5,6 +5,7 @@ ldarch = elf64-x86-64 CFLAGS += -D__x86_64__ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \ - $(TEST_DIR)/emulator.flat $(TEST_DIR)/idt_test.flat + $(TEST_DIR)/emulator.flat $(TEST_DIR)/idt_test.flat \ + $(TEST_DIR)/xsave.flat include config-x86-common.mak diff --git a/kvm/test/x86/xsave.c b/kvm/test/x86/xsave.c new file mode 100644 index 000..a22b44c --- /dev/null +++ b/kvm/test/x86/xsave.c @@ -0,0 +1,262 @@ +#include libcflat.h +#include idt.h + +#ifdef __x86_64__ +#define uint64_t unsigned long +#else +#define uint64_t unsigned long long +#endif + +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, +unsigned int *ecx, unsigned int *edx) +{ +/* ecx is often an input as well as an output. */ +asm volatile(cpuid +: =a (*eax), +=b (*ebx), +=c (*ecx), +=d (*edx) +: 0 (*eax), 2 (*ecx)); +} + +/* + * Generic CPUID function + * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx + * resulting in stale register contents being returned. 
+ */ +void cpuid(unsigned int op, +unsigned int *eax, unsigned int *ebx, +unsigned int *ecx, unsigned int *edx) +{ +*eax = op; +*ecx = 0; +__cpuid(eax, ebx, ecx, edx); +} + +/* Some CPUID calls want 'count' to be placed in ecx */ +void cpuid_count(unsigned int op, int count, +unsigned int *eax, unsigned int *ebx, +unsigned int *ecx, unsigned int *edx) +{ +*eax = op; +*ecx = count; +__cpuid(eax, ebx, ecx, edx); +} + +int xgetbv_checking(u32 index, u64 *result) +{ +u32 eax, edx; + +asm volatile(ASM_TRY(1f) +.byte 0x0f,0x01,0xd0\n\t /* xgetbv */ +1: +: =a (eax), =d (edx) +: c (index)); +*result = eax + ((u64)edx 32); +return exception_vector(); +} + +int xsetbv_checking(u32 index, u64 value) +{ +u32 eax = value; +u32 edx = value 32; + +asm volatile(ASM_TRY(1f) +.byte 0x0f,0x01,0xd1\n\t /* xsetbv */ +1: +: : a (eax), d (edx), c (index)); +return exception_vector(); +} + +unsigned long read_cr4(void) +{ +unsigned long val; +asm volatile(mov %%cr4,%0 : =r (val)); +return val; +} + +int write_cr4_checking(unsigned long val) +{ +asm volatile(ASM_TRY(1f) +mov %0,%%cr4\n\t +1:: : r (val)); +return exception_vector(); +} + +#define CPUID_1_ECX_XSAVE (1 26) +#define CPUID_1_ECX_OSXSAVE(1 27) +int check_cpuid_1_ecx(unsigned int bit) +{ +unsigned int eax, ebx, ecx, edx; +cpuid(1, eax, ebx, ecx, edx); +if (ecx bit) +return 1; +return 0; +} + +uint64_t get_supported_xcr0(void) +{ +unsigned int eax, ebx, ecx, edx; +cpuid_count(0xd, 0, eax, ebx, ecx, edx); +printf(eax %x, ebx %x, ecx %x, edx %x\n, +eax, ebx, ecx, edx); +return eax + ((u64)edx 32); +} + +#define X86_CR4_OSXSAVE0x0004 +#define XCR_XFEATURE_ENABLED_MASK 0x +#define XCR_XFEATURE_ILLEGAL_MASK 0x0010 + +#define XSTATE_FP 0x1 +#define XSTATE_SSE 0x2 +#define XSTATE_YMM 0x4 + +static int total_tests, fail_tests; + +void pass_if(int condition) +{ +total_tests ++; +if (condition) +printf(Pass!\n); +else { +printf(Fail!\n); +fail_tests ++; +} +} + +void test_xsave(void) +{ +unsigned long cr4; +uint64_t supported_xcr0; 
+uint64_t test_bits; +u64 xcr0; +int r; + +printf(Legal instruction testing:\n); +supported_xcr0 = get_supported_xcr0(); +printf(Supported XCR0 bits: 0x%x\n, supported_xcr0); + +printf(Check minimal XSAVE required bits: ); +test_bits = XSTATE_FP | XSTATE_SSE; +pass_if((supported_xcr0 test_bits) == test_bits); + +printf(Set CR4 OSXSAVE: ); +cr4 = read_cr4(); +r = write_cr4_checking(cr4 | X86_CR4_OSXSAVE); +pass_if(r == 0); + +printf(Check CPUID.1.ECX.OSXSAVE - expect 1: ); +pass_if(check_cpuid_1_ecx(CPUID_1_ECX_OSXSAVE)); + +printf(Legal
[COMMIT master] qemu: kvm: Enable XSAVE live migration support
From: Sheng Yang sh...@linux.intel.com Signed-off-by: Sheng Yang sh...@linux.intel.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index 3c33e64..4f0b1d0 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -772,10 +772,20 @@ static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs) | (rhs-avl * DESC_AVL_MASK); } +#define XSAVE_CWD_RIP 2 +#define XSAVE_CWD_RDP 4 +#define XSAVE_MXCSR 6 +#define XSAVE_ST_SPACE8 +#define XSAVE_XMM_SPACE 40 +#define XSAVE_XSTATE_BV 128 +#define XSAVE_YMMH_SPACE 144 + void kvm_arch_load_regs(CPUState *env, int level) { struct kvm_regs regs; struct kvm_fpu fpu; +struct kvm_xsave* xsave; +struct kvm_xcrs xcrs; struct kvm_sregs sregs; struct kvm_msr_entry msrs[100]; int rc, n, i; @@ -806,16 +816,47 @@ void kvm_arch_load_regs(CPUState *env, int level) kvm_set_regs(env, regs); -memset(fpu, 0, sizeof fpu); -fpu.fsw = env-fpus ~(7 11); -fpu.fsw |= (env-fpstt 7) 11; -fpu.fcw = env-fpuc; -for (i = 0; i 8; ++i) - fpu.ftwx |= (!env-fptags[i]) i; -memcpy(fpu.fpr, env-fpregs, sizeof env-fpregs); -memcpy(fpu.xmm, env-xmm_regs, sizeof env-xmm_regs); -fpu.mxcsr = env-mxcsr; -kvm_set_fpu(env, fpu); +if (kvm_check_extension(kvm_state, KVM_CAP_XSAVE)) { +uint16_t cwd, swd, twd, fop; + +xsave = qemu_memalign(4096, sizeof(struct kvm_xsave)); +memset(xsave, 0, sizeof(struct kvm_xsave)); +cwd = swd = twd = fop = 0; +swd = env-fpus ~(7 11); +swd |= (env-fpstt 7) 11; +cwd = env-fpuc; +for (i = 0; i 8; ++i) +twd |= (!env-fptags[i]) i; +xsave-region[0] = (uint32_t)(swd 16) + cwd; +xsave-region[1] = (uint32_t)(fop 16) + twd; +memcpy(xsave-region[XSAVE_ST_SPACE], env-fpregs, +sizeof env-fpregs); +memcpy(xsave-region[XSAVE_XMM_SPACE], env-xmm_regs, +sizeof env-xmm_regs); +xsave-region[XSAVE_MXCSR] = env-mxcsr; +*(uint64_t *)xsave-region[XSAVE_XSTATE_BV] = env-xstate_bv; +memcpy(xsave-region[XSAVE_YMMH_SPACE], env-ymmh_regs, +sizeof env-ymmh_regs); +kvm_set_xsave(env, xsave); +if 
(kvm_check_extension(kvm_state, KVM_CAP_XCRS)) { +xcrs.nr_xcrs = 1; +xcrs.flags = 0; +xcrs.xcrs[0].xcr = 0; +xcrs.xcrs[0].value = env-xcr0; +kvm_set_xcrs(env, xcrs); +} +} else { +memset(fpu, 0, sizeof fpu); +fpu.fsw = env-fpus ~(7 11); +fpu.fsw |= (env-fpstt 7) 11; +fpu.fcw = env-fpuc; +for (i = 0; i 8; ++i) +fpu.ftwx |= (!env-fptags[i]) i; +memcpy(fpu.fpr, env-fpregs, sizeof env-fpregs); +memcpy(fpu.xmm, env-xmm_regs, sizeof env-xmm_regs); +fpu.mxcsr = env-mxcsr; +kvm_set_fpu(env, fpu); +} memset(sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap)); if (env-interrupt_injected = 0) { @@ -934,6 +975,8 @@ void kvm_arch_save_regs(CPUState *env) { struct kvm_regs regs; struct kvm_fpu fpu; +struct kvm_xsave* xsave; +struct kvm_xcrs xcrs; struct kvm_sregs sregs; struct kvm_msr_entry msrs[100]; uint32_t hflags; @@ -965,15 +1008,43 @@ void kvm_arch_save_regs(CPUState *env) env-eflags = regs.rflags; env-eip = regs.rip; -kvm_get_fpu(env, fpu); -env-fpstt = (fpu.fsw 11) 7; -env-fpus = fpu.fsw; -env-fpuc = fpu.fcw; -for (i = 0; i 8; ++i) - env-fptags[i] = !((fpu.ftwx i) 1); -memcpy(env-fpregs, fpu.fpr, sizeof env-fpregs); -memcpy(env-xmm_regs, fpu.xmm, sizeof env-xmm_regs); -env-mxcsr = fpu.mxcsr; +if (kvm_check_extension(kvm_state, KVM_CAP_XSAVE)) { +uint16_t cwd, swd, twd, fop; +xsave = qemu_memalign(4096, sizeof(struct kvm_xsave)); +kvm_get_xsave(env, xsave); +cwd = (uint16_t)xsave-region[0]; +swd = (uint16_t)(xsave-region[0] 16); +twd = (uint16_t)xsave-region[1]; +fop = (uint16_t)(xsave-region[1] 16); +env-fpstt = (swd 11) 7; +env-fpus = swd; +env-fpuc = cwd; +for (i = 0; i 8; ++i) +env-fptags[i] = !((twd i) 1); +env-mxcsr = xsave-region[XSAVE_MXCSR]; +memcpy(env-fpregs, xsave-region[XSAVE_ST_SPACE], +sizeof env-fpregs); +memcpy(env-xmm_regs, xsave-region[XSAVE_XMM_SPACE], +sizeof env-xmm_regs); +env-xstate_bv = *(uint64_t *)xsave-region[XSAVE_XSTATE_BV]; +memcpy(env-ymmh_regs, xsave-region[XSAVE_YMMH_SPACE], +sizeof env-ymmh_regs); +if 
(kvm_check_extension(kvm_state, KVM_CAP_XCRS)) { +kvm_get_xcrs(env, xcrs); +if (xcrs.xcrs[0].xcr == 0) +env-xcr0 = xcrs.xcrs[0].value; +} +} else { +kvm_get_fpu(env, fpu); +
[COMMIT master] Fix compilation with !KVM_CAP_XSAVE
From: Marcelo Tosatti mtosa...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index 4f0b1d0..1232049 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -784,8 +784,6 @@ void kvm_arch_load_regs(CPUState *env, int level) { struct kvm_regs regs; struct kvm_fpu fpu; -struct kvm_xsave* xsave; -struct kvm_xcrs xcrs; struct kvm_sregs sregs; struct kvm_msr_entry msrs[100]; int rc, n, i; @@ -816,7 +814,10 @@ void kvm_arch_load_regs(CPUState *env, int level) kvm_set_regs(env, regs); +#ifdef KVM_CAP_XSAVE if (kvm_check_extension(kvm_state, KVM_CAP_XSAVE)) { +struct kvm_xsave* xsave; + uint16_t cwd, swd, twd, fop; xsave = qemu_memalign(4096, sizeof(struct kvm_xsave)); @@ -839,6 +840,8 @@ void kvm_arch_load_regs(CPUState *env, int level) sizeof env-ymmh_regs); kvm_set_xsave(env, xsave); if (kvm_check_extension(kvm_state, KVM_CAP_XCRS)) { +struct kvm_xcrs xcrs; + xcrs.nr_xcrs = 1; xcrs.flags = 0; xcrs.xcrs[0].xcr = 0; @@ -846,6 +849,7 @@ void kvm_arch_load_regs(CPUState *env, int level) kvm_set_xcrs(env, xcrs); } } else { +#endif memset(fpu, 0, sizeof fpu); fpu.fsw = env-fpus ~(7 11); fpu.fsw |= (env-fpstt 7) 11; @@ -856,7 +860,9 @@ void kvm_arch_load_regs(CPUState *env, int level) memcpy(fpu.xmm, env-xmm_regs, sizeof env-xmm_regs); fpu.mxcsr = env-mxcsr; kvm_set_fpu(env, fpu); +#ifdef KVM_CAP_XSAVE } +#endif memset(sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap)); if (env-interrupt_injected = 0) { @@ -975,8 +981,6 @@ void kvm_arch_save_regs(CPUState *env) { struct kvm_regs regs; struct kvm_fpu fpu; -struct kvm_xsave* xsave; -struct kvm_xcrs xcrs; struct kvm_sregs sregs; struct kvm_msr_entry msrs[100]; uint32_t hflags; @@ -1008,7 +1012,9 @@ void kvm_arch_save_regs(CPUState *env) env-eflags = regs.rflags; env-eip = regs.rip; +#ifdef KVM_CAP_XSAVE if (kvm_check_extension(kvm_state, KVM_CAP_XSAVE)) { +struct kvm_xsave* xsave; uint16_t cwd, swd, twd, fop; xsave = qemu_memalign(4096, sizeof(struct kvm_xsave)); 
kvm_get_xsave(env, xsave); @@ -1030,11 +1036,14 @@ void kvm_arch_save_regs(CPUState *env) memcpy(env-ymmh_regs, xsave-region[XSAVE_YMMH_SPACE], sizeof env-ymmh_regs); if (kvm_check_extension(kvm_state, KVM_CAP_XCRS)) { +struct kvm_xcrs xcrs; + kvm_get_xcrs(env, xcrs); if (xcrs.xcrs[0].xcr == 0) env-xcr0 = xcrs.xcrs[0].value; } } else { +#endif kvm_get_fpu(env, fpu); env-fpstt = (fpu.fsw 11) 7; env-fpus = fpu.fsw; @@ -1044,7 +1053,9 @@ void kvm_arch_save_regs(CPUState *env) memcpy(env-fpregs, fpu.fpr, sizeof env-fpregs); memcpy(env-xmm_regs, fpu.xmm, sizeof env-xmm_regs); env-mxcsr = fpu.mxcsr; +#ifdef KVM_CAP_XSAVE } +#endif kvm_get_sregs(env, sregs); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] test: add test for pusha and popa instructions
From: Wei Yongjun yj...@cn.fujitsu.com This patch add test for pusha and popa instructions. Signed-off-by: Wei Yongjun yj...@cn.fujitsu.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/kvm/test/x86/realmode.c b/kvm/test/x86/realmode.c index 70a1e05..bd79348 100644 --- a/kvm/test/x86/realmode.c +++ b/kvm/test/x86/realmode.c @@ -820,12 +820,58 @@ void test_null(void) print_serial("null test: PASS\n"); } +void test_pusha_popa() +{ + struct regs inregs = { .eax = 0, .ebx = 1, .ecx = 2, .edx = 3, .esi = 4, .edi = 5, .ebp = 6, .esp = 7}, outregs; + + MK_INSN(pusha, "pusha\n\t" + "pop %edi\n\t" + "pop %esi\n\t" + "pop %ebp\n\t" + "pop %eax\n\t" + "pop %ebx\n\t" + "pop %edx\n\t" + "pop %ecx\n\t" + "pop %esp\n\t" + "xchg %esp, %eax\n\t" + ); + + MK_INSN(popa, "push %eax\n\t" + "push %ecx\n\t" + "push %edx\n\t" + "push %ebx\n\t" + "push %esp\n\t" + "push %ebp\n\t" + "push %esi\n\t" + "push %edi\n\t" + "popa\n\t" + ); + + exec_in_big_real_mode(&inregs, &outregs, + insn_pusha, + insn_pusha_end - insn_pusha); + + if (!regs_equal(&inregs, &outregs, 0)) + print_serial("Pusha/Popa Test1: FAIL\n"); + else + print_serial("Pusha/Popa Test1: PASS\n"); + + exec_in_big_real_mode(&inregs, &outregs, + insn_popa, + insn_popa_end - insn_popa); + if (!regs_equal(&inregs, &outregs, 0)) + print_serial("Pusha/Popa Test2: FAIL\n"); + else + print_serial("Pusha/Popa Test2: PASS\n"); +} + void realmode_start(void) { test_null(); test_shld(); test_push_pop(); + test_pusha_popa(); test_mov_imm(); test_cmp_imm(); test_add_imm(); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] qemu: kvm: Extend kvm_arch_get_supported_cpuid() to support index
From: Sheng Yang sh...@linux.intel.com Would use it later for XSAVE related CPUID. Signed-off-by: Sheng Yang sh...@linux.intel.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/kvm.h b/kvm.h index aab5118..16b06a4 100644 --- a/kvm.h +++ b/kvm.h @@ -152,7 +152,7 @@ bool kvm_arch_stop_on_emulation_error(CPUState *env); int kvm_check_extension(KVMState *s, unsigned int extension); uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, - int reg); + uint32_t index, int reg); void kvm_cpu_synchronize_state(CPUState *env); void kvm_cpu_synchronize_post_reset(CPUState *env); void kvm_cpu_synchronize_post_init(CPUState *env); diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 66df142..57327f5 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -71,7 +71,8 @@ static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max) return cpuid; } -uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, int reg) +uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, + uint32_t index, int reg) { struct kvm_cpuid2 *cpuid; int i, max; @@ -88,7 +89,8 @@ uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, int reg) } for (i = 0; i cpuid-nent; ++i) { -if (cpuid-entries[i].function == function) { +if (cpuid-entries[i].function == function +cpuid-entries[i].index == index) { switch (reg) { case R_EAX: ret = cpuid-entries[i].eax; @@ -110,7 +112,7 @@ uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, int reg) /* On Intel, kvm returns cpuid according to the Intel spec, * so add missing bits according to the AMD spec: */ -cpuid_1_edx = kvm_arch_get_supported_cpuid(env, 1, R_EDX); +cpuid_1_edx = kvm_arch_get_supported_cpuid(env, 1, 0, R_EDX); ret |= cpuid_1_edx 0x183f7ff; break; } @@ -126,7 +128,8 @@ uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, int reg) #else -uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, int reg) +uint32_t 
kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, + uint32_t index, int reg) { return -1U; } @@ -190,16 +193,16 @@ int kvm_arch_init_vcpu(CPUState *env) #endif -env-cpuid_features = kvm_arch_get_supported_cpuid(env, 1, R_EDX); +env-cpuid_features = kvm_arch_get_supported_cpuid(env, 1, 0, R_EDX); i = env-cpuid_ext_features CPUID_EXT_HYPERVISOR; -env-cpuid_ext_features = kvm_arch_get_supported_cpuid(env, 1, R_ECX); +env-cpuid_ext_features = kvm_arch_get_supported_cpuid(env, 1, 0, R_ECX); env-cpuid_ext_features |= i; env-cpuid_ext2_features = kvm_arch_get_supported_cpuid(env, 0x8001, - R_EDX); + 0, R_EDX); env-cpuid_ext3_features = kvm_arch_get_supported_cpuid(env, 0x8001, - R_ECX); + 0, R_ECX); cpuid_i = 0; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] qemu: Enable XSAVE related CPUID
From: Sheng Yang sh...@linux.intel.com We can support it in KVM now. The 0xd leaf is queried from KVM. Signed-off-by: Sheng Yang sh...@linux.intel.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c index fb78061..26e4054 100644 --- a/target-i386/cpuid.c +++ b/target-i386/cpuid.c @@ -1081,6 +1081,27 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx = 0; *edx = 0; break; +case 0xD: +/* Processor Extended State */ +if (!(env->cpuid_ext_features & CPUID_EXT_XSAVE)) { +*eax = 0; +*ebx = 0; +*ecx = 0; +*edx = 0; +break; +} +if (kvm_enabled()) { +*eax = kvm_arch_get_supported_cpuid(env, 0xd, count, R_EAX); +*ebx = kvm_arch_get_supported_cpuid(env, 0xd, count, R_EBX); +*ecx = kvm_arch_get_supported_cpuid(env, 0xd, count, R_ECX); +*edx = kvm_arch_get_supported_cpuid(env, 0xd, count, R_EDX); +} else { +*eax = 0; +*ebx = 0; +*ecx = 0; +*edx = 0; +} +break; case 0x80000000: *eax = env->cpuid_xlevel; *ebx = env->cpuid_vendor1; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] device-assignment, msi: PBA is long
From: Hidetoshi Seto seto.hideto...@jp.fujitsu.com Accidentally a pci_read_long() was replaced with assigned_dev_pci_read_byte() by the commit: commit a81a1f0a7410976be7dbc9a81524a8640f446ab5 Author: Alex Williamson alex.william...@redhat.com device-assignment: Don't use libpci Signed-off-by: Hidetoshi Seto seto.hideto...@jp.fujitsu.com Acked-by: Alex Williamson alex.william...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/hw/device-assignment.c b/hw/device-assignment.c index ba02157..20ed934 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -1284,7 +1284,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) pci_dev->cap.length + PCI_MSIX_TABLE) = msix_table_entry; *(uint32_t *)(pci_dev->config + pci_dev->cap.start + pci_dev->cap.length + PCI_MSIX_PBA) = -assigned_dev_pci_read_byte(pci_dev, pos + PCI_MSIX_PBA); +assigned_dev_pci_read_long(pci_dev, pos + PCI_MSIX_PBA); bar_nr = msix_table_entry & PCI_MSIX_BIR; msix_table_entry &= ~PCI_MSIX_BIR; dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] Synchronize kernel headers
From: Marcelo Tosatti mtosa...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/kvm/include/linux/kvm.h b/kvm/include/linux/kvm.h index 6485981..e46729e 100644 --- a/kvm/include/linux/kvm.h +++ b/kvm/include/linux/kvm.h @@ -103,7 +103,7 @@ struct kvm_userspace_memory_region { /* for kvm_memory_region::flags */ #define KVM_MEM_LOG_DIRTY_PAGES 1UL - +#define KVM_MEMSLOT_INVALID (1UL 1) /* for KVM_IRQ_LINE */ struct kvm_irq_level { @@ -160,6 +160,7 @@ struct kvm_pit_config { #define KVM_EXIT_DCR 15 #define KVM_EXIT_NMI 16 #define KVM_EXIT_INTERNAL_ERROR 17 +#define KVM_EXIT_OSI 18 /* For KVM_EXIT_INTERNAL_ERROR */ #define KVM_INTERNAL_ERROR_EMULATION 1 @@ -259,6 +260,10 @@ struct kvm_run { __u32 ndata; __u64 data[16]; } internal; + /* KVM_EXIT_OSI */ + struct { + __u64 gprs[32]; + } osi; /* Fix the size of the union. */ char padding[256]; }; @@ -400,6 +405,15 @@ struct kvm_ioeventfd { __u8 pad[36]; }; +/* for KVM_ENABLE_CAP */ +struct kvm_enable_cap { + /* in */ + __u32 cap; + __u32 flags; + __u64 args[4]; + __u8 pad[64]; +}; + #define KVMIO 0xAE /* @@ -497,8 +511,25 @@ struct kvm_ioeventfd { #endif #define KVM_CAP_S390_PSW 42 #define KVM_CAP_PPC_SEGSTATE 43 - +#define KVM_CAP_HYPERV 44 +#define KVM_CAP_HYPERV_VAPIC 45 +#define KVM_CAP_HYPERV_SPIN 46 #define KVM_CAP_PCI_SEGMENT 47 +#define KVM_CAP_PPC_PAIRED_SINGLES 48 +#define KVM_CAP_INTR_SHADOW 49 +#ifdef __KVM_HAVE_DEBUGREGS +#define KVM_CAP_DEBUGREGS 50 +#endif +#define KVM_CAP_X86_ROBUST_SINGLESTEP 51 +#define KVM_CAP_PPC_OSI 52 +#define KVM_CAP_PPC_UNSET_IRQ 53 +#define KVM_CAP_ENABLE_CAP 54 +#ifdef __KVM_HAVE_XSAVE +#define KVM_CAP_XSAVE 55 +#endif +#ifdef __KVM_HAVE_XCRS +#define KVM_CAP_XCRS 56 +#endif #ifdef KVM_CAP_IRQ_ROUTING @@ -685,6 +716,16 @@ struct kvm_clock_data { /* Available with KVM_CAP_VCPU_EVENTS */ #define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events) #define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events) +/* Available with 
KVM_CAP_DEBUGREGS */ +#define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs) +#define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs) +#define KVM_ENABLE_CAP_IOW(KVMIO, 0xa3, struct kvm_enable_cap) +/* Available with KVM_CAP_XSAVE */ +#define KVM_GET_XSAVE_IOR(KVMIO, 0xa4, struct kvm_xsave) +#define KVM_SET_XSAVE_IOW(KVMIO, 0xa5, struct kvm_xsave) +/* Available with KVM_CAP_XCRS */ +#define KVM_GET_XCRS _IOR(KVMIO, 0xa6, struct kvm_xcrs) +#define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs) #define KVM_DEV_ASSIGN_ENABLE_IOMMU(1 0) diff --git a/kvm/include/x86/asm/kvm.h b/kvm/include/x86/asm/kvm.h index f46b79f..4d8dcbd 100644 --- a/kvm/include/x86/asm/kvm.h +++ b/kvm/include/x86/asm/kvm.h @@ -21,6 +21,9 @@ #define __KVM_HAVE_PIT_STATE2 #define __KVM_HAVE_XEN_HVM #define __KVM_HAVE_VCPU_EVENTS +#define __KVM_HAVE_DEBUGREGS +#define __KVM_HAVE_XSAVE +#define __KVM_HAVE_XCRS /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 @@ -257,6 +260,11 @@ struct kvm_reinject_control { /* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */ #define KVM_VCPUEVENT_VALID_NMI_PENDING0x0001 #define KVM_VCPUEVENT_VALID_SIPI_VECTOR0x0002 +#define KVM_VCPUEVENT_VALID_SHADOW 0x0004 + +/* Interrupt shadow states */ +#define KVM_X86_SHADOW_INT_MOV_SS 0x01 +#define KVM_X86_SHADOW_INT_STI 0x02 /* for KVM_GET/SET_VCPU_EVENTS */ struct kvm_vcpu_events { @@ -271,7 +279,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 soft; - __u8 pad; + __u8 shadow; } interrupt; struct { __u8 injected; @@ -284,4 +292,33 @@ struct kvm_vcpu_events { __u32 reserved[10]; }; +/* for KVM_GET/SET_DEBUGREGS */ +struct kvm_debugregs { + __u64 db[4]; + __u64 dr6; + __u64 dr7; + __u64 flags; + __u64 reserved[9]; +}; + +/* for KVM_CAP_XSAVE */ +struct kvm_xsave { + __u32 region[1024]; +}; + +#define KVM_MAX_XCRS 16 + +struct kvm_xcr { + __u32 xcr; + __u32 reserved; + __u64 value; +}; + +struct kvm_xcrs { + __u32 nr_xcrs; + __u32 flags; + struct 
kvm_xcr xcrs[KVM_MAX_XCRS]; + __u64 padding[16]; +}; + #endif /* _ASM_X86_KVM_H */ -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Fix mov cr4 #GP at wrong instruction
From: Avi Kivity a...@redhat.com On Intel, we call skip_emulated_instruction() even if we injected a #GP, resulting in the #GP pointing at the wrong address. Fix by injecting the exception and skipping the instruction at the same place, so we can do just one or the other. Signed-off-by: Avi Kivity a...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b237084..ea8c319 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -599,7 +599,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); -void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); +int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index eb4703f..a6322af 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3192,8 +3192,8 @@ static int handle_cr(struct kvm_vcpu *vcpu) skip_emulated_instruction(vcpu); return 1; case 4: - kvm_set_cr4(vcpu, val); - skip_emulated_instruction(vcpu); + err = kvm_set_cr4(vcpu, val); + complete_insn_gp(vcpu, err); return 1; case 8: { u8 cr8_prev = kvm_get_cr8(vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1e0337a..b3eeb24 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -531,7 +531,7 @@ static void update_cpuid(struct kvm_vcpu *vcpu) } } -int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { unsigned long old_cr4 = kvm_read_cr4(vcpu); unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; @@ -563,12 +563,6 @@ int __kvm_set_cr4(struct kvm_vcpu 
*vcpu, unsigned long cr4) return 0; } - -void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) -{ - if (__kvm_set_cr4(vcpu, cr4)) - kvm_inject_gp(vcpu, 0); -} EXPORT_SYMBOL_GPL(kvm_set_cr4); static int __kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) @@ -3735,7 +3729,7 @@ static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) res = __kvm_set_cr3(vcpu, val); break; case 4: - res = __kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); + res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); break; case 8: res = __kvm_set_cr8(vcpu, val 0xfUL); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Fix mov cr3 #GP at wrong instruction
From: Avi Kivity a...@redhat.com On Intel, we call skip_emulated_instruction() even if we injected a #GP, resulting in the #GP pointing at the wrong address. Fix by injecting the exception and skipping the instruction at the same place, so we can do just one or the other. Signed-off-by: Avi Kivity a...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ea8c319..c2813d6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -598,7 +598,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, bool has_error_code, u32 error_code); int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); -void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); +int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 75ddaa1..fcf 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3201,7 +3201,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu, static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) { - kvm_set_cr3(vcpu, vcpu-arch.cr3); + (void)kvm_set_cr3(vcpu, vcpu-arch.cr3); return 1; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6d1616d..f7a6fdc 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1963,7 +1963,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) svm-vmcb-save.cr3 = hsave-save.cr3; svm-vcpu.arch.cr3 = hsave-save.cr3; } else { - kvm_set_cr3(svm-vcpu, hsave-save.cr3); + (void)kvm_set_cr3(svm-vcpu, hsave-save.cr3); } kvm_register_write(svm-vcpu, VCPU_REGS_RAX, hsave-save.rax); kvm_register_write(svm-vcpu, VCPU_REGS_RSP, hsave-save.rsp); @@ -2086,7 +2086,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) svm-vmcb-save.cr3 = 
nested_vmcb-save.cr3; svm-vcpu.arch.cr3 = nested_vmcb-save.cr3; } else - kvm_set_cr3(svm-vcpu, nested_vmcb-save.cr3); + (void)kvm_set_cr3(svm-vcpu, nested_vmcb-save.cr3); /* Guest paging mode is active - reset mmu */ kvm_mmu_reset_context(svm-vcpu); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a6322af..6c81f0e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3188,8 +3188,8 @@ static int handle_cr(struct kvm_vcpu *vcpu) complete_insn_gp(vcpu, err); return 1; case 3: - kvm_set_cr3(vcpu, val); - skip_emulated_instruction(vcpu); + err = kvm_set_cr3(vcpu, val); + complete_insn_gp(vcpu, err); return 1; case 4: err = kvm_set_cr4(vcpu, val); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b3eeb24..e16a00e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -565,7 +565,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) } EXPORT_SYMBOL_GPL(kvm_set_cr4); -static int __kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) +int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { if (cr3 == vcpu-arch.cr3 !pdptrs_changed(vcpu)) { kvm_mmu_sync_roots(vcpu); @@ -604,12 +604,6 @@ static int __kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) vcpu-arch.mmu.new_cr3(vcpu); return 0; } - -void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) -{ - if (__kvm_set_cr3(vcpu, cr3)) - kvm_inject_gp(vcpu, 0); -} EXPORT_SYMBOL_GPL(kvm_set_cr3); int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) @@ -3726,7 +3720,7 @@ static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) vcpu-arch.cr2 = val; break; case 3: - res = __kvm_set_cr3(vcpu, val); + res = kvm_set_cr3(vcpu, val); break; case 4: res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: x86: XSAVE/XRSTOR live migration support
From: Sheng Yang sh...@linux.intel.com This patch enable save/restore of xsave state. Signed-off-by: Sheng Yang sh...@linux.intel.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 159b4ef..ffba03f 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -922,6 +922,80 @@ Define which vcpu is the Bootstrap Processor (BSP). Values are the same as the vcpu id in KVM_CREATE_VCPU. If this ioctl is not called, the default is vcpu 0. +4.41 KVM_GET_XSAVE + +Capability: KVM_CAP_XSAVE +Architectures: x86 +Type: vcpu ioctl +Parameters: struct kvm_xsave (out) +Returns: 0 on success, -1 on error + +struct kvm_xsave { + __u32 region[1024]; +}; + +This ioctl would copy current vcpu's xsave struct to the userspace. + +4.42 KVM_SET_XSAVE + +Capability: KVM_CAP_XSAVE +Architectures: x86 +Type: vcpu ioctl +Parameters: struct kvm_xsave (in) +Returns: 0 on success, -1 on error + +struct kvm_xsave { + __u32 region[1024]; +}; + +This ioctl would copy userspace's xsave struct to the kernel. + +4.43 KVM_GET_XCRS + +Capability: KVM_CAP_XCRS +Architectures: x86 +Type: vcpu ioctl +Parameters: struct kvm_xcrs (out) +Returns: 0 on success, -1 on error + +struct kvm_xcr { + __u32 xcr; + __u32 reserved; + __u64 value; +}; + +struct kvm_xcrs { + __u32 nr_xcrs; + __u32 flags; + struct kvm_xcr xcrs[KVM_MAX_XCRS]; + __u64 padding[16]; +}; + +This ioctl would copy current vcpu's xcrs to the userspace. + +4.44 KVM_SET_XCRS + +Capability: KVM_CAP_XCRS +Architectures: x86 +Type: vcpu ioctl +Parameters: struct kvm_xcrs (in) +Returns: 0 on success, -1 on error + +struct kvm_xcr { + __u32 xcr; + __u32 reserved; + __u64 value; +}; + +struct kvm_xcrs { + __u32 nr_xcrs; + __u32 flags; + struct kvm_xcr xcrs[KVM_MAX_XCRS]; + __u64 padding[16]; +}; + +This ioctl would set vcpu's xcr to the value userspace specified. + 5. 
The kvm_run structure Application code obtains a pointer to the kvm_run structure by diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index ff90055..4d8dcbd 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -22,6 +22,8 @@ #define __KVM_HAVE_XEN_HVM #define __KVM_HAVE_VCPU_EVENTS #define __KVM_HAVE_DEBUGREGS +#define __KVM_HAVE_XSAVE +#define __KVM_HAVE_XCRS /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 @@ -299,4 +301,24 @@ struct kvm_debugregs { __u64 reserved[9]; }; +/* for KVM_CAP_XSAVE */ +struct kvm_xsave { + __u32 region[1024]; +}; + +#define KVM_MAX_XCRS 16 + +struct kvm_xcr { + __u32 xcr; + __u32 reserved; + __u64 value; +}; + +struct kvm_xcrs { + __u32 nr_xcrs; + __u32 flags; + struct kvm_xcr xcrs[KVM_MAX_XCRS]; + __u64 padding[16]; +}; + #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 29ee4e4..32c3666 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -13,8 +13,11 @@ #define FXSAVE_SIZE512 -#define XSTATE_YMM_SIZE 256 -#define XSTATE_YMM_OFFSET (512 + 64) +#define XSAVE_HDR_SIZE 64 +#define XSAVE_HDR_OFFSETFXSAVE_SIZE + +#define XSAVE_YMM_SIZE 256 +#define XSAVE_YMM_OFFSET(XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) /* * These are the features that the OS can handle currently. 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e16a00e..d3d008e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1680,6 +1680,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PCI_SEGMENT: case KVM_CAP_DEBUGREGS: case KVM_CAP_X86_ROBUST_SINGLESTEP: + case KVM_CAP_XSAVE: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -1703,6 +1704,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MCE: r = KVM_MAX_MCE_BANKS; break; + case KVM_CAP_XCRS: + r = cpu_has_xsave; + break; default: r = 0; break; @@ -2355,6 +2359,77 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, return 0; } +static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, +struct kvm_xsave *guest_xsave) +{ + if (cpu_has_xsave) + memcpy(guest_xsave-region, + vcpu-arch.guest_fpu.state-xsave, + sizeof(struct xsave_struct)); + else { + memcpy(guest_xsave-region, + vcpu-arch.guest_fpu.state-fxsave, + sizeof(struct i387_fxsave_struct)); + *(u64 *)guest_xsave-region[XSAVE_HDR_OFFSET / sizeof(u32)] = + XSTATE_FPSSE; + } +} + +static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, +
[COMMIT master] KVM: MMU: rename 'page' and 'shadow_page' to 'sp'
From: Xiao Guangrong xiaoguangr...@cn.fujitsu.com Rename 'page' and 'shadow_page' to 'sp' to better fit the context Signed-off-by: Xiao Guangrong xiaoguangr...@cn.fujitsu.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6cd318d..8d00bb2 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -253,7 +253,7 @@ err: return 0; } -static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, +static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, const void *pte) { pt_element_t gpte; @@ -264,7 +264,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, gpte = *(const pt_element_t *)pte; if (~gpte (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { if (!is_present_gpte(gpte)) { - if (page-unsync) + if (sp-unsync) new_spte = shadow_trap_nonpresent_pte; else new_spte = shadow_notrap_nonpresent_pte; @@ -273,7 +273,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, return; } pgprintk(%s: gpte %llx spte %p\n, __func__, (u64)gpte, spte); - pte_access = page-role.access FNAME(gpte_access)(vcpu, gpte); + pte_access = sp-role.access FNAME(gpte_access)(vcpu, gpte); if (gpte_to_gfn(gpte) != vcpu-arch.update_pte.gfn) return; pfn = vcpu-arch.update_pte.pfn; @@ -286,7 +286,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, * we call mmu_set_spte() with reset_host_protection = true beacuse that * vcpu-arch.update_pte.pfn was fetched from get_user_pages(write = 1). 
*/ - mmu_set_spte(vcpu, spte, page-role.access, pte_access, 0, 0, + mmu_set_spte(vcpu, spte, sp-role.access, pte_access, 0, 0, gpte PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL, gpte_to_gfn(gpte), pfn, true, true); } @@ -300,7 +300,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, int *ptwrite, pfn_t pfn) { unsigned access = gw-pt_access; - struct kvm_mmu_page *shadow_page; + struct kvm_mmu_page *sp; u64 spte, *sptep = NULL; int direct; gfn_t table_gfn; @@ -341,30 +341,30 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, access = ~ACC_WRITE_MASK; /* * It is a large guest pages backed by small host pages, -* So we set @direct(@shadow_page-role.direct)=1, and -* set @table_gfn(@shadow_page-gfn)=the base page frame -* for linear translations. +* So we set @direct(@sp-role.direct)=1, and set +* @table_gfn(@sp-gfn)=the base page frame for linear +* translations. */ table_gfn = gw-gfn ~(KVM_PAGES_PER_HPAGE(level) - 1); } else { direct = 0; table_gfn = gw-table_gfn[level - 2]; } - shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, + sp = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, direct, access, sptep); if (!direct) { r = kvm_read_guest_atomic(vcpu-kvm, gw-pte_gpa[level - 2], curr_pte, sizeof(curr_pte)); if (r || curr_pte != gw-ptes[level - 2]) { - kvm_mmu_put_page(shadow_page, sptep); + kvm_mmu_put_page(sp, sptep); kvm_release_pfn_clean(pfn); sptep = NULL; break; } } - spte = __pa(shadow_page-spt) + spte = __pa(sp-spt) | PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; *sptep = spte; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: clear unsync_child_bitmap completely
From: Xiao Guangrong xiaoguangr...@cn.fujitsu.com In the current code, some pages' unsync_child_bitmap is not cleared completely in mmu_sync_children(); for example, if two PDPEs share one PDT, the unsync_child_bitmap of one of the PDPEs is not cleared. Currently this does no harm beyond a little extra overhead, but fixing it is preparatory work for a later patch. Signed-off-by: Xiao Guangrong xiaoguangr...@cn.fujitsu.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6230c38..951af3a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1149,33 +1149,38 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp, int i, ret, nr_unsync_leaf = 0; for_each_unsync_children(sp-unsync_child_bitmap, i) { + struct kvm_mmu_page *child; u64 ent = sp-spt[i]; - if (is_shadow_present_pte(ent) !is_large_pte(ent)) { - struct kvm_mmu_page *child; - child = page_header(ent PT64_BASE_ADDR_MASK); - - if (child-unsync_children) { - if (mmu_pages_add(pvec, child, i)) - return -ENOSPC; - - ret = __mmu_unsync_walk(child, pvec); - if (!ret) { - __clear_bit(i, sp-unsync_child_bitmap); - sp-unsync_children--; - WARN_ON((int)sp-unsync_children 0); - } else if (ret 0) - nr_unsync_leaf += ret; - else - return ret; - } + if (!is_shadow_present_pte(ent) || is_large_pte(ent)) + goto clear_child_bitmap; + + child = page_header(ent PT64_BASE_ADDR_MASK); + + if (child-unsync_children) { + if (mmu_pages_add(pvec, child, i)) + return -ENOSPC; + + ret = __mmu_unsync_walk(child, pvec); + if (!ret) + goto clear_child_bitmap; + else if (ret 0) + nr_unsync_leaf += ret; + else + return ret; + } else if (child-unsync) { + nr_unsync_leaf++; + if (mmu_pages_add(pvec, child, i)) + return -ENOSPC; + } else +goto clear_child_bitmap; - if (child-unsync) { - nr_unsync_leaf++; - if (mmu_pages_add(pvec, child, i)) - return -ENOSPC; - } - } + continue; + +clear_child_bitmap: + __clear_bit(i, sp-unsync_child_bitmap); + sp-unsync_children--; + WARN_ON((int)sp-unsync_children 0); } 
-- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: avoid double write protected in sync page path
From: Xiao Guangrong xiaoguangr...@cn.fujitsu.com The sync page is already write-protected in mmu_sync_children(), so don't write-protect it again. Signed-off-by: Xiao Guangrong xiaoguangr...@cn.fujitsu.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index fcf..d60bf90 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1216,6 +1216,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, if ((sp)-gfn != (gfn) || (sp)-role.direct || \ (sp)-role.invalid) {} else +/* @sp-gfn should be write-protected at the call site */ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, struct list_head *invalid_list, bool clear_unsync) { @@ -1224,11 +1225,8 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, return 1; } - if (clear_unsync) { - if (rmap_write_protect(vcpu-kvm, sp-gfn)) - kvm_flush_remote_tlbs(vcpu-kvm); + if (clear_unsync) kvm_unlink_unsync_page(vcpu-kvm, sp); - } if (vcpu-arch.mmu.sync_page(vcpu, sp)) { kvm_mmu_prepare_zap_page(vcpu-kvm, sp, invalid_list); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: cleanup for dirty page judgment
From: Xiao Guangrong xiaoguangr...@cn.fujitsu.com Use the is_dirty_gpte() wrapper function to clean up the checks for whether a guest PTE is dirty. Signed-off-by: Xiao Guangrong xiaoguangr...@cn.fujitsu.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 8d00bb2..876e705 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -287,7 +287,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, * vcpu-arch.update_pte.pfn was fetched from get_user_pages(write = 1). */ mmu_set_spte(vcpu, spte, sp-role.access, pte_access, 0, 0, -gpte PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL, +is_dirty_gpte(gpte), NULL, PT_PAGE_TABLE_LEVEL, gpte_to_gfn(gpte), pfn, true, true); } @@ -319,7 +319,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, mmu_set_spte(vcpu, sptep, access, gw-pte_access access, user_fault, write_fault, -gw-ptes[gw-level-1] PT_DIRTY_MASK, +is_dirty_gpte(gw-ptes[gw-level-1]), ptwrite, level, gw-gfn, pfn, false, true); break; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: cleanup for __mmu_unsync_walk()
From: Xiao Guangrong xiaoguangr...@cn.fujitsu.com Decrement sp->unsync_children right after clearing a bit in unsync_child_bitmap. Signed-off-by: Xiao Guangrong xiaoguangr...@cn.fujitsu.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8bfcb32..6230c38 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1160,9 +1160,11 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp, return -ENOSPC; ret = __mmu_unsync_walk(child, pvec); - if (!ret) + if (!ret) { __clear_bit(i, sp-unsync_child_bitmap); - else if (ret 0) + sp-unsync_children--; + WARN_ON((int)sp-unsync_children 0); + } else if (ret 0) nr_unsync_leaf += ret; else return ret; @@ -1176,8 +1178,6 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp, } } - if (find_first_bit(sp-unsync_child_bitmap, 512) == 512) - sp-unsync_children = 0; return nr_unsync_leaf; } -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
From: Marcelo Tosatti mtosa...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Fix typos in Documentation/kvm/mmu.txt
From: Jason Wang jasow...@redhat.com Signed-off-by: Jason Wang jasow...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/Documentation/kvm/mmu.txt b/Documentation/kvm/mmu.txt index 8cb42b9..142cc51 100644 --- a/Documentation/kvm/mmu.txt +++ b/Documentation/kvm/mmu.txt @@ -77,10 +77,10 @@ Memory Guest memory (gpa) is part of the user address space of the process that is using kvm. Userspace defines the translation between guest addresses and user -addresses (gpa-hva); note that two gpas may alias to the same gva, but not +addresses (gpa-hva); note that two gpas may alias to the same hva, but not vice versa. -These gvas may be backed using any method available to the host: anonymous +These hvas may be backed using any method available to the host: anonymous memory, file backed memory, and device memory. Memory might be paged by the host at any time. -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: x86 emulator: fix pusha instruction emulation
From: Wei Yongjun yj...@cn.fujitsu.com emulate pusha instruction only writeback the last EDI register, but the other registers which need to be writeback is ignored. This patch fixed it. Signed-off-by: Wei Yongjun yj...@cn.fujitsu.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a4c2dcd..c990db0 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1553,6 +1553,64 @@ exception: return X86EMUL_PROPAGATE_FAULT; } +static inline int writeback(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) +{ + int rc; + struct decode_cache *c = ctxt-decode; + u32 err; + + switch (c-dst.type) { + case OP_REG: + /* The 4-byte case *is* correct: +* in 64-bit mode we zero-extend. +*/ + switch (c-dst.bytes) { + case 1: + *(u8 *)c-dst.ptr = (u8)c-dst.val; + break; + case 2: + *(u16 *)c-dst.ptr = (u16)c-dst.val; + break; + case 4: + *c-dst.ptr = (u32)c-dst.val; + break; /* 64b: zero-ext */ + case 8: + *c-dst.ptr = c-dst.val; + break; + } + break; + case OP_MEM: + if (c-lock_prefix) + rc = ops-cmpxchg_emulated( + (unsigned long)c-dst.ptr, + c-dst.orig_val, + c-dst.val, + c-dst.bytes, + err, + ctxt-vcpu); + else + rc = ops-write_emulated( + (unsigned long)c-dst.ptr, + c-dst.val, + c-dst.bytes, + err, + ctxt-vcpu); + if (rc == X86EMUL_PROPAGATE_FAULT) + emulate_pf(ctxt, + (unsigned long)c-dst.ptr, err); + if (rc != X86EMUL_CONTINUE) + return rc; + break; + case OP_NONE: + /* no writeback */ + break; + default: + break; + } + return X86EMUL_CONTINUE; +} + static inline void emulate_push(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { @@ -1651,11 +1709,12 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, return rc; } -static void emulate_pusha(struct x86_emulate_ctxt *ctxt, +static int emulate_pusha(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { struct decode_cache *c = ctxt-decode; unsigned long old_esp = c-regs[VCPU_REGS_RSP]; + int rc = 
X86EMUL_CONTINUE; int reg = VCPU_REGS_RAX; while (reg = VCPU_REGS_RDI) { @@ -1663,8 +1722,18 @@ static void emulate_pusha(struct x86_emulate_ctxt *ctxt, (c-src.val = old_esp) : (c-src.val = c-regs[reg]); emulate_push(ctxt, ops); + + rc = writeback(ctxt, ops); + if (rc != X86EMUL_CONTINUE) + return rc; + ++reg; } + + /* Disable writeback. */ + c-dst.type = OP_NONE; + + return rc; } static int emulate_popa(struct x86_emulate_ctxt *ctxt, @@ -1817,64 +1886,6 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, return rc; } -static inline int writeback(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) -{ - int rc; - struct decode_cache *c = ctxt-decode; - u32 err; - - switch (c-dst.type) { - case OP_REG: - /* The 4-byte case *is* correct: -* in 64-bit mode we zero-extend. -*/ - switch (c-dst.bytes) { - case 1: - *(u8 *)c-dst.ptr = (u8)c-dst.val; - break; - case 2: - *(u16 *)c-dst.ptr = (u16)c-dst.val; - break; - case 4: - *c-dst.ptr = (u32)c-dst.val; - break; /* 64b: zero-ext */ - case 8: - *c-dst.ptr = c-dst.val; - break; - } - break; - case OP_MEM: - if (c-lock_prefix) - rc = ops-cmpxchg_emulated( - (unsigned long)c-dst.ptr, - c-dst.orig_val, -
[COMMIT master] KVM: x86 emulator: fix group3 instruction decoding
From: Wei Yongjun yj...@cn.fujitsu.com Group 3 instruction with ModRM reg field as 001 is defined as test instruction under AMD arch, and emulate_grp3() is ready for emulate it, so fix the decoding. static inline int emulate_grp3(...) { ... switch (c-modrm_reg) { case 0 ... 1: /* test */ emulate_2op_SrcV(test, c-src, c-dst, ctxt-eflags); ... } Signed-off-by: Wei Yongjun yj...@cn.fujitsu.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c990db0..abb8cec 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -336,11 +336,11 @@ static u32 group_table[] = { [Group1A*8] = DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0, [Group3_Byte*8] = - ByteOp | SrcImm | DstMem | ModRM, 0, + ByteOp | SrcImm | DstMem | ModRM, ByteOp | SrcImm | DstMem | ModRM, ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, 0, 0, 0, 0, [Group3*8] = - DstMem | SrcImm | ModRM, 0, + DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, 0, 0, 0, 0, [Group4*8] = -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: x86: Allow any LAPIC to accept PIC interrupts
From: Chris Lalancette clala...@redhat.com If the guest wants to accept timer interrupts on a CPU other than the BSP, we need to remove this gate. Signed-off-by: Chris Lalancette clala...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 024f6d1..49573c7 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1107,13 +1107,11 @@ int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) u32 lvt0 = apic_get_reg(vcpu-arch.apic, APIC_LVT0); int r = 0; - if (kvm_vcpu_is_bsp(vcpu)) { - if (!apic_hw_enabled(vcpu-arch.apic)) - r = 1; - if ((lvt0 APIC_LVT_MASKED) == 0 - GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) - r = 1; - } + if (!apic_hw_enabled(vcpu-arch.apic)) + r = 1; + if ((lvt0 APIC_LVT_MASKED) == 0 + GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) + r = 1; return r; } -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] net: fix deliver_no_wcard regression on loopback device
From: John Fastabend john.r.fastab...@intel.com deliver_no_wcard is not being set in skb_copy_header. In the skb_cloned case it is not being cleared and may cause the skb to be dropped when the loopback device pushes it back up the stack. Signed-off-by: John Fastabend john.r.fastab...@intel.com Acked-by: Eric Dumazet eric.duma...@gmail.com Tested-by: Markus Trippelsdorf mar...@trippelsdorf.de Signed-off-by: David S. Miller da...@davemloft.net diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9f07e74..bcf2fa3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -532,6 +532,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new-ip_summed = old-ip_summed; skb_copy_queue_mapping(new, old); new-priority = old-priority; + new-deliver_no_wcard = old-deliver_no_wcard; #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) new-ipvs_property = old-ipvs_property; #endif -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Fix xsave and xcr save/restore memory leak
From: Avi Kivity a...@redhat.com We allocate temporary kernel buffers for these structures, but never free them. Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d3d008e..d513e57 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2437,6 +2437,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp, void __user *argp = (void __user *)arg; int r; struct kvm_lapic_state *lapic = NULL; + struct kvm_xsave *xsave = NULL; + struct kvm_xcrs *xcrs = NULL; switch (ioctl) { case KVM_GET_LAPIC: { @@ -2632,8 +2634,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } case KVM_GET_XSAVE: { - struct kvm_xsave *xsave; - xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL); r = -ENOMEM; if (!xsave) @@ -2648,8 +2648,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } case KVM_SET_XSAVE: { - struct kvm_xsave *xsave; - xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL); r = -ENOMEM; if (!xsave) @@ -2663,8 +2661,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } case KVM_GET_XCRS: { - struct kvm_xcrs *xcrs; - xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL); r = -ENOMEM; if (!xcrs) @@ -2680,8 +2676,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } case KVM_SET_XCRS: { - struct kvm_xcrs *xcrs; - xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL); r = -ENOMEM; if (!xcrs) @@ -2700,6 +2694,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } out: kfree(lapic); + kfree(xsave); + kfree(xcrs); return r; } -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: x86: In DM_LOWEST, only deliver interrupts to vcpus with enabled LAPIC's
From: Chris Lalancette clala...@redhat.com Otherwise we might try to deliver a timer interrupt to a cpu that can't possibly handle it. Signed-off-by: Chris Lalancette clala...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 52f412f..06cf61e 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -100,7 +100,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, if (r 0) r = 0; r += kvm_apic_set_irq(vcpu, irq); - } else { + } else if (kvm_lapic_enabled(vcpu)) { if (!lowest) lowest = vcpu; else if (kvm_apic_compare_prio(vcpu, lowest) 0) -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] device-assignment: Don't deassign when the assignment fails
From: Alex Williamson alex.william...@redhat.com The last thing assign_device() does is call into KVM_ASSIGN_PCI_DEVICE. If that fails, the device is not assigned, so we shouldn't then try to deassign it. If you try to assign the same device multiple times, you can get into a nasty fail-succeed-fail-succeed loop. And we certainly shouldn't take the assigned_out branch before we've even attempted to assign the device. Signed-off-by: Alex Williamson alex.william...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/hw/device-assignment.c b/hw/device-assignment.c index 2b963b5..7e53a95 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -1407,12 +1407,12 @@ static int assigned_initfn(struct PCIDevice *pci_dev) if (pci_enable_capability_support(pci_dev, 0, NULL, assigned_device_pci_cap_write_config, assigned_device_pci_cap_init) 0) -goto assigned_out; +goto out; /* assign device to guest */ r = assign_device(dev); if (r 0) -goto assigned_out; +goto out; /* assign irq for the device */ r = assign_irq(dev); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] test: Add IDT framework
From: Avi Kivity a...@redhat.com Signed-off-by: Sheng Yang sh...@linux.intel.com Signed-off-by: Avi Kivity a...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/kvm/test/config-x86-common.mak b/kvm/test/config-x86-common.mak index c97de52..800b635 100644 --- a/kvm/test/config-x86-common.mak +++ b/kvm/test/config-x86-common.mak @@ -59,6 +59,8 @@ $(TEST_DIR)/realmode.o: bits = 32 $(TEST_DIR)/msr.flat: $(cstart.o) $(TEST_DIR)/msr.o +$(TEST_DIR)/idt_test.flat: $(cstart.o) $(TEST_DIR)/idt.o $(TEST_DIR)/idt_test.o + arch_clean: $(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat \ $(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o diff --git a/kvm/test/config-x86_64.mak b/kvm/test/config-x86_64.mak index d8fd2b5..f9cd121 100644 --- a/kvm/test/config-x86_64.mak +++ b/kvm/test/config-x86_64.mak @@ -5,6 +5,6 @@ ldarch = elf64-x86-64 CFLAGS += -D__x86_64__ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \ - $(TEST_DIR)/emulator.flat + $(TEST_DIR)/emulator.flat $(TEST_DIR)/idt_test.flat include config-x86-common.mak diff --git a/kvm/test/flat.lds b/kvm/test/flat.lds index 4120595..4888f3a 100644 --- a/kvm/test/flat.lds +++ b/kvm/test/flat.lds @@ -4,7 +4,12 @@ SECTIONS stext = .; .text : { *(.init) *(.text) *(.text.*) } . = ALIGN(4K); -.data : { *(.data) } +.data : { + *(.data) + exception_table_start = .; + *(.data.ex) + exception_table_end = .; + } . = ALIGN(16); .rodata : { *(.rodata) } . 
= ALIGN(16); diff --git a/kvm/test/lib/x86/idt.h b/kvm/test/lib/x86/idt.h new file mode 100644 index 000..6babcb4 --- /dev/null +++ b/kvm/test/lib/x86/idt.h @@ -0,0 +1,19 @@ +#ifndef __IDT_TEST__ +#define __IDT_TEST__ + +void setup_idt(void); + +#define ASM_TRY(catch) \ +movl $0, %%gs:4 \n\t \ +.pushsection .data.ex \n\t\ +.quad f, catch \n\t\ +.popsection \n\t \ +: + +#define UD_VECTOR 6 +#define GP_VECTOR 13 + +unsigned exception_vector(void); +unsigned exception_error_code(void); + +#endif diff --git a/kvm/test/x86/idt.c b/kvm/test/x86/idt.c new file mode 100644 index 000..999b3f0 --- /dev/null +++ b/kvm/test/x86/idt.c @@ -0,0 +1,150 @@ +#include idt.h +#include libcflat.h + +typedef struct { +unsigned short offset0; +unsigned short selector; +unsigned short ist : 3; +unsigned short : 5; +unsigned short type : 4; +unsigned short : 1; +unsigned short dpl : 2; +unsigned short p : 1; +unsigned short offset1; +unsigned offset2; +unsigned reserved; +} idt_entry_t; + +static idt_entry_t idt[256]; + +typedef struct { +unsigned short limit; +unsigned long linear_addr; +} __attribute__((packed)) descriptor_table_t; + +void lidt(idt_entry_t *idt, int nentries) +{ +descriptor_table_t dt; + +dt.limit = nentries * sizeof(*idt) - 1; +dt.linear_addr = (unsigned long)idt; +asm volatile (lidt %0 : : m(dt)); +} + +unsigned short read_cs() +{ +unsigned short r; + +asm volatile (mov %%cs, %0 : =r(r)); +return r; +} + +void memset(void *a, unsigned char v, int n) +{ +unsigned char *x = a; + +while (n--) + *x++ = v; +} + +void set_idt_entry(idt_entry_t *e, void *addr, int dpl) +{ +memset(e, 0, sizeof *e); +e-offset0 = (unsigned long)addr; +e-selector = read_cs(); +e-ist = 0; +e-type = 14; +e-dpl = dpl; +e-p = 1; +e-offset1 = (unsigned long)addr 16; +e-offset2 = (unsigned long)addr 32; +} + +struct ex_regs { +unsigned long rax, rcx, rdx, rbx; +unsigned long dummy, rbp, rsi, rdi; +unsigned long r8, r9, r10, r11; +unsigned long r12, r13, r14, r15; +unsigned long vector; +unsigned long 
error_code; +unsigned long rip; +unsigned long cs; +unsigned long rflags; +}; + +struct ex_record { +unsigned long rip; +unsigned long handler; +}; + +extern struct ex_record exception_table_start, exception_table_end; + +void do_handle_exception(struct ex_regs *regs) +{ +struct ex_record *ex; +unsigned ex_val; + +ex_val = regs-vector | (regs-error_code 16); + +asm(mov %0, %%gs:4 : : r(ex_val)); + +for (ex = exception_table_start; ex != exception_table_end; ++ex) { +if (ex-rip == regs-rip) { +regs-rip = ex-handler; +return; +} +} +printf(unhandled excecption\n); +exit(7); +} + +asm (.pushsection .text \n\t + ud_fault: \n\t + pushq $0 \n\t + pushq $6 \n\t + jmp handle_exception \n\t + + gp_fault: \n\t + pushq $13 \n\t + jmp handle_exception \n\t + + handle_exception: \n\t + push %r15; push %r14; push %r13; push %r12 \n\t + push %r11; push %r10; push %r9; push %r8 \n\t + push %rdi; push %rsi; push %rbp; sub $8, %rsp \n\t + push %rbx; push %rdx;
Re: [PATCH 1/2] Add 'serial' attribute to virtio-blk devices
Rusty Russell wrote: On Sat, 19 Jun 2010 04:08:02 am Ryan Harper wrote: Create a new attribute for virtio-blk devices that will fetch the serial number of the block device. This attribute can be used by udev to create disk/by-id symlinks for devices that don't have a UUID (filesystem) associated with them. ATA_IDENTIFY strings are special in that they can be up to 20 chars long and aren't required to be NULL-terminated. The buffer is also zero-padded meaning that if the serial is 19 chars or less that we get a NULL terminated string. When copying this value into a string buffer, we must be careful to copy up to the NULL (if it present) and only 20 if it is longer and not to attempt to NULL terminate; this isn't needed. Signed-off-by: Ryan Harper ry...@us.ibm.com Signed-off-by: john cooper john.coo...@redhat.com --- drivers/block/virtio_blk.c | 32 1 files changed, 32 insertions(+), 0 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 258bc2a..f1ef26f 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -281,6 +281,31 @@ static int index_to_minor(int index) return index PART_BITS; } +/* Copy serial number from *s to *d. Copy operation terminates on either + * encountering a nul in *s or after n bytes have been copied, whichever + * occurs first. *d is not forcibly nul terminated. Return # of bytes copied. + */ +static inline int serial_sysfs(char *d, char *s, int n) +{ +char *di = d; + +while (*s n--) +*d++ = *s++; +return d - di; +} + +static ssize_t virtblk_serial_show(struct device *dev, +struct device_attribute *attr, char *buf) +{ +struct gendisk *disk = dev_to_disk(dev); +char id_str[VIRTIO_BLK_ID_BYTES]; + +if (IS_ERR(virtblk_get_id(disk, id_str))) +return 0; 0? Really? That doesn't seem very informative. Propagating a prospective error from virtblk_get_id() should be possible. Unsure if doing so is more useful from the user's perspective compared to just a nul id string. 
+return serial_sysfs(buf, id_str, min(VIRTIO_BLK_ID_BYTES, PAGE_SIZE)); How about something like this: BUILD_BUG_ON(PAGE_SIZE VIRTIO_BLK_ID_BYTES + 1); Agreed, that's a better wrench in the gearworks. Note padding buf[] by 1 isn't necessary as indicated below. /* id_str is not necessarily nul-terminated! */ buf[VIRTIO_BLK_ID_BYTES] = '\0'; return virtblk_get_id(disk, buf); The /sys file is rendered according to the length returned from this function and the trailing nul is not interpreted in this context. In fact if a nul is added and included in the byte count of the string it will appear in the /sys file. Thanks, -john -- john.coo...@redhat.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/2] KVM: Keep slot ID in memory slot structure
May be used for distinguishing between internal and user slots, or for sorting slots in size order. Signed-off-by: Avi Kivity a...@redhat.com --- include/linux/kvm_host.h |1 + virt/kvm/kvm_main.c |1 + 2 files changed, 2 insertions(+), 0 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2d96555..d84bf40 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -124,6 +124,7 @@ struct kvm_memory_slot { } *lpage_info[KVM_NR_PAGE_SIZES - 1]; unsigned long userspace_addr; int user_alloc; + int id; }; static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 84a0906..add43a3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -570,6 +570,7 @@ int __kvm_set_memory_region(struct kvm *kvm, new = old = *memslot; + new.id = mem-slot; new.base_gfn = base_gfn; new.npages = npages; new.flags = mem-flags; -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/2] KVM: Prevent internal slots from being COWed
If a process with a memory slot is COWed, the page will change its address (despite having an elevated reference count). This breaks internal memory slots which have their physical addresses loaded into vmcs registers (see the APIC access memory slot). Signed-off-by: Avi Kivity a...@redhat.com --- arch/x86/kvm/x86.c |5 + 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 33156a3..d9a33e6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5633,6 +5633,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, int user_alloc) { int npages = memslot->npages; + int map_flags = MAP_PRIVATE | MAP_ANONYMOUS; + + /* Prevent internal slot pages from being moved by fork()/COW. */ + if (memslot->id >= KVM_MEMORY_SLOTS) + map_flags = MAP_SHARED | MAP_ANONYMOUS; /*To keep backward compatibility with older userspace, *x86 needs to hanlde !user_alloc case. -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/2] Fix failures caused by fork() interaction with internal slots
fork() has a WONTFIX bug where a page with an elevated reference count will be COWed such that the page address changes even in the process which has taken the reference. This interacts badly with internal memory slots that install pages in vmcs registers, such as the APIC access page. This patchset disables fork() for these slots. Avi Kivity (2): KVM: Keep slot ID in memory slot structure KVM: Prevent internal slots from being COWed arch/x86/kvm/x86.c |5 + include/linux/kvm_host.h |1 + virt/kvm/kvm_main.c |1 + 3 files changed, 7 insertions(+), 0 deletions(-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Update .gitignore
On 06/21/2010 08:24 AM, Hidetoshi Seto wrote: I think some people have noticed that: $ ./configure $ make $ git status # On branch master # Untracked files: # (use "git add <file>..." to include in what will be committed) # # QMP/qmp-commands.txt # libdis-user/ # libdis/ # pc-bios/optionrom/vapic.bin nothing added to commit but untracked files present (use "git add" to track) Please consider applying this patch to qemu-kvm.git. This is equally applicable to qemu.git, so please send it to the qemu mailing list, qemu-de...@nongnu.org. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kvm-s390: Dont exit SIE on SIGP sense running
On 06/18/2010 12:16 AM, Christian Borntraeger wrote: Avi, Marcelo, Newer (guest) kernels use sigp sense running in their spinlock implementation to check if the other cpu is running before yielding the processor. This revealed some wrong guest settings, causing unnecessary exits for every sigp sense running. Applied, thanks. void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 3, "%s", "free cpu"); + clear_bit(63 - vcpu->vcpu_id, (unsigned long *) &vcpu->kvm->arch.sca->mcn); if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == (__u64) vcpu->arch.sie_block) vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; Unrelated, do these VCPU_EVENTs want to become ftrace tracepoints? -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/2] July 2010 feature removal
As advertised, two features are scheduled for removal now: aliases and kernel-allocated memory regions. Remove them. Avi Kivity (2): KVM: Remove memory alias support KVM: Remove kernel-allocated memory regions Documentation/feature-removal-schedule.txt | 21 Documentation/kvm/api.txt | 36 +--- arch/ia64/kvm/kvm-ia64.c |5 - arch/powerpc/kvm/powerpc.c |5 - arch/s390/kvm/kvm-s390.c |5 - arch/x86/include/asm/kvm_host.h| 21 arch/x86/kvm/mmu.c | 17 +--- arch/x86/kvm/paging_tmpl.h |3 +- arch/x86/kvm/x86.c | 141 arch/x86/kvm/x86.h |7 -- include/linux/kvm.h|1 + include/linux/kvm_host.h |6 - virt/kvm/kvm_main.c| 18 +--- 13 files changed, 12 insertions(+), 274 deletions(-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/2] KVM: Remove kernel-allocated memory regions
Equivalent (and better) functionality is provided by user-allocated memory regions. Signed-off-by: Avi Kivity a...@redhat.com --- Documentation/feature-removal-schedule.txt | 10 -- Documentation/kvm/api.txt | 24 +--- arch/x86/kvm/x86.c | 16 3 files changed, 1 insertions(+), 49 deletions(-) diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index e0ffe8d..c8bc454 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -548,16 +548,6 @@ Who: John Stultz johns...@us.ibm.com -What: KVM kernel-allocated memory slots -When: July 2010 -Why: Since 2.6.25, kvm supports user-allocated memory slots, which are - much more flexible than kernel-allocated slots. All current userspace - supports the newer interface and this code can be removed with no - impact. -Who: Avi Kivity a...@redhat.com - - - What: KVM paravirt mmu host support When: January 2011 Why: The paravirt mmu host support is slower than non-paravirt mmu, both diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 7e41594..d9b00f1 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -160,29 +160,7 @@ Type: vm ioctl Parameters: struct kvm_memory_region (in) Returns: 0 on success, -1 on error -struct kvm_memory_region { - __u32 slot; - __u32 flags; - __u64 guest_phys_addr; - __u64 memory_size; /* bytes */ -}; - -/* for kvm_memory_region::flags */ -#define KVM_MEM_LOG_DIRTY_PAGES 1UL - -This ioctl allows the user to create or modify a guest physical memory -slot. When changing an existing slot, it may be moved in the guest -physical memory space, or its flags may be modified. It may not be -resized. Slots may not overlap. - -The flags field supports just one flag, KVM_MEM_LOG_DIRTY_PAGES, which -instructs kvm to keep track of writes to memory within the slot. See -the KVM_GET_DIRTY_LOG ioctl. 
- -It is recommended to use the KVM_SET_USER_MEMORY_REGION ioctl instead -of this API, if available. This newer API allows placing guest memory -at specified locations in the host address space, yielding better -control and easy access. +This ioctl is obsolete and has been removed. 4.6 KVM_CREATE_VCPU diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7d7558e..a962307 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2967,22 +2967,6 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; break; } - case KVM_SET_MEMORY_REGION: { - struct kvm_memory_region kvm_mem; - struct kvm_userspace_memory_region kvm_userspace_mem; - - r = -EFAULT; - if (copy_from_user(kvm_mem, argp, sizeof kvm_mem)) - goto out; - kvm_userspace_mem.slot = kvm_mem.slot; - kvm_userspace_mem.flags = kvm_mem.flags; - kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr; - kvm_userspace_mem.memory_size = kvm_mem.memory_size; - r = kvm_vm_ioctl_set_memory_region(kvm, kvm_userspace_mem, 0); - if (r) - goto out; - break; - } case KVM_SET_NR_MMU_PAGES: r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg); if (r) -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/2] KVM: Remove memory alias support
As advertised in feature-removal-schedule.txt. Equivalent support is provided by overlapping memory regions. Signed-off-by: Avi Kivity a...@redhat.com --- Documentation/feature-removal-schedule.txt | 11 --- Documentation/kvm/api.txt | 12 +--- arch/ia64/kvm/kvm-ia64.c |5 - arch/powerpc/kvm/powerpc.c |5 - arch/s390/kvm/kvm-s390.c |5 - arch/x86/include/asm/kvm_host.h| 21 - arch/x86/kvm/mmu.c | 17 +--- arch/x86/kvm/paging_tmpl.h |3 +- arch/x86/kvm/x86.c | 125 arch/x86/kvm/x86.h |7 -- include/linux/kvm.h|1 + include/linux/kvm_host.h |6 -- virt/kvm/kvm_main.c| 18 +--- 13 files changed, 11 insertions(+), 225 deletions(-) diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index c268783..e0ffe8d 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -538,17 +538,6 @@ Who: Jan Kiszka jan.kis...@web.de -What: KVM memory aliases support -When: July 2010 -Why: Memory aliasing support is used for speeding up guest vga access - through the vga windows. - - Modern userspace no longer uses this feature, so it's just bitrotted - code and can be removed with no impact. -Who: Avi Kivity a...@redhat.com - - - What: xtime, wall_to_monotonic When: 2.6.36+ Files: kernel/time/timekeeping.c include/linux/time.h diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index ffba03f..7e41594 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -226,17 +226,7 @@ Type: vm ioctl Parameters: struct kvm_memory_alias (in) Returns: 0 (success), -1 (error) -struct kvm_memory_alias { - __u32 slot; /* this has a different namespace than memory slots */ - __u32 flags; - __u64 guest_phys_addr; - __u64 memory_size; - __u64 target_phys_addr; -}; - -Defines a guest physical address space region as an alias to another -region. Useful for aliased address, for example the VGA low memory -window. Should not be used with userspace memory. 
+This ioctl is obsolete and has been removed. 4.9 KVM_RUN diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 91760e8..bd510be 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1946,11 +1946,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return vcpu-arch.timer_fired; } -gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) -{ - return gfn; -} - int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return (vcpu-arch.mp_state == KVM_MP_STATE_RUNNABLE) || diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index b5ebdfb..72a4ad8 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -36,11 +36,6 @@ #define CREATE_TRACE_POINTS #include trace.h -gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) -{ - return gfn; -} - int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { return !(v-arch.msr MSR_WE) || !!(v-arch.pending_exceptions); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index da1508d..47acdb1 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -723,11 +723,6 @@ void kvm_arch_flush_shadow(struct kvm *kvm) { } -gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) -{ - return gfn; -} - static int __init kvm_s390_init(void) { int ret; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2ec2e27..a57cdea 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -69,8 +69,6 @@ #define IOPL_SHIFT 12 -#define KVM_ALIAS_SLOTS 4 - #define KVM_PERMILLE_MMU_PAGES 20 #define KVM_MIN_ALLOC_MMU_PAGES 64 #define KVM_MMU_HASH_SHIFT 10 @@ -362,24 +360,7 @@ struct kvm_vcpu_arch { u64 hv_vapic; }; -struct kvm_mem_alias { - gfn_t base_gfn; - unsigned long npages; - gfn_t target_gfn; -#define KVM_ALIAS_INVALID 1UL - unsigned long flags; -}; - -#define KVM_ARCH_HAS_UNALIAS_INSTANTIATION - -struct kvm_mem_aliases { - struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; - int naliases; -}; - struct kvm_arch { - struct kvm_mem_aliases 
*aliases; - unsigned int n_free_mmu_pages; unsigned int n_requested_mmu_pages; unsigned int n_alloc_mmu_pages; @@ -655,8 +636,6 @@ void kvm_disable_tdp(void); int complete_pio(struct kvm_vcpu *vcpu); bool kvm_check_iopl(struct kvm_vcpu *vcpu); -struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); - static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) { struct page
[PATCH] do not enter vcpu again if it was stopped during IO
To prevent reentering the vcpu after IO completion it is not enough to set env->stopped, since it is checked only in the main loop, but control will not get there until the next non-IO exit since kvm_run() will reenter the vcpu to complete the IO instruction. Solve this by sending a self-signal to request exit after IO instruction completion. Signed-off-by: Gleb Natapov g...@redhat.com diff --git a/qemu-kvm.c b/qemu-kvm.c index be1dac2..4f7cf6d 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -603,6 +603,10 @@ int kvm_run(CPUState *env) r = pre_kvm_run(kvm, env); if (r) return r; +if (env->exit_request) { +env->exit_request = 0; +pthread_kill(env->kvm_cpu_state.thread, SIG_IPI); +} r = ioctl(fd, KVM_RUN, 0); if (r == -1 && errno != EINTR && errno != EAGAIN) { diff --git a/vl.c b/vl.c index 9e9c176..dcfab13 100644 --- a/vl.c +++ b/vl.c @@ -1817,6 +1817,7 @@ void qemu_system_reset_request(void) } if (cpu_single_env) { cpu_single_env->stopped = 1; +cpu_exit(cpu_single_env); } qemu_notify_event(); } -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] QEMU: Update .gitignore
(2010/06/21 17:19), Avi Kivity wrote: On 06/21/2010 08:24 AM, Hidetoshi Seto wrote: I think some people have noticed that: $ ./configure $ make $ git status # On branch master # Untracked files: # (use git add file... to include in what will be committed) # # QMP/qmp-commands.txt # libdis-user/ # libdis/ # pc-bios/optionrom/vapic.bin nothing added to commit but untracked files present (use git add to track) Please consider applying this patch to qemu-kvm.git. This is equally applicable to qemu.git, so please sent it to the qemu mailing list, qemu-de...@nongnu.org. Thanks for your advice, Avi. Now this mail is sent to qemu ML, w/ above quotes as short history. Could someone pick this up? Thanks, H.Seto = Subject: [PATCH] QEMU: Update .gitignore Add some files/directories to .gitignore - vapic.bin A generated binary file. - libdis/ and libdis-user/ These are directories generated by ./configure. - QMP/qmp-commands.txt A generated text. Signed-off-by: Hidetoshi Seto seto.hideto...@jp.fujitsu.com --- .gitignore |4 1 files changed, 4 insertions(+), 0 deletions(-) diff --git a/.gitignore b/.gitignore index 2d7f439..fa4f241 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,8 @@ config-target.* libhw32 libhw64 libuser +libdis +libdis-user qemu-doc.html qemu-tech.html qemu-doc.info @@ -26,6 +28,7 @@ qemu-img-cmds.texi qemu-img-cmds.h qemu-io qemu-monitor.texi +QMP/qmp-commands.txt .gdbinit *.a *.aux @@ -50,4 +53,5 @@ pc-bios/optionrom/linuxboot.bin pc-bios/optionrom/multiboot.bin pc-bios/optionrom/multiboot.raw pc-bios/optionrom/extboot.bin +pc-bios/optionrom/vapic.bin .stgit-* -- 1.7.0 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH V2 1/5] para virt interface of perf to support kvm guest os statistics collection in guest os
Here is the version 2. ChangeLog since V1: Mostly changes based on Avi's suggestions. 1) Use an id to identify the perf_event between host and guest; 2) Change lots of code to deal with malicious guest os; 3) Add a perf_event number limitation per guest os instance; 4) Support guest os on the top of another guest os scenario. But I didn't test it yet as there is no environment. The design is to add 2 pointers in struct perf_event. One is used by host and the other is used by guest. 5) Fix the bug to support 'perf stat'. The key is to sync count data back to guest when guest tries to disable the perf_event at host side. 6) Add a clear ABI of PV perf. I don't implement live migration feature. Avi, Is live migration necessary on pv perf support? Based on Ingo's idea, I implement a para virt interface for perf to support statistics collection in guest os. That means we could run tool perf in guest os directly. Great thanks to Peter Zijlstra. He is really the architect and gave me architecture design suggestions. I also want to thank Yangsheng and LinMing for their generous help. The design is: 1) Add a kvm_pmu whose callbacks mostly just call a hypercall to vmexit to host kernel; 2) Create a host perf_event per guest perf_event; 3) Host kernel syncs perf_event count/overflows data changes to guest perf_event when processing perf_event overflows after NMI arrives. Host kernel injects an NMI to guest kernel if a guest event overflows. 4) Guest kernel goes through all enabled events on current cpu and outputs data when they overflow. 5) No change in user space. Below is an example.
#perf top -- PerfTop:7954 irqs/sec kernel:79.5% exact: 0.0% [1000Hz cycles], (all, 8 CPUs) -- samples pcnt function DSO ___ _ _ 5315.00 4.9% copy_user_generic_string /lib/modules/2.6.35-rc1-tip-guestperf/build/vmlinux 3342.00 3.1% add_preempt_count /lib/modules/2.6.35-rc1-tip-guestperf/build/vmlinux 3338.00 3.1% sub_preempt_count /lib/modules/2.6.35-rc1-tip-guestperf/build/vmlinux 2454.00 2.3% pvclock_clocksource_read /lib/modules/2.6.35-rc1-tip-guestperf/build/vmlinux 2434.00 2.3% tcp_sendmsg /lib/modules/2.6.35-rc1-tip-guestperf/build/vmlinux 2090.00 1.9% child_run /bm/tmp/benchmarks/run_bmtbench/dbench/dbench-3.03/tbench 2081.00 1.9% debug_smp_processor_id /lib/modules/2.6.35-rc1-tip-guestperf/build/vmlinux 2003.00 1.9% __GI_strstr /lib64/libc-2.11.so 1999.00 1.9% __strchr_sse2/lib64/libc-2.11.so 1983.00 1.8% tcp_ack /lib/modules/2.6.35-rc1-tip-guestperf/build/vmlinux 1800.00 1.7% tcp_transmit_skb /lib/modules/2.6.35-rc1-tip-guestperf/build/vmlinux 1727.00 1.6% schedule /lib/modules/2.6.35-rc1-tip-guestperf/build/vmlinux 1706.00 1.6% __libc_recv /lib64/libc-2.11.so 1702.00 1.6% __GI_memchr /lib64/libc-2.11.so 1580.00 1.5% tcp_recvmsg /lib/modules/2.6.35-rc1-tip-guestperf/build/vmlinux The patch is against tip/master tree of June 20st. Signed-off-by: Zhang Yanmin yanmin_zh...@linux.intel.com --- --- linux-2.6_tip0620/Documentation/kvm/paravirt-perf.txt 1970-01-01 08:00:00.0 +0800 +++ linux-2.6_tip0620perfkvm/Documentation/kvm/paravirt-perf.txt 2010-06-21 15:21:39.312999849 +0800 @@ -0,0 +1,133 @@ +The x86 kvm paravirt perf event interface +=== + +This paravirt interface is responsible for supporting guest os perf event +collections. If guest os supports this interface, users could run command +perf in guest os directly. + +Design + + +Guest os calls a series of hypercalls to communicate with host kernel to +create/enable/disable/close perf events. Host kernel notifies guest os +by injecting an NMI to guest os when an event overflows. 
Guest os needs to +go through all its active events to check if they overflow, and output +performance statistics if they do. + +ABI += + +1) Detect if host kernel supports paravirt perf interface: +#define KVM_FEATURE_PV_PERF 4 +Host kernel defines above cpuid bit. Guest os calls
[PATCH V2 2/5] para virt interface of perf to support kvm guest os statistics collection in guest os
The 2nd patch is to change the definition of perf_event to facilitate perf attr copy when a hypercall happens. Signed-off-by: Zhang Yanmin yanmin_zh...@linux.intel.com --- --- linux-2.6_tip0620/include/linux/perf_event.h2010-06-21 15:19:52.821999849 +0800 +++ linux-2.6_tip0620perfkvm/include/linux/perf_event.h 2010-06-21 16:53:49.283999849 +0800 @@ -188,7 +188,10 @@ struct perf_event_attr { __u64 sample_type; __u64 read_format; - __u64 disabled : 1, /* off by default*/ + union { + __u64 flags; + struct { + __u64 disabled : 1, /* off by default*/ inherit: 1, /* children inherit it */ pinned : 1, /* must always be on PMU */ exclusive : 1, /* only group on PMU */ @@ -217,6 +220,8 @@ struct perf_event_attr { mmap_data : 1, /* non-exec mmap data*/ __reserved_1 : 46; + }; + }; union { __u32 wakeup_events;/* wakeup every n events */ @@ -465,12 +470,6 @@ enum perf_callchain_context { # include asm/local64.h #endif -struct perf_guest_info_callbacks { - int (*is_in_guest) (void); - int (*is_user_mode) (void); - unsigned long (*get_guest_ip) (void); -}; - #ifdef CONFIG_HAVE_HW_BREAKPOINT #include asm/hw_breakpoint.h #endif @@ -753,6 +752,20 @@ struct perf_event { perf_overflow_handler_t overflow_handler; + /* +* pointers used by kvm perf paravirt interface. +* +* 1) Used in host kernel and points to host_perf_shadow which +* has information about guest perf_event +*/ + void*host_perf_shadow; + /* +* 2) Used in guest kernel and points to guest_perf_shadow which +* is used as a communication area with host kernel. Host kernel +* copies overflow data to it when an event overflows. 
+*/ + void*guest_perf_shadow; + #ifdef CONFIG_EVENT_TRACING struct ftrace_event_call*tp_event; struct event_filter *filter; @@ -838,6 +851,16 @@ struct perf_output_handle { int sample; }; +struct perf_guest_info_callbacks { + /* Support collect guest statistics from host side */ + int (*is_in_guest) (void); + int (*is_user_mode) (void); + unsigned long (*get_guest_ip) (void); + + /* Support paravirt interface */ + void (*copy_event_to_shadow) (struct perf_event *event, int overflows); +}; + #ifdef CONFIG_PERF_EVENTS /* @@ -871,6 +894,10 @@ perf_event_create_kernel_counter(struct perf_overflow_handler_t callback); extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); +extern void perf_event_output(struct perf_event *event, int nmi, + struct perf_sample_data *data, struct pt_regs *regs); +void perf_event_attach(struct perf_event *event); +void perf_event_detach(struct perf_event *event); struct perf_sample_data { u64 type; @@ -1023,6 +1050,14 @@ perf_event_task_sched_in(struct task_str static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) { } + +static inline void +perf_event_output(struct perf_event *event, int nmi, + struct perf_sample_data *data, struct pt_regs *regs){ } + +static inline void perf_event_attach(struct perf_event *event) { } +static inline void perf_event_detach(struct perf_event *event) { } + static inline void perf_event_task_tick(struct task_struct *task) { } static inline int perf_event_init_task(struct task_struct *child) { return 0; } --- linux-2.6_tip0620/kernel/watchdog.c 2010-06-21 15:20:48.517999849 +0800 +++ linux-2.6_tip0620perfkvm/kernel/watchdog.c 2010-06-21 15:21:39.315999849 +0800 @@ -197,8 +197,6 @@ static struct perf_event_attr wd_hw_attr .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, .size = sizeof(struct perf_event_attr), - .pinned = 1, - .disabled = 1, }; /* Callback function for perf event subsystem */ @@ -361,6 +359,8 @@ static int 
watchdog_nmi_enable(int cpu) /* Try to register using hardware perf events */ wd_attr = wd_hw_attr; wd_attr-sample_period = hw_nmi_get_sample_period(); + wd_attr-pinned = 1; + wd_attr-disabled = 1; event = perf_event_create_kernel_counter(wd_attr, cpu, -1,
[PATCH V2 3/5] para virt interface of perf to support kvm guest os statistics collection in guest os
The 3rd patch is to implement para virt perf at host kernel. Signed-off-by: Zhang Yanmin yanmin_zh...@linux.intel.com --- --- linux-2.6_tip0620/arch/x86/include/asm/kvm_para.h 2010-06-21 15:19:38.992999849 +0800 +++ linux-2.6_tip0620perfkvm/arch/x86/include/asm/kvm_para.h2010-06-21 15:21:39.308999849 +0800 @@ -2,6 +2,7 @@ #define _ASM_X86_KVM_PARA_H #include linux/types.h +#include linux/list.h #include asm/hyperv.h /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It @@ -19,7 +20,8 @@ /* This indicates that the new set of kvmclock msrs * are available. The use of 0x11 and 0x12 is deprecated */ -#define KVM_FEATURE_CLOCKSOURCE23 +#define KVM_FEATURE_CLOCKSOURCE2 3 +#define KVM_FEATURE_PV_PERF4 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. @@ -33,7 +35,14 @@ #define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00 #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 -#define KVM_MAX_MMU_OP_BATCH 32 +#define KVM_MAX_MMU_OP_BATCH 32 + +/* Operations for KVM_PERF_OP */ +#define KVM_PERF_OP_OPEN 1 +#define KVM_PERF_OP_CLOSE 2 +#define KVM_PERF_OP_ENABLE 3 +#define KVM_PERF_OP_DISABLE4 +#define KVM_PERF_OP_READ 5 /* Operations for KVM_HC_MMU_OP */ #define KVM_MMU_OP_WRITE_PTE1 @@ -64,6 +73,85 @@ struct kvm_mmu_op_release_pt { #ifdef __KERNEL__ #include asm/processor.h +/* + * data communication area about perf_event between + * Host kernel and guest kernel + */ +struct guest_perf_event { + u64 count; + atomic_t overflows; +}; + +/* + * In host kernel, perf_event-host_perf_shadow points to + * host_perf_shadow which records some information + * about the guest. + */ +struct host_perf_shadow { + /* guest perf_event id passed from guest os */ + int id; + /* +* Host kernel saves data into data member counter firstly. 
+* kvm will get data from this counter and calls kvm functions +* to copy or add data back to guets os before entering guest os +* next time +*/ + struct guest_perf_event counter; + /* guest_event_addr is gpa_t pointing to guest os guest_perf_event*/ + __u64 guest_event_addr; + + /* +* Link to of kvm.kvm_arch.shadow_hash_table +*/ + struct list_head shadow_entry; + struct kvm_vcpu *vcpu; + + struct perf_event *host_event; + /* +* Below counter is to prevent malicious guest os to try to +* close/enable event at the same time. +*/ + atomic_t ref_counter; +}; + +/* + * In guest kernel, perf_event-guest_shadow points to + * guest_perf_shadow which records some information + * about the guest. + */ +struct guest_perf_shadow { + /* guest perf_event id passed from guest os */ + int id; + /* +* Host kernel kvm saves data into data member counter +*/ + struct guest_perf_event counter; +}; + +/* + * guest_perf_attr is used when guest calls hypercall to + * open a new perf_event at host side. Mostly, it's a copy of + * perf_event_attr and deletes something not used by host kernel. 
+ */ +struct guest_perf_attr { + __u32 type; + __u64 config; + __u64 sample_period; + __u64 sample_type; + __u64 read_format; + __u64 flags; + __u32 bp_type; + __u64 bp_addr; + __u64 bp_len; +}; + +struct guest_perf_event_param { + __u64 attr_addr; + __u64 guest_event_addr; + /* In case there is an alignment issue, we put id as the last one */ + int id; +}; + extern void kvmclock_init(void); --- linux-2.6_tip0620/arch/x86/include/asm/kvm_host.h 2010-06-21 15:19:39.01849 +0800 +++ linux-2.6_tip0620perfkvm/arch/x86/include/asm/kvm_host.h2010-06-21 15:21:39.308999849 +0800 @@ -24,6 +24,7 @@ #include asm/desc.h #include asm/mtrr.h #include asm/msr-index.h +#include asm/perf_event.h #define KVM_MAX_VCPUS 64 #define KVM_MEMORY_SLOTS 32 @@ -360,6 +361,18 @@ struct kvm_vcpu_arch { /* fields used by HYPER-V emulation */ u64 hv_vapic; + + /* +* Fields used by PARAVIRT perf interface: +* +* kvm checks overflow_events before entering guest os, +* and copy data back to guest os. +* event_mutex is to avoid a race between NMI perf event overflow +* handler, event close, and enable/disable. +*/ + struct mutex event_mutex; + int overflows; + struct perf_event *overflow_events[X86_PMC_IDX_MAX]; }; struct kvm_mem_alias { @@ -377,6 +390,9 @@ struct kvm_mem_aliases { int naliases; }; +#define KVM_PARAVIRT_PERF_EVENT_ENTRY_BITS (10)
[PATCH V2 5/5] para virt interface of perf to support kvm guest os statistics collection in guest os
The 5th patch is applied to the latest qemu-kvm tree. Signed-off-by: Zhang Yanmin yanmin_zh...@linux.intel.com --- diff -Nraup qemu-kvm_0621/kvm/include/linux/kvm.h qemu-kvm_0621_perf/kvm/include/linux/kvm.h --- qemu-kvm_0621/kvm/include/linux/kvm.h 2010-06-21 11:00:28.0 +0800 +++ qemu-kvm_0621_perf/kvm/include/linux/kvm.h 2010-06-21 13:23:51.537999849 +0800 @@ -530,6 +530,7 @@ struct kvm_enable_cap { #ifdef __KVM_HAVE_XCRS #define KVM_CAP_XCRS 56 #endif +#define KVM_CAP_PV_PERF 57 #ifdef KVM_CAP_IRQ_ROUTING diff -Nraup qemu-kvm_0621/kvm/include/x86/asm/kvm_para.h qemu-kvm_0621_perf/kvm/include/x86/asm/kvm_para.h --- qemu-kvm_0621/kvm/include/x86/asm/kvm_para.h2010-06-21 11:00:28.0 +0800 +++ qemu-kvm_0621_perf/kvm/include/x86/asm/kvm_para.h 2010-06-21 13:27:04.375999849 +0800 @@ -15,6 +15,7 @@ #define KVM_FEATURE_CLOCKSOURCE0 #define KVM_FEATURE_NOP_IO_DELAY 1 #define KVM_FEATURE_MMU_OP 2 +#define KVM_FEATURE_PV_PERF4 #define MSR_KVM_WALL_CLOCK 0x11 #define MSR_KVM_SYSTEM_TIME 0x12 diff -Nraup qemu-kvm_0621/target-i386/kvm.c qemu-kvm_0621_perf/target-i386/kvm.c --- qemu-kvm_0621/target-i386/kvm.c 2010-06-21 11:00:29.0 +0800 +++ qemu-kvm_0621_perf/target-i386/kvm.c2010-06-21 13:00:14.136999850 +0800 @@ -150,6 +150,9 @@ struct kvm_para_features { #ifdef KVM_CAP_PV_MMU { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP }, #endif +#ifdef KVM_CAP_PV_PERF +{ KVM_CAP_PV_PERF, KVM_FEATURE_PV_PERF }, +#endif { -1, -1 } }; -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH V2 4/5] para virt interface of perf to support kvm guest os statistics collection in guest os
The 4th patch is to implement para virt perf at guest side. Signed-off-by: Zhang Yanmin yanmin_zh...@linux.intel.com --- --- linux-2.6_tip0620/arch/x86/Kconfig 2010-06-21 15:19:39.180999849 +0800 +++ linux-2.6_tip0620perfkvm/arch/x86/Kconfig 2010-06-21 15:21:39.30849 +0800 @@ -552,6 +552,14 @@ config KVM_GUEST This option enables various optimizations for running under the KVM hypervisor. +config KVM_PERF + bool KVM Guest perf support + select PARAVIRT + select PERF_EVENT + ---help--- + This option enables various optimizations for running perf in + guest os under the KVM hypervisor. + source arch/x86/lguest/Kconfig config PARAVIRT --- linux-2.6_tip0620/arch/x86/kernel/cpu/perf_event.c 2010-06-21 15:19:39.964999849 +0800 +++ linux-2.6_tip0620perfkvm/arch/x86/kernel/cpu/perf_event.c 2010-06-21 16:44:36.602999849 +0800 @@ -25,6 +25,7 @@ #include linux/highmem.h #include linux/cpu.h #include linux/bitops.h +#include linux/kvm_para.h #include asm/apic.h #include asm/stacktrace.h @@ -583,10 +584,20 @@ static void x86_pmu_disable_all(void) } } +#ifdef CONFIG_KVM_PERF +static int kvm_hw_perf_enable(void); +static int kvm_hw_perf_disable(void); +#endif + void hw_perf_disable(void) { struct cpu_hw_events *cpuc = __get_cpu_var(cpu_hw_events); +#ifdef CONFIG_KVM_PERF + if (!kvm_hw_perf_disable()) + return; +#endif + if (!x86_pmu_initialized()) return; @@ -810,6 +821,11 @@ void hw_perf_enable(void) struct hw_perf_event *hwc; int i, added = cpuc-n_added; +#ifdef CONFIG_KVM_PERF + if (!kvm_hw_perf_enable()) + return; +#endif + if (!x86_pmu_initialized()) return; @@ -1264,6 +1280,7 @@ x86_get_event_constraints(struct cpu_hw_ #include perf_event_intel_lbr.c #include perf_event_intel_ds.c #include perf_event_intel.c +#include perf_event_kvm.c static int __cpuinit x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) @@ -1317,6 +1334,11 @@ void __init init_hw_perf_events(void) pr_info(Performance Events: ); +#ifdef CONFIG_KVM_PERF + if 
(!kvm_init_hw_perf_events()) + return; +#endif + switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: err = intel_pmu_init(); @@ -1541,6 +1563,13 @@ const struct pmu *hw_perf_event_init(str const struct pmu *tmp; int err; +#ifdef CONFIG_KVM_PERF + if (kvm_para_available()) { + tmp = kvm_hw_perf_event_init(event); + return tmp; + } +#endif + err = __hw_perf_event_init(event); if (!err) { /* --- linux-2.6_tip0620/arch/x86/kernel/cpu/perf_event_kvm.c 1970-01-01 08:00:00.0 +0800 +++ linux-2.6_tip0620perfkvm/arch/x86/kernel/cpu/perf_event_kvm.c 2010-06-21 16:44:56.735999849 +0800 @@ -0,0 +1,426 @@ +/* + * Performance events + * + * Copyright (C) 2010 Intel Corporation + * Zhang Yanmin yanmin.zh...@intel.com + * + * For licencing details see kernel-base/COPYING + */ + +#ifdef CONFIG_KVM_PERF + +static atomic_t guest_perf_id; /*Global id counter per guest os*/ + +static inline int get_new_perf_event_id(void) +{ + return atomic_inc_return(guest_perf_id); +} + +#ifdef CONFIG_X86_LOCAL_APIC + +static bool kvm_reserve_pmc_hardware(void) +{ + if (nmi_watchdog == NMI_LOCAL_APIC) + disable_lapic_nmi_watchdog(); + + return true; +} + +static void kvm_release_pmc_hardware(void) +{ + if (nmi_watchdog == NMI_LOCAL_APIC) + enable_lapic_nmi_watchdog(); +} + +#else + +static bool kvm_reserve_pmc_hardware(void) { return true; } +static void kvm_release_pmc_hardware(void) {} + +#endif + +static void kvm_hw_perf_event_destroy(struct perf_event *event) +{ + struct guest_perf_shadow *shadow = event-guest_perf_shadow; + + BUG_ON(!shadow); + kvm_hypercall2(KVM_PERF_OP, KVM_PERF_OP_CLOSE, shadow-id); + + kfree(shadow); + event-guest_perf_shadow = NULL; + + if (atomic_dec_and_mutex_lock(active_events, pmc_reserve_mutex)) { + kvm_release_pmc_hardware(); + mutex_unlock(pmc_reserve_mutex); + } +} + +/* The guest might also run as a host */ +static int check_ontop_guest_overflow(struct perf_event *event, int overflows) +{ + struct host_perf_shadow *host_shadow = event-host_perf_shadow; + if 
(!host_shadow) + return 0; + + if (perf_guest_cbs) + perf_guest_cbs-copy_event_to_shadow(event, overflows); + + return 1; +} + +static int +check_event_overflow(struct perf_event *event, struct pt_regs *regs) +{ + struct perf_sample_data data; + struct guest_perf_shadow *guest_shadow = event-guest_perf_shadow; + s32 overflows; + int i; + int handled = 0; + + local64_set(event-count,
[PATCH] KVM Test: Fix invalid literal bug in ioquit
Sometimes check_cmd does not finish within the set timeout. Then o is an empty string, so int(o) raises ValueError: invalid literal for int() with base 10: ''. So change the test to check the return status instead. Signed-off-by: Feng Yang fy...@redhat.com --- client/tests/kvm/tests/ioquit.py |6 +++--- client/tests/kvm/tests_base.cfg.sample |2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/client/tests/kvm/tests/ioquit.py b/client/tests/kvm/tests/ioquit.py index 389a867..8126139 100644 --- a/client/tests/kvm/tests/ioquit.py +++ b/client/tests/kvm/tests/ioquit.py @@ -23,13 +23,13 @@ def run_ioquit(test, params, env): (s, o) = session.get_command_status_output(bg_cmd, timeout=60) check_cmd = params.get(check_cmd) (s, o) = session2.get_command_status_output(check_cmd, timeout=60) -if int(o) = 0: +if s: raise error.TestError(Fail to add IO workload for Guest OS) logging.info(Sleep for a while) time.sleep(random.randrange(30,100)) -(s, o) = session2.get_command_status_output(check_cmd, timeout=300) -if int(o) = 0: +(s, o) = session2.get_command_status_output(check_cmd, timeout=60) +if s: logging.info(IO workload finished before the VM was killed) logging.info(Kill the virtual machine) vm.process.close() diff --git a/client/tests/kvm/tests_base.cfg.sample b/client/tests/kvm/tests_base.cfg.sample index ce88235..0fd5543 100644 --- a/client/tests/kvm/tests_base.cfg.sample +++ b/client/tests/kvm/tests_base.cfg.sample @@ -411,7 +411,7 @@ variants: - ioquit: type = ioquit background_cmd = for i in 1 2 3 4; do (nohup dd if=/dev/urandom of=/tmp/file bs=102400 count=1000 ) done -check_cmd = ps -a |grep dd |wc -l +check_cmd = ps -a |grep dd login_timeout = 360 - qemu_img: -- 1.5.5.6 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kvm-s390: Dont exit SIE on SIGP sense running
Am Montag 21 Juni 2010, 10:27:50 schrieb Avi Kivity: [...] VCPU_EVENT(vcpu, 3, "%s", "free cpu"); + clear_bit(63 - vcpu->vcpu_id, (unsigned long *) &vcpu->kvm->arch.sca->mcn); if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == (__u64) vcpu->arch.sie_block) vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; Unrelated, do these VCPU_EVENTs want to become ftrace tracepoints? Hmm, don't know. Currently this calls into an s390 debug tracing facility (arch/s390/kernel/debug.c) which is heavily used by our service folks. There are commands for crash and lcrash to show these s390 debug traces from a dump. Maybe it's worth investigating whether we should change some of these events to have both ftrace-tracepoints and the debug traces. Christian -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kvm-s390: Dont exit SIE on SIGP sense running
On Monday 21 June 2010, Christian Borntraeger wrote: Hmm, don't know. Currently this calls into an s390 debug tracing facility (arch/s390/kernel/debug.c) which is heavily used by our service folks. There are commands for crash and lcrash to show these s390 debug traces from a dump. Maybe it's worth investigating whether we should change some of these events to have both ftrace-tracepoints and the debug traces. I think that it would be worthwhile to convert the entire s390 debug code to become tracepoints, either one by one or by making it a subclass with the existing interfaces. Arnd -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM Test: Fix invalid literal bug in ioquit
On 06/21/2010 01:07 PM, Feng Yang wrote: Sometime check_cmd could not finish in setting time. Then o=, so int(o) will cause ValueError: invalid literal for int() with base 10: '' So change to check return status. Signed-off-by: Feng Yang fy...@redhat.com --- client/tests/kvm/tests/ioquit.py |6 +++--- client/tests/kvm/tests_base.cfg.sample |2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/client/tests/kvm/tests/ioquit.py b/client/tests/kvm/tests/ioquit.py index 389a867..8126139 100644 --- a/client/tests/kvm/tests/ioquit.py +++ b/client/tests/kvm/tests/ioquit.py @@ -23,13 +23,13 @@ def run_ioquit(test, params, env): (s, o) = session.get_command_status_output(bg_cmd, timeout=60) check_cmd = params.get(check_cmd) (s, o) = session2.get_command_status_output(check_cmd, timeout=60) -if int(o) = 0: +if s: raise error.TestError(Fail to add IO workload for Guest OS) Please use 'if s != 0' because in case of a timeout s is None. logging.info(Sleep for a while) time.sleep(random.randrange(30,100)) -(s, o) = session2.get_command_status_output(check_cmd, timeout=300) -if int(o) = 0: +(s, o) = session2.get_command_status_output(check_cmd, timeout=60) +if s: Same here. logging.info(IO workload finished before the VM was killed) logging.info(Kill the virtual machine) vm.process.close() diff --git a/client/tests/kvm/tests_base.cfg.sample b/client/tests/kvm/tests_base.cfg.sample index ce88235..0fd5543 100644 --- a/client/tests/kvm/tests_base.cfg.sample +++ b/client/tests/kvm/tests_base.cfg.sample @@ -411,7 +411,7 @@ variants: - ioquit: type = ioquit background_cmd = for i in 1 2 3 4; do (nohup dd if=/dev/urandom of=/tmp/file bs=102400 count=1000 ) done -check_cmd = ps -a |grep dd |wc -l +check_cmd = ps -a |grep dd login_timeout = 360 - qemu_img: -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-1858940 ] Kernel panic - not syncing: IO-APIC + timer doesn't work.
Bugs item #1858940, was opened at 2007-12-27 15:08 Message generated for change (Comment added) made by jessorensen You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=1858940group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Closed Resolution: Fixed Priority: 5 Private: No Submitted By: Kai Londenberg (kai_londenberg) Assigned to: Nobody/Anonymous (nobody) Summary: Kernel panic - not syncing: IO-APIC + timer doesn't work. Initial Comment: The problem: When booting a Guest with -smp 2 option (and without -no-acpi, since that's essential for SMP), I get a Kernel panic. My Setup: Host: 64_86 bit Ubuntu 7.10 with custom built 2.6.23.12 Kernel. AMD Dual CPU with Virtualization extensions. Guest: Ubuntu 6.06 - Server - with pre-compiled 2.6.15.29 server Kernel, with SMP Support. I logged the kernel output via serial console redirection into a file. -- Comment By: Jes Sorensen (jessorensen) Date: 2010-06-21 13:32 Message: Ubuntu 6.06 x86_64 boots fine with -smp 2 on an AMD system with a recent KVM / qemu-kvm combo. Closing -- Comment By: TJ (tjworld) Date: 2008-01-30 21:40 Message: Logged In: YES user_id=1048563 Originator: NO I'm experiencing the same problem on Ubuntu Gutsy 7.10 x86_64 (2.6.22-14-generic). I have working guest images of Ubuntu Gutsy 7.10 x86 with kvm-51 but hadn't used them in a while. Tried to boot them earlier using the same launch scripts as usual and the process hung with the CPU looping at 100% usage. I then downloaded, built, and installed kvm-60 thinking it was some strange issue that hadn't manifested previously. The same problem occurs with kvm-60. I then booted the images with modified grub settings to watch the kernel log and saw it fails to find a timer as reported here. 
I eventually found that by adding -no-acpi to the launch command-line they would boot successfully. I'm not sure what changed in the interim but obviously something did. This is the working launch options: qemuctl -qemu vdeq kvm -name Gutsy-Desktop -boot c -m 512 -hda /home/all/VirtualMachines/Ubuntu-Gutsy-Desktop-x86.ovl -k en-gb -net nic,model=rtl8139,macaddr=56:44:45:30:30:31,vlan=0 -soundhw es1370 -usb -net vde,sock=/var/run/kvm0.ctl,vlan=0 -no-acpi -- Comment By: Technologov (technologov) Date: 2007-12-27 16:19 Message: Logged In: YES user_id=1839746 Originator: NO 1. If you use KVM from 2.6.23 kernel, then I strongly recommend you to try the newest KVM-58, as it has some fixes for AMD. 2. What bitness ? Guest OS is 32 or 64-bit ? 3. ACPI is essential only for Windows SMP. On Linux however, SMP works, and ACPI on/off doesn't affects it. -Alexey Technologov -- Comment By: Kai Londenberg (kai_londenberg) Date: 2007-12-27 15:11 Message: Logged In: YES user_id=1299941 Originator: YES Using kvm 58, started with -smp 2 option. -- Comment By: Kai Londenberg (kai_londenberg) Date: 2007-12-27 15:09 Message: Logged In: YES user_id=1299941 Originator: YES File Added: boot.log -- You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=1858940group_id=180599 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V2 1/5] para virt interface of perf to support kvm guest os statistics collection in guest os
On 06/21/2010 12:31 PM, Zhang, Yanmin wrote: Here is the version 2. ChangeLog since V1: Mostly changes based on Avi's suggestions. 1) Use a id to identify the perf_event between host and guest; 2) Changes lots of codes to deal with malicious guest os; 3) Add a perf_event number limitation per gust os instance; 4) Support guest os on the top of another guest os scenario. But I didn't test it yet as there is no environment. The design is to add 2 pointers in struct perf_event. One is used by host and the other is used by guest. 5) Fix the bug to support 'perf stat'. The key is sync count data back to guest when guest tries to disable the perf_event at host side. 6) Add a clear ABI of PV perf. Please use meaningful subject lines for individual patches. I don't implement live migration feature. Avi, Is live migration necessary on pv perf support? Yes. --- linux-2.6_tip0620/Documentation/kvm/paravirt-perf.txt 1970-01-01 08:00:00.0 +0800 +++ linux-2.6_tip0620perfkvm/Documentation/kvm/paravirt-perf.txt 2010-06-21 15:21:39.312999849 +0800 @@ -0,0 +1,133 @@ +The x86 kvm paravirt perf event interface +=== + +This paravirt interface is responsible for supporting guest os perf event +collections. If guest os supports this interface, users could run command +perf in guest os directly. + +Design + + +Guest os calls a series of hypercalls to communicate with host kernel to +create/enable/disable/close perf events. Host kernel notifies guest os +by injecting an NMI to guest os when an event overflows. Guets os need +go through all its active events to check if they overflow, and output +performance statistics if they do. + +ABI += + +1) Detect if host kernel supports paravirt perf interface: +#define KVM_FEATURE_PV_PERF 4 +Host kernel defines above cpuid bit. Guest os calls cpuid to check if host +os retuns this bit. If it does, it mean host kernel supports paravirt perf +interface. 
+ +2) Open a new event at host side: +kvm_hypercall3(KVM_PERF_OP, KVM_PERF_OP_OPEN, param_addr_low32bit, +param_addr_high32bit); + +#define KVM_PERF_OP3 +/* Operations for KVM_PERF_OP */ +#define KVM_PERF_OP_OPEN1 +#define KVM_PERF_OP_CLOSE 2 +#define KVM_PERF_OP_ENABLE 3 +#define KVM_PERF_OP_DISABLE 4 +#define KVM_PERF_OP_READ5 +/* + * guest_perf_attr is used when guest calls hypercall to + * open a new perf_event at host side. Mostly, it's a copy of + * perf_event_attr and deletes something not used by host kernel. + */ +struct guest_perf_attr { +__u32 type; Need padding here, otherwise the structure is different on 32-bit and 64-bit guests. +__u64 config; +__u64 sample_period; +__u64 sample_type; +__u64 read_format; +__u64 flags; and here. +__u32 bp_type; +__u64 bp_addr; +__u64 bp_len; Do we actually support breakpoints on the guest? Note the hardware breakpoints are also usable by the guest, so if the host uses them, we won't be able to emulate them correctly. We can let the guest to breakpoint perf monitoring itself and drop this feature. +}; What about documentation for individual fields? Esp. type, config, and flags, but also the others. +/* + * data communication area about perf_event between + * Host kernel and guest kernel + */ +struct guest_perf_event { +u64 count; +atomic_t overflows; Please use __u64 and __u32, assume guests don't have Linux internal types (though of course the first guest _is_ Linux). Add padding to 64-bit. +}; +struct guest_perf_event_param { +__u64 attr_addr; +__u64 guest_event_addr; +/* In case there is an alignment issue, we put id as the last one */ +int id; Add explicit padding to be sure. Also makes sense to add a flags field for future expansion. +}; + +param_addr_low32bit and param_addr_high32bit compose a u64 integer which means +the physical address of parameter struct guest_perf_event_param. +struct guest_perf_event_param consists of 3 members. attr_addr has the +physical address of parameter struct guest_perf_attr. 
guest_event_addr has the +physical address of a parameter whose type is struct guest_perf_eventi which +has to be aligned with 4 bytes. +guest os need allocate an exclusive id per event in this guest os instance, and save it to +guest_perf_event_param-id. Later on, the id is the only method to notify host +kernel about on what event guest os wants host kernel to operate. Need a way to expose the maximum number of events available to the guest. I suggest exposing it
Re: [PATCH V2 2/5] para virt interface of perf to support kvm guest os statistics collection in guest os
On 06/21/2010 12:31 PM, Zhang, Yanmin wrote: The 2nd patch is to change the definition of perf_event to facilitate perf attr copy when a hypercall happens. Signed-off-by: Zhang Yanminyanmin_zh...@linux.intel.com --- --- linux-2.6_tip0620/include/linux/perf_event.h2010-06-21 15:19:52.821999849 +0800 +++ linux-2.6_tip0620perfkvm/include/linux/perf_event.h 2010-06-21 16:53:49.283999849 +0800 @@ -188,7 +188,10 @@ struct perf_event_attr { __u64 sample_type; __u64 read_format; Assuming these flags are available to the guest? - __u64 disabled : 1, /* off by default*/ + union { + __u64 flags; + struct { + __u64 disabled : 1, /* off by default*/ inherit: 1, /* children inherit it */ inherit is meaningless for a guest. pinned : 1, /* must always be on PMU */ We cannot allow a guest to pin a counter. The other flags are also problematic. I'd like to see virt-specific flags (probably we'll only need kernel/user and nested_hv for nested virtualization). Something that is worrying is that we don't expose group information. perf will multiplex the events for us, but there will be a loss in accuracy. #ifdef CONFIG_HAVE_HW_BREAKPOINT #includeasm/hw_breakpoint.h #endif @@ -753,6 +752,20 @@ struct perf_event { perf_overflow_handler_t overflow_handler; + /* +* pointers used by kvm perf paravirt interface. +* +* 1) Used in host kernel and points to host_perf_shadow which +* has information about guest perf_event +*/ + void*host_perf_shadow; Can we have real types instead of void pointers? + /* +* 2) Used in guest kernel and points to guest_perf_shadow which +* is used as a communication area with host kernel. Host kernel +* copies overflow data to it when an event overflows. +*/ + void*guest_perf_shadow; It's strange to see both guest and host parts in the same patch. Splitting to separate patches will really help review. 
@@ -1626,9 +1629,22 @@ void perf_event_task_tick(struct task_st if (ctx ctx-nr_events ctx-nr_events != ctx-nr_active) rotate = 1; - perf_ctx_adjust_freq(cpuctx-ctx); - if (ctx) - perf_ctx_adjust_freq(ctx); +#ifdef CONFIG_KVM_PERF + if (kvm_para_available()) { + /* +* perf_ctx_adjust_freq causes lots of pmu-read which would +* trigger too many vmexit to host kernel. We disable it +* under para virt situation +*/ + adjust_freq = 0; + } +#endif Perhaps we can have a batch read interface which will read many counters at once. This would reduce the number of exits. Also adjust the frequency less frequently. + + if (adjust_freq) { + perf_ctx_adjust_freq(cpuctx-ctx); + if (ctx) + perf_ctx_adjust_freq(ctx); + } -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V2 3/5] para virt interface of perf to support kvm guest os statistics collection in guest os
On 06/21/2010 12:31 PM, Zhang, Yanmin wrote: The 3rd patch is to implement para virt perf at host kernel. @@ -64,6 +73,85 @@ struct kvm_mmu_op_release_pt { #ifdef __KERNEL__ #includeasm/processor.h +/* + * In host kernel, perf_event-host_perf_shadow points to + * host_perf_shadow which records some information + * about the guest. + */ +struct host_perf_shadow { + /* guest perf_event id passed from guest os */ + int id; + /* +* Host kernel saves data into data member counter firstly. +* kvm will get data from this counter and calls kvm functions +* to copy or add data back to guets os before entering guest os +* next time +*/ + struct guest_perf_event counter; + /* guest_event_addr is gpa_t pointing to guest os guest_perf_event*/ + __u64 guest_event_addr; So just use gpa_t as the type. + + /* +* Link to of kvm.kvm_arch.shadow_hash_table +*/ + struct list_head shadow_entry; + struct kvm_vcpu *vcpu; + + struct perf_event *host_event; + /* +* Below counter is to prevent malicious guest os to try to +* close/enable event at the same time. +*/ + atomic_t ref_counter; If events are made per-vcpu (like real hardware), races become impossible. +}; Please move this structure to include/linux/kvm_host.h. No need to spam kvm_para.h. Note it's not x86 specific (though you can leave arch enabling to arch maintainers). + +/* + * In guest kernel, perf_event-guest_shadow points to + * guest_perf_shadow which records some information + * about the guest. + */ +struct guest_perf_shadow { + /* guest perf_event id passed from guest os */ + int id; + /* +* Host kernel kvm saves data into data member counter +*/ + struct guest_perf_event counter; +}; Don't ordinary perf structures already have a counter ID which we can reuse? + +/* + * guest_perf_attr is used when guest calls hypercall to + * open a new perf_event at host side. Mostly, it's a copy of + * perf_event_attr and deletes something not used by host kernel. 
+ */ +struct guest_perf_attr { + __u32 type; + __u64 config; + __u64 sample_period; + __u64 sample_type; + __u64 read_format; + __u64 flags; + __u32 bp_type; + __u64 bp_addr; + __u64 bp_len; +}; This is really not a guest or host structure, it's part of the interface. So please rename it (and similar) kvm_pv_perf_*. @@ -24,6 +24,7 @@ #includeasm/desc.h #includeasm/mtrr.h #includeasm/msr-index.h +#includeasm/perf_event.h #define KVM_MAX_VCPUS 64 #define KVM_MEMORY_SLOTS 32 @@ -360,6 +361,18 @@ struct kvm_vcpu_arch { /* fields used by HYPER-V emulation */ u64 hv_vapic; + + /* +* Fields used by PARAVIRT perf interface: +* +* kvm checks overflow_events before entering guest os, +* and copy data back to guest os. +* event_mutex is to avoid a race between NMI perf event overflow +* handler, event close, and enable/disable. +*/ + struct mutex event_mutex; No race can exist. The host NMI handler cannot take any mutex so it must be immune to races. The guest NMI handlers and callbacks are all serialized by the guest itself. + int overflows; + struct perf_event *overflow_events[X86_PMC_IDX_MAX]; }; KVM_PV_PERF_MAX_EVENTS (which needs to be exposed to the guest via cpuid). struct kvm_mem_alias { @@ -377,6 +390,9 @@ struct kvm_mem_aliases { int naliases; }; +#define KVM_PARAVIRT_PERF_EVENT_ENTRY_BITS (10) +#define KVM_PARAVIRT_PERF_EVENT_ENTRY_NUM (1KVM_PARAVIRT_PERF_EVENT_ENTRY_BITS) What are these? + struct kvm_arch { struct kvm_mem_aliases *aliases; @@ -415,6 +431,15 @@ struct kvm_arch { /* fields used by HYPER-V emulation */ u64 hv_guest_os_id; u64 hv_hypercall; + + /* +* fields used by PARAVIRT perf interface: +* Used to organize all host perf_events representing guest +* perf_event on a specific kvm instance +*/ + atomic_t kvm_pv_event_num; + spinlock_t shadow_lock; + struct list_head *shadow_hash_table; Need to be per-vcpu. Also wrap in a kvm_vcpu_perf structure, the names are very generic. Why do we need the hash table? Use the index directly? 
/* * hypercalls use architecture specific --- linux-2.6_tip0620/arch/x86/kvm/vmx.c2010-06-21 15:19:39.322999849 +0800 +++ linux-2.6_tip0620perfkvm/arch/x86/kvm/vmx.c 2010-06-21 15:21:39.310999849 +0800 @@ -3647,6 +3647,7 @@ static int vmx_handle_exit(struct kvm_vc struct vcpu_vmx *vmx = to_vmx(vcpu); u32 exit_reason =
Re: [PATCH V2 5/5] para virt interface of perf to support kvm guest os statistics collection in guest os
On 06/21/2010 12:31 PM, Zhang, Yanmin wrote: The 5th patch is applied to the latest qemu-kvm tree. --- qemu-kvm_0621/target-i386/kvm.c 2010-06-21 11:00:29.0 +0800 +++ qemu-kvm_0621_perf/target-i386/kvm.c2010-06-21 13:00:14.136999850 +0800 @@ -150,6 +150,9 @@ struct kvm_para_features { #ifdef KVM_CAP_PV_MMU { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP }, #endif +#ifdef KVM_CAP_PV_PERF +{ KVM_CAP_PV_PERF, KVM_FEATURE_PV_PERF }, +#endif { -1, -1 } }; Not really necessary any more - if you expose the cpuid bit via KVM_GET_SUPPORTED_CPUID2 then 'qemu -cpu host' will automatically enable it. On the other hand, do update target-i386/cpuid.c:kvm_feature_name so people can enable the feature using qemu -cpu ...,+kvmperf. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] [PATCH 1/2] Add 'serial' attribute to virtio-blk devices
On Fri, Jun 18, 2010 at 01:38:02PM -0500, Ryan Harper wrote: Create a new attribute for virtio-blk devices that will fetch the serial number of the block device. This attribute can be used by udev to create disk/by-id symlinks for devices that don't have a UUID (filesystem) associated with them. ATA_IDENTIFY strings are special in that they can be up to 20 chars long and aren't required to be NULL-terminated. The buffer is also zero-padded, meaning that if the serial is 19 chars or less we get a NULL-terminated string. When copying this value into a string buffer, we must be careful to copy up to the NULL (if it is present) and only 20 if it is longer and not to attempt to NULL terminate; this isn't needed. Why is this virtio-blk specific? In a later mail you mention you want to use it for udev. So please export this from scsi/libata as well and we have one proper interface that we can use for all devices. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-1841658 ] OpenSolaris 64bit panic with kvm-54
Bugs item #1841658, was opened at 2007-11-30 13:11 Message generated for change (Comment added) made by jessorensen You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=1841658group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Open Resolution: None Priority: 3 Private: No Submitted By: Carlo Marcelo Arenas Belon (carenas) Assigned to: Nobody/Anonymous (nobody) Summary: OpenSolaris 64bit panic with kvm-54 Initial Comment: Wouldn't mark it as a regression per-se as vanilla kvm-53 wouldn't work (because of the need for IDE patches to get it to run/install), but vanilla kvm-54 or kvm-54 + the same patches added to kvm-53 and including pre-kvm-55 patches like 71be592a14aa8d127315b2c47bf83cc0d810a341 wouldn't work. The panic is observed in kvm-54 (--no-kvm runs ok, and --no-kvm-irqchip doesn't help) while running nexenta OpenSolaris alpha 7 or beta 1 (other OpenSolaris distributions most likely affected as well) and with the following trace : panic[cpu0]/thread=fffec2de2260: BAD TRAP: type=e (#pf Page fault) rp=ff0001735f30 addr=0 occurred in module unix due to a NULL pointer dereference dbus: #pf Page fault Bad kernel fault at addr=0x0 pid=278, pc=0xfb83c189, sp=0xff0001736028, eflags=0x10246 cr0: 80050033pg,wp,ne,et,mp,pe cr4: 6b8xmme,fxsr,pge,pae,pse,de cr2: 0 cr3: 7dc4000 cr8: 0 rdi:0 rsi: fffec0025630 rdx: fffec2de2260 rcx:1 r8: fffec0025630 r9:3 rax:0 rbx:0 rbp: ff0001736080 r10:1 r11: fffec1ad31e0 r12:0 r13: fffec0025680 r14: c0025488 r15:0 fsb:0 gsb: fbc26ef0 ds: 4b es: 4b fs:0 gs: 1c3 trp:e err:0 rip: fb83c189 cs: 30 rfl:10246 rsp: ff0001736028 ss: 38 ff0001735e10 unix:die+c8 () ff0001735f20 unix:trap+135b () ff0001735f30 unix:cmntrap+e9 () ff0001736080 unix:mutex_exit+9 () ff00017360c0 genunix:kmem_alloc+88 () ff0001736110 zfs:zio_push_transform+3a () ff0001736190 
zfs:zio_create+256 () ff0001736240 zfs:zio_vdev_child_io+97 () ff0001736320 zfs:vdev_cache_read+182 () ff0001736370 zfs:vdev_disk_io_start+41 () ff0001736390 zfs:vdev_io_start+1d () ff00017363d0 zfs:zio_vdev_io_start+123 () ff00017363f0 zfs:zio_next_stage_async+bb () ff0001736410 zfs:zio_nowait+11 () ff0001736450 zfs:vdev_mirror_io_start+18f () ff0001736490 zfs:zio_vdev_io_start+131 () ff00017364b0 zfs:zio_next_stage+b3 () ff00017364e0 zfs:zio_ready+10e () ff0001736500 zfs:zio_next_stage+b3 () ff0001736550 zfs:zio_wait_for_children+5d () ff0001736570 zfs:zio_wait_children_ready+20 () ff0001736590 zfs:zio_next_stage_async+bb () ff00017365b0 zfs:zio_nowait+11 () ff0001736660 zfs:arc_read+4e8 () ff0001736700 zfs:dbuf_read_impl+129 () ff0001736760 zfs:dbuf_read+c5 () ff0001736810 zfs:dmu_buf_hold_array_by_dnode+1c4 () ff00017368a0 zfs:dmu_buf_hold_array+74 () ff0001736930 zfs:dmu_read_uio+4d () ff00017369c0 zfs:zfs_read+15e () ff0001736a30 genunix:fop_read+69 () ff0001736af0 genunix:vn_rdwr+161 () ff0001736c70 genunix:gexec+11c () ff0001736e90 genunix:exec_common+41d () ff0001736ec0 genunix:exece+1b () ff0001736f10 unix:brand_sys_sysenter+1f2 () while running in a Gentoo Linux 2007.0 host with Intel(R) Core(TM)2 CPU 6320. 32bit OpenSolaris works fine -- Comment By: Jes Sorensen (jessorensen) Date: 2010-06-21 14:55 Message: Hi, I pulled down the iso image you mentioned, and it seems to boot fine for me here. I was able to run the install to a local disk image and boot it again afterwards. This is using a 64 bit guest CPU on a Fedora 12 system. What flags are you using to launch it when you see the crash? Are you running on an Intel or an AMD system and did you specify SMP by any chance? 
Cheers, Jes -- Comment By: Stefan Neufeind (neufeind) Date: 2010-06-18 22:08 Message: simply from the OpenSolaris-website: http://hub.opensolaris.org/bin/view/Main/downloads Release 2009.06 for x86, installing it with x86_64-processor in KVM -- Comment By: Jes Sorensen (jessorensen) Date: 2010-06-18 17:22 Message: Interesting, can you give me a pointer to where I can download that ISO image from? Thanks, Jes
[PATCH] KVM: PPC: Make BAT only guest segments work
When a guest sets its SR entry to invalid, we may still find a corresponding entry in a BAT. So we need to make sure we're not faulting on invalid SR entries, but instead just claim them to be BAT resolved. This resolves breakage experienced when using libogc-based guests. Signed-off-by: Alexander Graf ag...@suse.de --- arch/powerpc/kvm/book3s_32_mmu.c | 8 ++++---- 1 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 0b10503..3292d76 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -354,10 +354,10 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, *vsid = VSID_REAL_DR | gvsid; break; case MSR_DR|MSR_IR: - if (!sr->valid) - return -1; - - *vsid = sr->vsid; + if (sr->valid) + *vsid = sr->vsid; + else + *vsid = VSID_BAT | gvsid; break; default: BUG(); -- 1.6.0.2 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] KVM: PPC: Use kernel hash function
The linux kernel already provides a hash function. Let's reuse that instead of reinventing the wheel! Signed-off-by: Alexander Graf ag...@suse.de --- arch/powerpc/kvm/book3s_32_mmu_host.c | 10 ++ arch/powerpc/kvm/book3s_64_mmu_host.c | 11 ++- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 0bb6600..37779a5 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -19,6 +19,7 @@ */ #include linux/kvm_host.h +#include linux/hash.h #include asm/kvm_ppc.h #include asm/kvm_book3s.h @@ -182,14 +183,7 @@ static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) * a hash, so we don't waste cycles on looping */ static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) { - return (u16)(((gvsid (SID_MAP_BITS * 7)) SID_MAP_MASK) ^ -((gvsid (SID_MAP_BITS * 6)) SID_MAP_MASK) ^ -((gvsid (SID_MAP_BITS * 5)) SID_MAP_MASK) ^ -((gvsid (SID_MAP_BITS * 4)) SID_MAP_MASK) ^ -((gvsid (SID_MAP_BITS * 3)) SID_MAP_MASK) ^ -((gvsid (SID_MAP_BITS * 2)) SID_MAP_MASK) ^ -((gvsid (SID_MAP_BITS * 1)) SID_MAP_MASK) ^ -((gvsid (SID_MAP_BITS * 0)) SID_MAP_MASK)); + return hash_64(gvsid, SID_MAP_BITS); } diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index e4b5744..5d0e281 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -20,6 +20,7 @@ */ #include linux/kvm_host.h +#include linux/hash.h #include asm/kvm_ppc.h #include asm/kvm_book3s.h @@ -164,17 +165,9 @@ static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) * a hash, so we don't waste cycles on looping */ static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) { - return (u16)(((gvsid (SID_MAP_BITS * 7)) SID_MAP_MASK) ^ -((gvsid (SID_MAP_BITS * 6)) SID_MAP_MASK) ^ -((gvsid (SID_MAP_BITS * 5)) SID_MAP_MASK) ^ -((gvsid (SID_MAP_BITS * 4)) SID_MAP_MASK) ^ -((gvsid (SID_MAP_BITS * 3)) SID_MAP_MASK) ^ -((gvsid 
(SID_MAP_BITS * 2)) SID_MAP_MASK) ^ -((gvsid (SID_MAP_BITS * 1)) SID_MAP_MASK) ^ -((gvsid (SID_MAP_BITS * 0)) SID_MAP_MASK)); + return hash_64(gvsid, SID_MAP_BITS); } - static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) { struct kvmppc_sid_map *map; -- 1.6.0.2 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] KVM: PPC: Remove obsolete kvmppc_mmu_find_pte
Initially we had to search for pte entries to invalidate them. Since the logic has improved since then, we can just get rid of the search function. Signed-off-by: Alexander Graf ag...@suse.de --- arch/powerpc/include/asm/kvm_book3s.h |1 - arch/powerpc/kvm/book3s_32_mmu_host.c | 20 arch/powerpc/kvm/book3s_64_mmu_host.c | 20 3 files changed, 0 insertions(+), 41 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 6f74d93..4e99559 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -115,7 +115,6 @@ extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu); extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); -extern struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data); extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 37779a5..904f5ac 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -151,26 +151,6 @@ void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) } } -struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data) -{ - int i; - u64 guest_vp; - - guest_vp = vcpu-arch.mmu.ea_to_vp(vcpu, ea, false); - for (i=0; ivcpu-arch.hpte_cache_offset; i++) { - struct hpte_cache *pte; - - pte = vcpu-arch.hpte_cache[i]; - if (!pte-host_va) - continue; - - if (pte-pte.vpage == guest_vp) - return pte-pte; - } - - return NULL; -} - static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) { if 
(vcpu-arch.hpte_cache_offset == HPTEG_CACHE_NUM) diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 5d0e281..4ccdde1 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -133,26 +133,6 @@ void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) } } -struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data) -{ - int i; - u64 guest_vp; - - guest_vp = vcpu-arch.mmu.ea_to_vp(vcpu, ea, false); - for (i=0; ivcpu-arch.hpte_cache_offset; i++) { - struct hpte_cache *pte; - - pte = vcpu-arch.hpte_cache[i]; - if (!pte-host_va) - continue; - - if (pte-pte.vpage == guest_vp) - return pte-pte; - } - - return NULL; -} - static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) { if (vcpu-arch.hpte_cache_offset == HPTEG_CACHE_NUM) -- 1.6.0.2 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/2] KVM: PPC: Add generic hpte management functions
Currently the shadow paging code keeps an array of entries it knows about. Whenever the guest invalidates an entry, we loop through that entry, trying to invalidate matching parts. While this is a really simple implementation, it is probably the most ineffective one possible. So instead, let's keep an array of lists around that are indexed by a hash. This way each PTE can be added by 4 list_add, removed by 4 list_del invocations and the search only needs to loop through entries that share the same hash. This patch implements said lookup and exports generic functions that both the 32-bit and 64-bit backend can use. Signed-off-by: Alexander Graf ag...@suse.de --- arch/powerpc/kvm/book3s_mmu_hpte.c | 287 1 files changed, 287 insertions(+), 0 deletions(-) create mode 100644 arch/powerpc/kvm/book3s_mmu_hpte.c diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c new file mode 100644 index 000..8ee0f1e --- /dev/null +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -0,0 +1,287 @@ +/* + * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved. + * + * Authors: + * Alexander Graf ag...@suse.de + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include linux/kvm_host.h +#include linux/hash.h +#include linux/slab.h + +#include asm/kvm_ppc.h +#include asm/kvm_book3s.h +#include asm/machdep.h +#include asm/mmu_context.h +#include asm/hw_irq.h + +#define PTE_SIZE 12 + +/* #define DEBUG_MMU */ +/* #define DEBUG_SLB */ + +#ifdef DEBUG_MMU +#define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__) +#else +#define dprintk_mmu(a, ...) do { } while(0) +#endif + +#ifdef DEBUG_SLB +#define dprintk_slb(a, ...) printk(KERN_INFO a, __VA_ARGS__) +#else +#define dprintk_slb(a, ...) do { } while(0) +#endif + +static inline u64 kvmppc_mmu_hash_pte(u64 eaddr) { + return hash_64(eaddr PTE_SIZE, HPTEG_HASH_BITS); +} + +static inline u64 kvmppc_mmu_hash_vpte(u64 vpage) { + return hash_64(vpage 0xfULL, HPTEG_HASH_BITS); +} + +static inline u64 kvmppc_mmu_hash_vpte_long(u64 vpage) { + return hash_64((vpage 0xff000ULL) 12, HPTEG_HASH_BITS); +} + +void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte) +{ + u64 index; + + /* Add to ePTE list */ + index = kvmppc_mmu_hash_pte(pte-pte.eaddr); + list_add(pte-list_pte, vcpu-arch.hpte_hash_pte[index]); + + /* Add to vPTE list */ + index = kvmppc_mmu_hash_vpte(pte-pte.vpage); + list_add(pte-list_vpte, vcpu-arch.hpte_hash_vpte[index]); + + /* Add to vPTE_long list */ + index = kvmppc_mmu_hash_vpte_long(pte-pte.vpage); + list_add(pte-list_vpte_long, vcpu-arch.hpte_hash_vpte_long[index]); + + /* Add to all list */ + list_add(pte-list_all, vcpu-arch.hpte_all); +} + +static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) +{ + dprintk_mmu(KVM: Flushing SPT: 0x%lx (0x%llx) - 0x%llx\n, + pte-pte.eaddr, pte-pte.vpage, pte-host_va); + + /* Different for 32 and 64 bit */ + kvmppc_mmu_invalidate_pte(vcpu, pte); + + if (pte-pte.may_write) + kvm_release_pfn_dirty(pte-pfn); + else + kvm_release_pfn_clean(pte-pfn); + + list_del(pte-list_pte); + list_del(pte-list_vpte); + list_del(pte-list_vpte_long); + list_del(pte-list_all); + + 
kmem_cache_free(vcpu-arch.hpte_cache, pte); +} + +static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu) +{ + struct hpte_cache *pte, *tmp; + + list_for_each_entry_safe(pte, tmp, vcpu-arch.hpte_all, list_all) { + /* Jump over the helper entry */ + if (pte-list_all == vcpu-arch.hpte_all) + continue; + + invalidate_pte(vcpu, pte); + } +} + +void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) +{ + u64 i; + + dprintk_mmu(KVM: Flushing %d Shadow PTEs: 0x%lx 0x%lx\n, + vcpu-arch.hpte_cache_count, guest_ea, ea_mask); + + switch (ea_mask) { + case ~0xfffUL: + { + struct list_head *list; + struct hpte_cache *pte, *tmp; + + /* Find the list of entries in the map */ + list =
[PATCH 2/2] KVM: PPC: Make use of hash based Shadow MMU
We just introduced generic functions to handle shadow pages on PPC. This patch makes the respective backends make use of them, getting rid of a lot of duplicate code along the way. Signed-off-by: Alexander Graf ag...@suse.de --- arch/powerpc/include/asm/kvm_book3s.h |7 ++ arch/powerpc/include/asm/kvm_host.h | 16 - arch/powerpc/kvm/Makefile |2 + arch/powerpc/kvm/book3s_32_mmu_host.c | 104 +++- arch/powerpc/kvm/book3s_64_mmu_host.c | 98 ++ 5 files changed, 39 insertions(+), 188 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 4e99559..a96e405 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -115,6 +115,13 @@ extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu); extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); + +extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte); +extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu); +extern void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu); +extern int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu); +extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte); + extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 0c9ad86..0e3fc82 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -38,7 +38,9 @@ #define KVM_NR_PAGE_SIZES 1 #define KVM_PAGES_PER_HPAGE(x) (1UL31) -#define HPTEG_CACHE_NUM 1024 +#define HPTEG_CACHE_NUM(1 15) +#define 
HPTEG_HASH_BITS13 +#define HPTEG_HASH_NUM (1 HPTEG_HASH_BITS) struct kvm; struct kvm_run; @@ -151,6 +153,10 @@ struct kvmppc_mmu { }; struct hpte_cache { + struct list_head list_all; + struct list_head list_pte; + struct list_head list_vpte; + struct list_head list_vpte_long; u64 host_va; u64 pfn; ulong slot; @@ -282,8 +288,12 @@ struct kvm_vcpu_arch { unsigned long pending_exceptions; #ifdef CONFIG_PPC_BOOK3S - struct hpte_cache hpte_cache[HPTEG_CACHE_NUM]; - int hpte_cache_offset; + struct kmem_cache *hpte_cache; + struct list_head hpte_hash_pte[HPTEG_HASH_NUM]; + struct list_head hpte_hash_vpte[HPTEG_HASH_NUM]; + struct list_head hpte_hash_vpte_long[HPTEG_HASH_NUM]; + struct list_head hpte_all; + int hpte_cache_count; #endif }; diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index ff43606..d45c818 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -45,6 +45,7 @@ kvm-book3s_64-objs := \ book3s.o \ book3s_emulate.o \ book3s_interrupts.o \ + book3s_mmu_hpte.o \ book3s_64_mmu_host.o \ book3s_64_mmu.o \ book3s_32_mmu.o @@ -57,6 +58,7 @@ kvm-book3s_32-objs := \ book3s.o \ book3s_emulate.o \ book3s_interrupts.o \ + book3s_mmu_hpte.o \ book3s_32_mmu_host.o \ book3s_32_mmu.o kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 904f5ac..0b51ef8 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -58,105 +58,19 @@ static ulong htab; static u32 htabmask; -static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) +void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { volatile u32 *pteg; - dprintk_mmu(KVM: Flushing SPTE: 0x%llx (0x%llx) - 0x%llx\n, - pte-pte.eaddr, pte-pte.vpage, pte-host_va); - + /* Remove from host HTAB */ pteg = (u32*)pte-slot; - pteg[0] = 0; + + /* And make sure it's gone from the TLB too */ asm volatile (sync); asm volatile 
(tlbie %0 : : r (pte-pte.eaddr) : memory); asm volatile (sync); asm volatile (tlbsync); - - pte-host_va = 0; - - if (pte-pte.may_write) - kvm_release_pfn_dirty(pte-pfn); - else - kvm_release_pfn_clean(pte-pfn); -} - -void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) -{ - int i; - - dprintk_mmu(KVM: Flushing %d Shadow PTEs: 0x%x 0x%x\n, - vcpu-arch.hpte_cache_offset, guest_ea, ea_mask); -
[RFC] Getting specific device from qdev structs
Hi, I'm working on implementing AMD IOMMU emulation in QEMU/KVM and I'm also creating an API for address translation and access checking. Ideally, this API should work with different kinds of devices and IOMMUs. These operations would typically require specific device information to figure out which IOMMU is responsible and how it refers to the actual device (bus-device-function number for example). At the same time, I need to get this from deep within AIO/DMA code, so adding specific members in those structures doesn't seem to be the best way. So I've been looking for a way to obtain things like a PCIDevice from a more generic structure (say from hw/qdev.h), e.g. DeviceInfo. Is there something like that already implemented? My searches turned up nothing. If not, perhaps something like this would be acceptable? enum DeviceType { DEV_TYPE_PCI, DEV_TYPE_ISA, [...] }; struct GenericDevice { enum DeviceType type; union { PCIDevice *pci_dev; ISADevice *isa_dev; [...] }; }; /* * Embed this in DeviceState for example. Make it * somehow accessible from AIO/DMA code. */ Or some container_of() / DO_UPCAST() magic might do: struct GenericDevice { enum DeviceType type; DeviceState qdev; }; /* Embed this in PCIDevice and pass a pointer to GenericDevice around. */ struct PCIDevice { GenericDevice gdev; [...] } int iommu_translate(struct GenericDevice *dev, [other args]) { PCIDevice *pci_dev; ISADevice *isa_dev; switch (dev->type) { case DEV_TYPE_PCI: pci_dev = container_of(dev, PCIDevice, gdev); return iommu_pci_translate(pci_dev, [other args]); case DEV_TYPE_ISA: isa_dev = container_of(dev, ISADevice, gdev); return iommu_pci_translate(isa_dev, [other args]); [...] default: break; } [sensible default] return 0; } Note we can't actually do any container_of() magic without recording the type of the container structure somewhere. What do you think? I'd appreciate some help here. Perhaps there are other (simpler) ways I didn't think of.
Thanks, Eduard -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V2 3/5] para virt interface of perf to support kvm guest os statistics collection in guest os
On Mon, Jun 21, 2010 at 05:31:43PM +0800, Zhang, Yanmin wrote: The 3rd patch is to implement para virt perf at host kernel. Signed-off-by: Zhang Yanmin yanmin_zh...@linux.intel.com --- --- linux-2.6_tip0620/arch/x86/include/asm/kvm_para.h 2010-06-21 15:19:38.992999849 +0800 +++ linux-2.6_tip0620perfkvm/arch/x86/include/asm/kvm_para.h 2010-06-21 15:21:39.308999849 +0800 @@ -2,6 +2,7 @@ #define _ASM_X86_KVM_PARA_H #include linux/types.h +#include linux/list.h #include asm/hyperv.h /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It @@ -19,7 +20,8 @@ /* This indicates that the new set of kvmclock msrs * are available. The use of 0x11 and 0x12 is deprecated */ -#define KVM_FEATURE_CLOCKSOURCE23 +#define KVM_FEATURE_CLOCKSOURCE2 3 +#define KVM_FEATURE_PV_PERF 4 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. @@ -33,7 +35,14 @@ #define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00 #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 -#define KVM_MAX_MMU_OP_BATCH 32 +#define KVM_MAX_MMU_OP_BATCH 32 + +/* Operations for KVM_PERF_OP */ +#define KVM_PERF_OP_OPEN 1 +#define KVM_PERF_OP_CLOSE2 +#define KVM_PERF_OP_ENABLE 3 +#define KVM_PERF_OP_DISABLE 4 +#define KVM_PERF_OP_READ 5 /* Operations for KVM_HC_MMU_OP */ #define KVM_MMU_OP_WRITE_PTE1 @@ -64,6 +73,85 @@ struct kvm_mmu_op_release_pt { #ifdef __KERNEL__ #include asm/processor.h +/* + * data communication area about perf_event between + * Host kernel and guest kernel + */ +struct guest_perf_event { + u64 count; + atomic_t overflows; +}; + +/* + * In host kernel, perf_event-host_perf_shadow points to + * host_perf_shadow which records some information + * about the guest. + */ +struct host_perf_shadow { + /* guest perf_event id passed from guest os */ + int id; + /* + * Host kernel saves data into data member counter firstly. 
+ * kvm will get data from this counter and calls kvm functions + * to copy or add data back to guets os before entering guest os + * next time + */ + struct guest_perf_event counter; + /* guest_event_addr is gpa_t pointing to guest os guest_perf_event*/ + __u64 guest_event_addr; + + /* + * Link to of kvm.kvm_arch.shadow_hash_table + */ + struct list_head shadow_entry; + struct kvm_vcpu *vcpu; + + struct perf_event *host_event; + /* + * Below counter is to prevent malicious guest os to try to + * close/enable event at the same time. + */ + atomic_t ref_counter; +}; + +/* + * In guest kernel, perf_event-guest_shadow points to + * guest_perf_shadow which records some information + * about the guest. + */ +struct guest_perf_shadow { + /* guest perf_event id passed from guest os */ + int id; + /* + * Host kernel kvm saves data into data member counter + */ + struct guest_perf_event counter; +}; + +/* + * guest_perf_attr is used when guest calls hypercall to + * open a new perf_event at host side. Mostly, it's a copy of + * perf_event_attr and deletes something not used by host kernel. 
+ */ +struct guest_perf_attr { + __u32 type; + __u64 config; + __u64 sample_period; + __u64 sample_type; + __u64 read_format; + __u64 flags; + __u32 bp_type; + __u64 bp_addr; + __u64 bp_len; +}; + +struct guest_perf_event_param { + __u64 attr_addr; + __u64 guest_event_addr; + /* In case there is an alignment issue, we put id as the last one */ + int id; +}; + extern void kvmclock_init(void); --- linux-2.6_tip0620/arch/x86/include/asm/kvm_host.h 2010-06-21 15:19:39.01849 +0800 +++ linux-2.6_tip0620perfkvm/arch/x86/include/asm/kvm_host.h 2010-06-21 15:21:39.308999849 +0800 @@ -24,6 +24,7 @@ #include asm/desc.h #include asm/mtrr.h #include asm/msr-index.h +#include asm/perf_event.h #define KVM_MAX_VCPUS 64 #define KVM_MEMORY_SLOTS 32 @@ -360,6 +361,18 @@ struct kvm_vcpu_arch { /* fields used by HYPER-V emulation */ u64 hv_vapic; + + /* + * Fields used by PARAVIRT perf interface: + * + * kvm checks overflow_events before entering guest os, + * and copy data back to guest os. + * event_mutex is to avoid a race between NMI perf event overflow + * handler, event close, and enable/disable. + */ + struct mutex event_mutex; + int overflows; + struct perf_event *overflow_events[X86_PMC_IDX_MAX]; }; struct kvm_mem_alias { @@ -377,6 +390,9 @@ struct
Re: [Qemu-devel] [RFC] Getting specific device from qdev structs
So I've been looking for a way to obtain things like a PCIDevice from a more generic structure (say from hw/qdev.h), If you're having to figure out what kind of a device you have then I think you're already doing something else wrong. I'd expect the bits of code that need to identify devices to be inherently bus specific. If you've got some sort of on-cpu IOMMU which is asking "did this come from an ISA device, or a PCI device?", then I suspect you've got your abstraction layers wrong. This should already have been handled by the pci/isa to cpu bridge. Paul -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] High availability in KVM
On Thu, 17 Jun 2010 12:15:20 +0900 Fernando Luis Vazquez Cao ferna...@oss.ntt.co.jp wrote: * qemu-kvm Currently KVM is only notified about memory errors detected by the MCE subsystem. When running on newer x86 hardware, if MCE detects an error on user-space it signals the corresponding process with SIGBUS. Qemu, upon receiving the signal, checks the problematic address which the kernel stored in siginfo and decides whether to inject the MCE to the virtual machine. An obvious limitation is that we would like to be notified about other types of error too and, as suggested before, a file-based interface that can be sys_poll'ed might be needed for that. On a different note, in a HA environment the qemu policy described above is not adequate; when a notification of a hardware error that our policy determines to be serious arrives the first thing we want to do is to put the virtual machine in a quiesced state to avoid further wreckage. If we injected the error into the guest we would risk a guest panic that might be detectable only by polling or, worse, being killed by the kernel, which means that postmortem analysis of the guest is not possible. Once we had the guests in a quiesced state, where all the buffers have been flushed and the hardware sources released, we would have two modes of operation that can be used together and complement each other. - Proactive: A qmp event describing the error (severity, topology, etc) is emitted. The HA software would have to register to receive hardware error events, possibly using the libvirt bindings. Upon receiving the event the HA software would know that the guest is in a failover-safe quiesced state so it could do without fencing and proceed to the failover stage directly. This seems to match the BLOCK_IO_ERROR event we have today: when a disk error happens, an event is emitted and the virtual machine can be automatically stopped (there's a configuration option for this).
On the other hand, there's a number of ways to do this differently. I think the first thing to do is to agree on what qemu's behavior is going to be, then we decide how to expose this info to qmp clients. - Passive: Polling resource agents that need to check the state of the guest generally use libvirt or a wrapper such as virsh. When the state is SHUTOFF or CRASHED the resource agent proceeds to the fencing stage, which might be expensive and usually involves killing the qemu process. We propose adding a new state that indicates the failover-safe state described before. In this state the HA software would not need to use fencing techniques and since the qemu process is not killed postmortem analysis of the virtual machine is still possible. It wouldn't be polling, I guess. We already have events for most state changes. So, when the machine stops, reboots, etc. the client would be notified and then it could inspect the virtual machine by using query commands. This method would be preferable in case we also want this information available in the user Monitor and/or if the event gets too messy because of the amount of information we want to put in it. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Regarding NAT configuration with KVM
This is the libvirt default network configuration. Please see the libvirt documentation and mailing list for support. As an aside -- you'll probably want to use a bridged configuration rather than the NATted one. If you really do want to stick with the NAT configuration, you'll need to make sure the routing tables on the other hosts guide packets destined for 192.168.122.0/24 back to the VM host. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] [RFC] Getting specific device from qdev structs
On Mon, Jun 21, 2010 at 03:07:13PM +0100, Paul Brook wrote: So I've been looking for a way to obtain things like a PCIDevice from a more generic structure (say from hw/qdev.h), If you're having to figure out what kind of a device you have then I think you're already doing something else wrong. I'd expect the bits of code that needs to identify devices to be inherently bus specific. If you've got some sort of on-cpu IOMMU which is asking did this come from an ISA device, or a PCI device?, then I suspect you've got your abstraction layers wrong. This should already have been handled by the pci/isa to cpu bridge. Paul Hi, Thanks for your reply. This isn't about a specific IOMMU. Let me describe the situation better: 1. I'm implementing the AMD IOMMU, which is a PCI IOMMU (not in the CPU). 2. Devices need address translation and checking through this IOMMU. 3. But in the future there might be other IOMMU implementations, possibly for other bus types. Yes, I could (and have already done to test my code) modify device code to ask the AMD IOMMU for translation. But we have stuff like AIO, which isn't really bus-specific and would result in spaghetti code if I add PCI-specific stuff, then somebody else does the same for other buses and so on. Moreover, even for PCI, it isn't really straightforward to obtain the bus-device-function number required to do translation from AIO code (e.g. I needed to add the devfn or a pointer to the actual PCIDevice to BMDMAState to get it working for PIIX). So I considered providing a generic IOMMU translation/checking API that could be used by all devices and all IOMMUs. Generally getting the {PCI,ISA,Whatever}Device should be enough, I think. If the IOMMU can't handle that specific bus, that's no problem, we can have generic code do identity mapping without any access checking. If somebody comes along and wants to implement another IOMMU emulation, all he needs is to provide implementations for those functions.
Eduard -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Question regarding KVM networking
Hi all, I have a question regarding KVM's networking stack implementation. Does KVM hook into NetFilter to intercept packets destined for the guest, or PF_PACKET? Thanks in advance for your help. Regards, Kangkook -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] [RFC] Getting specific device from qdev structs
Thanks for your reply. This isn't about a specific IOMMU. Let me describe the situation better: 1. I'm implementing the AMD IOMMU, which is a PCI IOMMU (not in the CPU). 2. Devices need address translation and checking through this IOMMU. 3. But in the future there might be other IOMMU implementations, possibly for other bus types. Yes, I could (and have already done to test my code) modify device code to ask the AMD IOMMU for translation. But we have stuff like AIO, which isn't really bus-specific and would result in spaghetti code if I add PCI-specific stuff, then somebody else does the same for other buses and so on. Moreover, even for PCI, it isn't really straightforward to obtain the bus-device-function number required to do translation from AIO code (e.g. I needed to add the devfn or a pointer to the actual PCIDevice to BMDMAState to get it working for PIIX). A bus-device-function number is inherently PCI specific. So I considered providing a generic IOMMU translation/checking API that could be used by all devices and all IOMMUs. Generally getting the {PCI,ISA,Whatever}Device should be enough, I think. If the IOMMU can't handle that specific bus, that's no problem, we can have generic code do identity mapping without any access checking. If somebody comes along and wants to implement another IOMMU emulation, all he needs is to provide implementations for those functions. The actual code to handle address remapping can be bus agnostic. The code to create the mappings is inherently bus specific. i.e. the generic code needs to ask the bus bridge "how do I translate this access onto your parent bus". For example, consider a PCI bridge (Device A) with an IOMMU. On that PCI bus resides a PCI-ISA bridge (Device B) that also has an IOMMU. Device C is a bus-master ISA device[1]. Accesses from device C cause the memory mapping code to walk down the bus structure. First the ISA IOMMU translates that into an access from device B.
Then the PCI IOMMU translates this into a system bus access from device A. The code to determine each of these mappings is inherently bus specific. That code trivially knows how to access bus-specific information from its devices. However the framework used to chain these mappings and perform the actual transfer should be bus agnostic. While the IOMMU actually resides in the host bridge, it probably makes most sense to associate it with the bus itself. When the host device creates the bus it can also create the IOMMU. This should handle both explicit (PCI) and implicit (SBUS) slave-side bus interfaces. Paul [1] I don't think ISA supports bus-master devices, but ignore that for now. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Question regarding KVM networking
On 06/21/2010 09:51 AM, Kangkook Jee wrote: Does KVM hook into NetFilter to intercept packets destined for the guest, or PF_PACKET? None of the above. With -net user, KVM uses techniques derived from SLiRP to simulate a network stack with only traditional userspace UNIX socket calls. With -net tap, it uses the standard ethertap interface. -net socket and -net dump likewise do nothing unconventional. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Search the LAPIC's for one that will accept a PIC interrupt.
Older versions of 32-bit Linux have a "Checking 'hlt' instruction" test where they repeatedly call the 'hlt' instruction, and then expect a timer interrupt to kick the CPU out of halt. This happens before any LAPIC or IOAPIC setup happens, which means that all of the APICs are in virtual wire mode at this point. Unfortunately, the current implementation of virtual wire mode is hardcoded to only kick the BSP, so if a crash+kexec occurs on a different vcpu, it will never get kicked. This patch makes pic_unlock() do the equivalent of kvm_irq_delivery_to_apic() for the IOAPIC code. That is, it runs through all of the vcpus looking for one that is in virtual wire mode. In the normal case where LAPICs and IOAPICs are configured, this won't be used at all. In the bootstrap phase of a modern OS, before the LAPICs and IOAPICs are configured, this will have exactly the same behavior as today; VCPU0 is always looked at first, so it will always get out of the loop after the first iteration. This will only go through the loop more than once during a kexec/kdump, in which case it will only do it a few times until the kexec'ed kernel programs the LAPIC and IOAPIC.
Signed-off-by: Chris Lalancette clala...@redhat.com --- arch/x86/kvm/i8259.c | 17 + 1 files changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 2c73f44..85ecabc 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -44,16 +44,25 @@ static void pic_unlock(struct kvm_pic *s) __releases(s-lock) { bool wakeup = s-wakeup_needed; - struct kvm_vcpu *vcpu; + struct kvm_vcpu *vcpu, *found = NULL; + int i; s-wakeup_needed = false; raw_spin_unlock(s-lock); if (wakeup) { - vcpu = s-kvm-bsp_vcpu; - if (vcpu) - kvm_vcpu_kick(vcpu); + kvm_for_each_vcpu(i, vcpu, s-kvm) { + if (kvm_apic_accept_pic_intr(vcpu)) { + found = vcpu; + break; + } + } + + if (!found) + found = s-kvm-bsp_vcpu; + + kvm_vcpu_kick(found); } } -- 1.6.6.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-1899961 ] NIC not working properly with WS2008 RC1 x64
Bugs item #1899961, was opened at 2008-02-22 22:19 Message generated for change (Comment added) made by jessorensen You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=1899961group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Closed Resolution: Fixed Priority: 5 Private: No Submitted By: Daniel (danielhs) Assigned to: Nobody/Anonymous (nobody) Summary: NIC not working properly with WS2008 RC1 x64 Initial Comment: Using KVM-60 with Windows Server 2008 x64, the NIC is not working properly. Can't do any kind of network activities. The windows network monitor indicates that a lot of data has been received, but none sent. Link to windows server 2008 rc1 standard edition http://www.microsoft.com/downloads/details.aspx?FamilyId=B8144EBA-9EFD-475F-9DD3-A264A00BF5A1displaylang=en I'm using the install from Ubuntu Hardy. I've had no problems networking using Windows XP x64 under same KVM/Ubuntu install. -- Comment By: Jes Sorensen (jessorensen) Date: 2010-06-21 17:43 Message: Tested with 2008 x64 R1 in here using the e1000 driver - it all seems to work now. closing -- Comment By: Daniel (danielhs) Date: 2008-03-12 15:58 Message: Logged In: YES user_id=1609821 Originator: YES Yes. I believe that at the time I *was* using KVM-60 at the time. But, as I said before, all I did was use the install that was on the Ubuntu Hardy repository. So I'm not sure which version it was. I think it was kvm-60. Either way, I'm not using that version at this point. Daniel -- Comment By: Technologov (technologov) Date: 2008-03-12 14:41 Message: Logged In: YES user_id=1839746 Originator: NO Unreproducible. I have it all working. Are you sure you're working on KVM-60? 
Try: # modinfo kvm-intel -Technologov -- Comment By: Daniel (danielhs) Date: 2008-02-23 22:54 Message: Logged In: YES user_id=1609821 Originator: YES Looks like new e1000 driver in kvm-61 fixes this problem entirely. Might want to add a note or something like that to make others aware -- You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=1899961group_id=180599 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] Add 'serial' attribute to virtio-blk devices
* john cooper john.coo...@redhat.com [2010-06-21 01:11]: Rusty Russell wrote: On Sat, 19 Jun 2010 04:08:02 am Ryan Harper wrote: Create a new attribute for virtio-blk devices that will fetch the serial number of the block device. This attribute can be used by udev to create disk/by-id symlinks for devices that don't have a UUID (filesystem) associated with them. ATA_IDENTIFY strings are special in that they can be up to 20 chars long and aren't required to be NULL-terminated. The buffer is also zero-padded meaning that if the serial is 19 chars or less that we get a NULL terminated string. When copying this value into a string buffer, we must be careful to copy up to the NULL (if it present) and only 20 if it is longer and not to attempt to NULL terminate; this isn't needed. Signed-off-by: Ryan Harper ry...@us.ibm.com Signed-off-by: john cooper john.coo...@redhat.com --- drivers/block/virtio_blk.c | 32 1 files changed, 32 insertions(+), 0 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 258bc2a..f1ef26f 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -281,6 +281,31 @@ static int index_to_minor(int index) return index PART_BITS; } +/* Copy serial number from *s to *d. Copy operation terminates on either + * encountering a nul in *s or after n bytes have been copied, whichever + * occurs first. *d is not forcibly nul terminated. Return # of bytes copied. + */ +static inline int serial_sysfs(char *d, char *s, int n) +{ + char *di = d; + + while (*s n--) + *d++ = *s++; + return d - di; +} + +static ssize_t virtblk_serial_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gendisk *disk = dev_to_disk(dev); + char id_str[VIRTIO_BLK_ID_BYTES]; + + if (IS_ERR(virtblk_get_id(disk, id_str))) + return 0; 0? Really? That doesn't seem very informative. Propagating a prospective error from virtblk_get_id() should be possible. 
Unsure if doing so is more useful from the user's perspective compared to just a nul id string. I'm not sure we can do any thing else here; maybe printk a warning? Documentation/filesystems/sysfs.txt says that showing attributes should always return the number of chars put into the buffer; so when there is an error; zero is the right value to return since we're not filling the buffer. + return serial_sysfs(buf, id_str, min(VIRTIO_BLK_ID_BYTES, PAGE_SIZE)); How about something like this: BUILD_BUG_ON(PAGE_SIZE VIRTIO_BLK_ID_BYTES + 1); Agreed, that's a better wrench in the gearworks. Note padding buf[] by 1 isn't necessary as indicated below. Yep; that's a good one to take. /* id_str is not necessarily nul-terminated! */ buf[VIRTIO_BLK_ID_BYTES] = '\0'; return virtblk_get_id(disk, buf); The /sys file is rendered according to the length returned from this function and the trailing nul is not interpreted in this context. In fact if a nul is added and included in the byte count of the string it will appear in the /sys file. Yeah; I like the simplicity; but we do need to know how long the string is so we can return that value. Thanks, -john -- john.coo...@redhat.com -- Ryan Harper Software Engineer; Linux Technology Center IBM Corp., Austin, Tx ry...@us.ibm.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] [PATCH 1/2] Add 'serial' attribute to virtio-blk devices
* Christoph Hellwig h...@lst.de [2010-06-21 07:46]: On Fri, Jun 18, 2010 at 01:38:02PM -0500, Ryan Harper wrote: Create a new attribute for virtio-blk devices that will fetch the serial number of the block device. This attribute can be used by udev to create disk/by-id symlinks for devices that don't have a UUID (filesystem) associated with them. ATA_IDENTIFY strings are special in that they can be up to 20 chars long and aren't required to be NULL-terminated. The buffer is also zero-padded meaning that if the serial is 19 chars or less that we get a NULL terminated string. When copying this value into a string buffer, we must be careful to copy up to the NULL (if it present) and only 20 if it is longer and not to attempt to NULL terminate; this isn't needed. Why is this virtio-blk specific? In a later mail you mention you want to use it for udev. So please export this from scsi/libata as well and we have one proper interface that we can use for all devices. ATA and SCSI devices are already supported via ata_id and scsi_id commands included in udev. Qemu implements the drive serial part for them and udev creates proper disk/by-id links. This patch is about filling the gap for virtio-blk devices which cannot work with ata_id and scsi_id. -- Ryan Harper Software Engineer; Linux Technology Center IBM Corp., Austin, Tx ry...@us.ibm.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] Add 'serial' attribute to virtio-blk devices
Ryan Harper wrote: * john cooper john.coo...@redhat.com [2010-06-21 01:11]: Rusty Russell wrote: On Sat, 19 Jun 2010 04:08:02 am Ryan Harper wrote: Create a new attribute for virtio-blk devices that will fetch the serial number of the block device. This attribute can be used by udev to create disk/by-id symlinks for devices that don't have a UUID (filesystem) associated with them. ATA_IDENTIFY strings are special in that they can be up to 20 chars long and aren't required to be NULL-terminated. The buffer is also zero-padded meaning that if the serial is 19 chars or less that we get a NULL terminated string. When copying this value into a string buffer, we must be careful to copy up to the NULL (if it present) and only 20 if it is longer and not to attempt to NULL terminate; this isn't needed. Signed-off-by: Ryan Harper ry...@us.ibm.com Signed-off-by: john cooper john.coo...@redhat.com --- drivers/block/virtio_blk.c | 32 1 files changed, 32 insertions(+), 0 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 258bc2a..f1ef26f 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -281,6 +281,31 @@ static int index_to_minor(int index) return index PART_BITS; } +/* Copy serial number from *s to *d. Copy operation terminates on either + * encountering a nul in *s or after n bytes have been copied, whichever + * occurs first. *d is not forcibly nul terminated. Return # of bytes copied. + */ +static inline int serial_sysfs(char *d, char *s, int n) +{ + char *di = d; + + while (*s n--) + *d++ = *s++; + return d - di; +} + +static ssize_t virtblk_serial_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gendisk *disk = dev_to_disk(dev); + char id_str[VIRTIO_BLK_ID_BYTES]; + + if (IS_ERR(virtblk_get_id(disk, id_str))) + return 0; 0? Really? That doesn't seem very informative. Propagating a prospective error from virtblk_get_id() should be possible. 
Unsure if doing so is more useful from the user's perspective compared to just a nul id string. I'm not sure we can do any thing else here; maybe printk a warning? Documentation/filesystems/sysfs.txt says that showing attributes should always return the number of chars put into the buffer; so when there is an error; zero is the right value to return since we're not filling the buffer. So we return a nul string in the case the qemu user didn't specify an id string and also in the case a legacy qemu doesn't support retrieval of an id string. Not too much difference and if needed going forward the error return can be elaborated. /* id_str is not necessarily nul-terminated! */ buf[VIRTIO_BLK_ID_BYTES] = '\0'; return virtblk_get_id(disk, buf); The /sys file is rendered according to the length returned from this function and the trailing nul is not interpreted in this context. In fact if a nul is added and included in the byte count of the string it will appear in the /sys file. Yeah; I like the simplicity; but we do need to know how long the string is so we can return that value. Which we're getting from serial_sysfs() without having to accommodate an unused nul. I'd hazard the primary reason the sysfs calling code keys off a return of byte count vs. traversing the string itself is due to the called function almost always having the byte count available. -john -- john.coo...@redhat.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/4] Preparation for PIC-APIC rewiring
Looks good. On Sun, Jun 20, 2010 at 07:21:54PM +0300, Avi Kivity wrote: These four patches were part of an optimization patchset I've been neglecting, but are equally useful as preparation for fixing the PIC-APIC wiring issues. Avi Kivity (4): KVM: i8259: reduce excessive abstraction for pic_irq_request() KVM: i8259: simplify pic_irq_request() calling sequence KVM: Add mini-API for vcpu-requests KVM: Reduce atomic operations on vcpu-requests arch/x86/kvm/i8259.c | 18 ++ arch/x86/kvm/irq.h |4 arch/x86/kvm/lapic.c |2 +- arch/x86/kvm/mmu.c |6 +++--- arch/x86/kvm/svm.c |2 +- arch/x86/kvm/timer.c |2 +- arch/x86/kvm/vmx.c |2 +- arch/x86/kvm/x86.c | 27 +-- include/linux/kvm_host.h | 20 virt/kvm/kvm_main.c |4 ++-- 10 files changed, 48 insertions(+), 39 deletions(-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] KVM: Prevent internal slots from being COWed
On Mon, Jun 21, 2010 at 11:18:13AM +0300, Avi Kivity wrote: If a process with a memory slot is COWed, the page will change its address (despite having an elevated reference count). This breaks internal memory slots which have their physical addresses loaded into vmcs registers (see the APIC access memory slot). Signed-off-by: Avi Kivity a...@redhat.com --- arch/x86/kvm/x86.c |5 + 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 33156a3..d9a33e6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5633,6 +5633,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, int user_alloc) { int npages = memslot-npages; + int map_flags = MAP_PRIVATE | MAP_ANONYMOUS; + + /* Prevent internal slot pages from being moved by fork()/COW. */ + if (memslot-id = KVM_MEMORY_SLOTS) + map_flags = MAP_SHARED | MAP_ANONYMOUS; /*To keep backward compatibility with older userspace, *x86 needs to hanlde !user_alloc case. Forgot to use map_flags below. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] KVM: Remove memory alias support
On Mon, Jun 21, 2010 at 11:51:58AM +0300, Avi Kivity wrote: As advertised in feature-removal-schedule.txt. Equivalent support is provided by overlapping memory regions. Signed-off-by: Avi Kivity a...@redhat.com --- Documentation/feature-removal-schedule.txt | 11 --- Documentation/kvm/api.txt | 12 +--- arch/ia64/kvm/kvm-ia64.c |5 - arch/powerpc/kvm/powerpc.c |5 - arch/s390/kvm/kvm-s390.c |5 - arch/x86/include/asm/kvm_host.h| 21 - arch/x86/kvm/mmu.c | 17 +--- arch/x86/kvm/paging_tmpl.h |3 +- arch/x86/kvm/x86.c | 125 arch/x86/kvm/x86.h |7 -- include/linux/kvm.h|1 + include/linux/kvm_host.h |6 -- virt/kvm/kvm_main.c| 18 +--- 13 files changed, 11 insertions(+), 225 deletions(-) diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 6fd40f5..ea294e6 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -619,6 +619,7 @@ struct kvm_clock_data { */ #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) +/* KVM_SET_MEMORY_REGION is obsolete: */ SET_MEMORY_ALIAS? #define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) #define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) #define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] do not enter vcpu again if it was stopped during IO
On Mon, Jun 21, 2010 at 12:01:52PM +0300, Gleb Natapov wrote: To prevent reentering vcpu after IO completion it is not enough to set env-stopped since it is checked only in main loop but control will not get there until next non-IO exit since kvm_run() will reenter vcpu to complete IO instruction. Solve this by sending self-signal to request exit after IO instruction completion. Signed-off-by: Gleb Natapov g...@redhat.com diff --git a/qemu-kvm.c b/qemu-kvm.c index be1dac2..4f7cf6d 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -603,6 +603,10 @@ int kvm_run(CPUState *env) r = pre_kvm_run(kvm, env); if (r) return r; +if (env-exit_request) { +env-exit_request = 0; +pthread_kill(env-kvm_cpu_state.thread, SIG_IPI); +} r = ioctl(fd, KVM_RUN, 0); Can't you check for env-stopped instead? if (r == -1 errno != EINTR errno != EAGAIN) { diff --git a/vl.c b/vl.c index 9e9c176..dcfab13 100644 --- a/vl.c +++ b/vl.c @@ -1817,6 +1817,7 @@ void qemu_system_reset_request(void) } if (cpu_single_env) { cpu_single_env-stopped = 1; +cpu_exit(cpu_single_env); } qemu_notify_event(); } -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] Add 'serial' attribute to virtio-blk devices
On Tue, 22 Jun 2010 02:13:21 am Ryan Harper wrote: * john cooper john.coo...@redhat.com [2010-06-21 01:11]: Rusty Russell wrote: On Sat, 19 Jun 2010 04:08:02 am Ryan Harper wrote: Create a new attribute for virtio-blk devices that will fetch the serial number of the block device. This attribute can be used by udev to create disk/by-id symlinks for devices that don't have a UUID (filesystem) associated with them. ATA_IDENTIFY strings are special in that they can be up to 20 chars long and aren't required to be NULL-terminated. The buffer is also zero-padded meaning that if the serial is 19 chars or less that we get a NULL terminated string. When copying this value into a string buffer, we must be careful to copy up to the NULL (if it present) and only 20 if it is longer and not to attempt to NULL terminate; this isn't needed. Signed-off-by: Ryan Harper ry...@us.ibm.com Signed-off-by: john cooper john.coo...@redhat.com --- drivers/block/virtio_blk.c | 32 1 files changed, 32 insertions(+), 0 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 258bc2a..f1ef26f 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -281,6 +281,31 @@ static int index_to_minor(int index) return index PART_BITS; } +/* Copy serial number from *s to *d. Copy operation terminates on either + * encountering a nul in *s or after n bytes have been copied, whichever + * occurs first. *d is not forcibly nul terminated. Return # of bytes copied. + */ +static inline int serial_sysfs(char *d, char *s, int n) +{ +char *di = d; + +while (*s n--) +*d++ = *s++; +return d - di; +} + +static ssize_t virtblk_serial_show(struct device *dev, +struct device_attribute *attr, char *buf) +{ +struct gendisk *disk = dev_to_disk(dev); +char id_str[VIRTIO_BLK_ID_BYTES]; + +if (IS_ERR(virtblk_get_id(disk, id_str))) +return 0; 0? Really? That doesn't seem very informative. Propagating a prospective error from virtblk_get_id() should be possible. 
Unsure if doing so is more useful from the user's perspective compared to just a nul id string. I'm not sure we can do any thing else here; maybe printk a warning? Documentation/filesystems/sysfs.txt says that showing attributes should always return the number of chars put into the buffer; so when there is an error; zero is the right value to return since we're not filling the buffer. Ideally, the file shouldn't be set up if we don't have an ID. But we never did add a feature bit for this :( At a glance, we'll get -EIO if the host doesn't support it (or any other transport error). -ENOMEM if we run out of memory. printk is dumb, but it's nice to differentiate host didn't supply one vs something went wrong. How about return 0 on -EIO? Whatever is easiest for udev is best here. /* id_str is not necessarily nul-terminated! */ buf[VIRTIO_BLK_ID_BYTES] = '\0'; return virtblk_get_id(disk, buf); The /sys file is rendered according to the length returned from this function and the trailing nul is not interpreted in this context. In fact if a nul is added and included in the byte count of the string it will appear in the /sys file. Yeah; I like the simplicity; but we do need to know how long the string is so we can return that value. So we're looking at something like: /* id_str is not necessarily nul-terminated! */ buf[VIRTIO_BLK_ID_BYTES] = '\0'; err = virtblk_get_id(disk, buf); if (!err) return strlen(buf); if (err == -EIO) /* Unsupported? Make it empty. */ return 0; return err; Then, please *test*! Thanks, Rusty. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] KVM: Remove redundant change of return value
From: Amos Kong ak...@redhat.com In the following situation, assign zero to 'r' is redundant, just remove them. r = foo(); if (r) goto out; r = 0; ... Signed-off-by: Amos Kong ak...@redhat.com --- arch/x86/kvm/x86.c |7 --- 1 files changed, 0 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 33156a3..a23bfa0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2477,7 +2477,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic); if (r) goto out; - r = 0; break; } case KVM_INTERRUPT: { @@ -2489,14 +2488,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_interrupt(vcpu, irq); if (r) goto out; - r = 0; break; } case KVM_NMI: { r = kvm_vcpu_ioctl_nmi(vcpu); if (r) goto out; - r = 0; break; } case KVM_SET_CPUID: { @@ -3227,7 +3224,6 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_set_irqchip(kvm, chip); if (r) goto set_irqchip_out; - r = 0; set_irqchip_out: kfree(chip); if (r) @@ -3260,7 +3256,6 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_set_pit(kvm, u.ps); if (r) goto out; - r = 0; break; } case KVM_GET_PIT2: { @@ -3286,7 +3281,6 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_set_pit2(kvm, u.ps2); if (r) goto out; - r = 0; break; } case KVM_REINJECT_CONTROL: { @@ -3297,7 +3291,6 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_reinject(kvm, control); if (r) goto out; - r = 0; break; } case KVM_XEN_HVM_CONFIG: { -- 1.7.0.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[KVM-AUTOTEST PATCH] KVM test: kvm_preprocessing.py: test for vm.is_alive() instead of vm.is_dead()
vm.is_alive() verifies that the monitor is responsive, which is required for taking screendumps. Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/kvm_preprocessing.py |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/client/tests/kvm/kvm_preprocessing.py b/client/tests/kvm/kvm_preprocessing.py index 1ed4ec2..ee279bd 100644 --- a/client/tests/kvm/kvm_preprocessing.py +++ b/client/tests/kvm/kvm_preprocessing.py @@ -406,7 +406,7 @@ def _take_screendumps(test, params, env): while True: for vm in kvm_utils.env_get_all_vms(env): -if vm.is_dead(): +if not vm.is_alive(): continue try: vm.monitor.screendump(temp_filename) -- 1.5.4.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[KVM-AUTOTEST PATCH] KVM test: concentrate image and cdrom filename prefixes in tests.cfg.sample
Don't prefix image_name and cdrom at the end of tests_base.cfg.sample. Instead, do it all in tests.cfg.sample, to make it clearer to users editing the file. Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/tests.cfg.sample | 10 -- client/tests/kvm/tests_base.cfg.sample |2 -- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/client/tests/kvm/tests.cfg.sample b/client/tests/kvm/tests.cfg.sample index a55a320..b303686 100644 --- a/client/tests/kvm/tests.cfg.sample +++ b/client/tests/kvm/tests.cfg.sample @@ -4,19 +4,17 @@ include tests_base.cfg include cdkeys.cfg -# Modify/comment the following lines if you wish to modify -# the paths of the image files, ISO files, step files or qemu binaries. +# Modify/comment the following lines if you wish to modify the paths of the +# image files, ISO files or qemu binaries. # # As for the defaults: # * qemu and qemu-img are expected to be found under /usr/bin/qemu-kvm and # /usr/bin/qemu-img respectively. # * All image files are expected under /tmp/kvm_autotest_root/images/ # * All iso files are expected under /tmp/kvm_autotest_root/isos/ -# * All step files are expected under /tmp/kvm_autotest_root/steps/ qemu_img_binary = /usr/bin/qemu-img -image_name.* ?= /tmp/kvm_autotest_root/ -cdrom.* ?= /tmp/kvm_autotest_root/ -steps ?= /tmp/kvm_autotest_root/ +image_name.* ?= /tmp/kvm_autotest_root/images/ +cdrom.* ?= /tmp/kvm_autotest_root/isos/ # Here are the test sets variants. 
The variant 'qemu_kvm_windows_quick' is # fully commented, the following ones have comments only on noteworthy points diff --git a/client/tests/kvm/tests_base.cfg.sample b/client/tests/kvm/tests_base.cfg.sample index ec61a5e..2c78cfc 100644 --- a/client/tests/kvm/tests_base.cfg.sample +++ b/client/tests/kvm/tests_base.cfg.sample @@ -1447,6 +1447,4 @@ variants: devices_requested = 7 -image_name.* ?= images/ -cdrom.* ?= isos/ steps ?= steps/ -- 1.5.4.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[KVM-AUTOTEST PATCH] KVM test: scan_results.py: fix handling of empty result list
If there are no test results, max() tries to operate on an empty sequence and throws an exception. Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/scan_results.py |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/client/tests/kvm/scan_results.py b/client/tests/kvm/scan_results.py index f7073e4..a339a85 100755 --- a/client/tests/kvm/scan_results.py +++ b/client/tests/kvm/scan_results.py @@ -74,7 +74,7 @@ def main(resfiles): continue results = parse_results(text) result_lists.append((resfile, results)) -name_width = max(name_width, max(len(r[0]) for r in results)) +name_width = max([name_width] + [len(r[0]) for r in results]) print_result((Test, Status, Seconds, Info), name_width) print_result((, --, ---, ), name_width) -- 1.5.4.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] High availability in KVM
(2010/06/21 23:19), Luiz Capitulino wrote: On a different note, in a HA environment the qemu policy described above is not adequate; when a notification of a hardware error that our policy determines to be serious arrives the first thing we want to do is to put the virtual machine in a quiesced state to avoid further wreckage. If we injected the error into the guest we would risk a guest panic that might be detectable only by polling or, worse, being killed by the kernel, which means that postmortem analysis of the guest is not possible. Once we had the guests in a quiesced state, where all the buffers have been flushed and the hardware sources released, we would have two modes of operation that can be used together and complement each other. - Proactive: A qmp event describing the error (severity, topology, etc) is emitted. The HA software would have to register to receive hardware error events, possibly using the libvirt bindings. Upon receiving the event the HA software would know that the guest is in a failover-safe quiesced state so it could do without fencing and proceed to the failover stage directly. This seems to match the BLOCK_IO_ERROR event we have today: when a disk error happens, an event is emitted and the virtual machine can be automatically stopped (there's a configuration option for this). On the other hand, there's a number of ways to do this differently. I think the first thing to do is to agree on what qemu's behavior is going to be, then we decide how to expose this info to qmp clients. I would like to support qemu/KVM bugs too in the same framework. Even though there are some debugging ways, the easiest and most reliable one would be using the frozen state of the guest at the moment the bug happened. We've already experienced some qemu crashes which seemed to be caused by a KVM's emulation failure in our test environment. 
Although we could guess what happened by checking some messages like the exit reason, the guest state might have been more help. So what I want to get is: - new qemu/KVM mode in which guests are automatically stopped in a failover-safe state if qemu/KVM becomes impossible to continue, - new interface between qemu and HA to handle the failover-safe state, Although I personally don't mind whether the interface is event based or polling based, one important problem from the HA's point of view would be: * how to treat errors which can be caused in different layers uniformly. E.g. if the problem is caused by guest side, qemu may normally exit without sending any events to HA. So an interface for polling may be helpful even when we choose event driven one. Takuya - Passive: Polling resource agents that need to check the state of the guest generally use libvirt or a wrapper such as virsh. When the state is SHUTOFF or CRASHED the resource agent proceeds to the fencing stage, which might be expensive and usually involves killing the qemu process. We propose adding a new state that indicates the failover-safe state described before. In this state the HA software would not need to use fencing techniques and since the qemu process is not killed postmortem analysis of the virtual machine is still possible. It wouldn't be polling, I guess. We already have events for most state changes. So, when the machine stops, reboots, etc. the client would be notified and then it could inspect the virtual machine by using query commands. This method would be preferable in case we also want this information available in the user Monitor and/or if the event gets too messy because of the amount of information we want to put in it. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V2 1/5] para virt interface of perf to support kvm guest os statistics collection in guest os
On Mon, 2010-06-21 at 14:45 +0300, Avi Kivity wrote: On 06/21/2010 12:31 PM, Zhang, Yanmin wrote: Here is the version 2. ChangeLog since V1: Mostly changes based on Avi's suggestions. 1) Use a id to identify the perf_event between host and guest; 2) Changes lots of codes to deal with malicious guest os; 3) Add a perf_event number limitation per gust os instance; 4) Support guest os on the top of another guest os scenario. But I didn't test it yet as there is no environment. The design is to add 2 pointers in struct perf_event. One is used by host and the other is used by guest. 5) Fix the bug to support 'perf stat'. The key is sync count data back to guest when guest tries to disable the perf_event at host side. 6) Add a clear ABI of PV perf. Please use meaningful subject lines for individual patches. Yes, I should. I rushed to send the patches out yesterday afternoon as I need to take company shuttle back home. I don't implement live migration feature. Avi, Is live migration necessary on pv perf support? Yes. Ok. With the PV perf interface, host perf saves all counter info into perf_event structure. To support live migration, we need save all host perf_event structure, or at least perf_event-count and perf_event-attr. Then, recreate the host perf_event after migration. I check qemu-kvm codes and it seems most live migration is to save cpu states. So it seems it's hard for perf pv interface to match current live migration. Any suggestion? --- linux-2.6_tip0620/Documentation/kvm/paravirt-perf.txt 1970-01-01 08:00:00.0 +0800 +++ linux-2.6_tip0620perfkvm/Documentation/kvm/paravirt-perf.txt 2010-06-21 15:21:39.312999849 +0800 @@ -0,0 +1,133 @@ +The x86 kvm paravirt perf event interface +=== + +This paravirt interface is responsible for supporting guest os perf event +collections. If guest os supports this interface, users could run command +perf in guest os directly. 
+ +Design + + +Guest os calls a series of hypercalls to communicate with host kernel to +create/enable/disable/close perf events. Host kernel notifies guest os +by injecting an NMI to guest os when an event overflows. Guets os need +go through all its active events to check if they overflow, and output +performance statistics if they do. + +ABI += + +1) Detect if host kernel supports paravirt perf interface: +#define KVM_FEATURE_PV_PERF 4 +Host kernel defines above cpuid bit. Guest os calls cpuid to check if host +os retuns this bit. If it does, it mean host kernel supports paravirt perf +interface. + +2) Open a new event at host side: +kvm_hypercall3(KVM_PERF_OP, KVM_PERF_OP_OPEN, param_addr_low32bit, +param_addr_high32bit); + +#define KVM_PERF_OP3 +/* Operations for KVM_PERF_OP */ +#define KVM_PERF_OP_OPEN1 +#define KVM_PERF_OP_CLOSE 2 +#define KVM_PERF_OP_ENABLE 3 +#define KVM_PERF_OP_DISABLE 4 +#define KVM_PERF_OP_READ5 +/* + * guest_perf_attr is used when guest calls hypercall to + * open a new perf_event at host side. Mostly, it's a copy of + * perf_event_attr and deletes something not used by host kernel. + */ +struct guest_perf_attr { +__u32 type; Need padding here, otherwise the structure is different on 32-bit and 64-bit guests. Ok. I will change it. +__u64 config; +__u64 sample_period; +__u64 sample_type; +__u64 read_format; +__u64 flags; and here. I will rearrange the whole structure. +__u32 bp_type; +__u64 bp_addr; +__u64 bp_len; Do we actually support breakpoints on the guest? Note the hardware breakpoints are also usable by the guest, so if the host uses them, we won't be able to emulate them correctly. We can let the guest to breakpoint perf monitoring itself and drop this feature. Ok, I will disable breakpoint feature of pv interface. +}; What about documentation for individual fields? Esp. type, config, and flags, but also the others. They are really perf implementation specific. Even perf_event definition has no document but code comments. 
I will add simple explanation around the new structure definition. +/* + * data communication area about perf_event between + * Host kernel and guest kernel + */ +struct guest_perf_event { +u64 count; +atomic_t overflows; Please use __u64 and __u32, assume guests don't have Linux internal types (though of course the first guest _is_ Linux). This structure is used by both host and
[PATCH 01/13] KVM test: kvm_utils.py: add a primitive logging mechanism for kvm_subprocess
From: Michael Goldish mgold...@redhat.com Add log_line() which logs a single line to a given file. The file's path is given relative to a certain base dir. Add set_log_dir() which sets the base dir. This is useful for logging the output of kvm_subprocess. kvm_subprocess can take a callback function, which it calls with each line of output it gets from the running subprocess. Redirecting kvm_subprocess's output to the regular log files is done by passing it logging.debug or logging.info. However, in order to log to other files, we'd have to pass kvm_subprocess a custom logger method, e.g. our_custom_logger.debug. Unfortunately, such methods (called instancemethods) cannot be pickled, and kvm_subprocess relies on pickling. This patch offers an easy yet somewhat dirty solution to the problem. Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/kvm_utils.py | 37 + 1 files changed, 37 insertions(+), 0 deletions(-) diff --git a/client/tests/kvm/kvm_utils.py b/client/tests/kvm/kvm_utils.py index 040124b..367e1e5 100644 --- a/client/tests/kvm/kvm_utils.py +++ b/client/tests/kvm/kvm_utils.py @@ -733,6 +733,43 @@ def find_free_ports(start_port, end_port, count): return ports +# An easy way to log lines to files when the logging system can't be used + +_open_log_files = {} +_log_file_dir = /tmp + + +def log_line(filename, line): + +Write a line to a file. '\n' is appended to the line. + +@param filename: Path of file to write to, either absolute or relative to +the dir set by set_log_file_dir(). +@param line: Line to write. 
+ +global _open_log_files, _log_file_dir +if filename not in _open_log_files: +path = get_path(_log_file_dir, filename) +try: +os.makedirs(os.path.dirname(path)) +except OSError: +pass +_open_log_files[filename] = open(path, w) +timestr = time.strftime(%Y-%m-%d %H:%M:%S) +_open_log_files[filename].write(%s: %s\n % (timestr, line)) +_open_log_files[filename].flush() + + +def set_log_file_dir(dir): + +Set the base directory for log files created by log_line(). + +@param dir: Directory for log files. + +global _log_file_dir +_log_file_dir = dir + + # The following are miscellaneous utility functions. def get_path(base_path, user_path): -- 1.7.0.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 02/13] KVM test: add the auto_close option to all kvm_subprocess classes
From: Michael Goldish mgold...@redhat.com Allow all kvm_subprocess classes (kvm_spawn, kvm_tail, kvm_expect, kvm_shell_session) to close automatically if auto_close is given and True. By default auto_close is False for all classes except kvm_shell_session, for which it is True by default. Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/kvm_subprocess.py | 50 +--- 1 files changed, 29 insertions(+), 21 deletions(-) diff --git a/client/tests/kvm/kvm_subprocess.py b/client/tests/kvm/kvm_subprocess.py index 2d70146..73edc5d 100755 --- a/client/tests/kvm/kvm_subprocess.py +++ b/client/tests/kvm/kvm_subprocess.py @@ -285,7 +285,8 @@ class kvm_spawn: resumes _tail() if needed. -def __init__(self, command=None, id=None, echo=False, linesep=\n): +def __init__(self, command=None, id=None, auto_close=False, echo=False, + linesep=\n): Initialize the class and run command as a child process. @@ -293,6 +294,8 @@ class kvm_spawn: server. @param id: ID of an already running server, if accessing a running server, or None if starting a new one. +@param auto_close: If True, close() the instance automatically when its +reference count drops to zero (default False). @param echo: Boolean indicating whether echo should be initially enabled for the pseudo terminal running the subprocess. This parameter has an effect only when starting a new server. @@ -316,6 +319,7 @@ class kvm_spawn: self.id) # Remember some attributes +self.auto_close = auto_close self.echo = echo self.linesep = linesep @@ -378,7 +382,12 @@ class kvm_spawn: def __getinitargs__(self): # Save some information when pickling -- will be passed to the # constructor upon unpickling -return (None, self.id, self.echo, self.linesep) +return (None, self.id, self.auto_close, self.echo, self.linesep) + + +def __del__(self): +if self.auto_close: +self.close() def _add_reader(self, reader): @@ -554,10 +563,9 @@ class kvm_tail(kvm_spawn): When this class is unpickled, it automatically resumes reporting output. 
-def __init__(self, command=None, id=None, echo=False, linesep=\n, - termination_func=None, termination_params=(), - output_func=None, output_params=(), - output_prefix=): +def __init__(self, command=None, id=None, auto_close=False, echo=False, + linesep=\n, termination_func=None, termination_params=(), + output_func=None, output_params=(), output_prefix=): Initialize the class and run command as a child process. @@ -565,6 +573,8 @@ class kvm_tail(kvm_spawn): server. @param id: ID of an already running server, if accessing a running server, or None if starting a new one. +@param auto_close: If True, close() the instance automatically when its +reference count drops to zero (default False). @param echo: Boolean indicating whether echo should be initially enabled for the pseudo terminal running the subprocess. This parameter has an effect only when starting a new server. @@ -587,7 +597,7 @@ class kvm_tail(kvm_spawn): self._add_close_hook(kvm_tail._join_thread) # Init the superclass -kvm_spawn.__init__(self, command, id, echo, linesep) +kvm_spawn.__init__(self, command, id, auto_close, echo, linesep) # Remember some attributes self.termination_func = termination_func @@ -751,10 +761,9 @@ class kvm_expect(kvm_tail): It also provides all of kvm_tail's functionality. -def __init__(self, command=None, id=None, echo=False, linesep=\n, - termination_func=None, termination_params=(), - output_func=None, output_params=(), - output_prefix=): +def __init__(self, command=None, id=None, auto_close=False, echo=False, + linesep=\n, termination_func=None, termination_params=(), + output_func=None, output_params=(), output_prefix=): Initialize the class and run command as a child process. @@ -762,6 +771,8 @@ class kvm_expect(kvm_tail): server. @param id: ID of an already running server, if accessing a running server, or None if starting a new one. +@param auto_close: If True, close() the instance automatically when its +reference count drops to zero (default False). 
@param echo: Boolean indicating whether echo should be initially enabled for the pseudo terminal running the subprocess. This
[PATCH 03/13] KVM test: restructure remote_login() and remote_scp()
- Add _remote_login() and _remote_scp() which, instead of taking a command line, take an existing session and operate on it. This is useful for logging into existing always-open sessions, such as serial console sessions. - Merge ssh/telnet/netcat into remote_login(). Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/kvm_utils.py | 209 +++-- client/tests/kvm/kvm_vm.py| 11 +-- 2 files changed, 100 insertions(+), 120 deletions(-) diff --git a/client/tests/kvm/kvm_utils.py b/client/tests/kvm/kvm_utils.py index 367e1e5..d4edbaa 100644 --- a/client/tests/kvm/kvm_utils.py +++ b/client/tests/kvm/kvm_utils.py @@ -451,143 +451,183 @@ def check_kvm_source_dir(source_dir): # The following are functions used for SSH, SCP and Telnet communication with # guests. -def remote_login(command, password, prompt, linesep=\n, timeout=10): +def _remote_login(session, password, prompt, timeout=10): -Log into a remote host (guest) using SSH or Telnet. Run the given command -using kvm_spawn and provide answers to the questions asked. If timeout -expires while waiting for output from the child (e.g. a password prompt -or a shell prompt) -- fail. +Log into a remote host (guest) using SSH or Telnet. Wait for questions +and provide answers. If timeout expires while waiting for output from the +child (e.g. a password prompt or a shell prompt) -- fail. @brief: Log into a remote host (guest) using SSH or Telnet. -@param command: The command to execute (e.g. ssh r...@localhost) +@param session: A kvm_expect or kvm_shell_session instance to operate on @param password: The password to send in reply to a password prompt @param prompt: The shell prompt that indicates a successful login -@param linesep: The line separator to send instead of \\n -(sometimes \\r\\n is required) @param timeout: The maximal time duration (in seconds) to wait for each step of the login procedure (i.e. 
the Are you sure prompt, the password prompt, the shell prompt, etc) -@return Return the kvm_spawn object on success and None on failure. +@return: True on success and False otherwise. -sub = kvm_subprocess.kvm_shell_session(command, - linesep=linesep, - prompt=prompt) - password_prompt_count = 0 -logging.debug(Trying to login with command '%s' % command) - while True: -(match, text) = sub.read_until_last_line_matches( +(match, text) = session.read_until_last_line_matches( [r[Aa]re you sure, r[Pp]assword:\s*$, r^\s*[Ll]ogin:\s*$, r[Cc]onnection.*closed, r[Cc]onnection.*refused, r[Pp]lease wait, prompt], timeout=timeout, internal_timeout=0.5) if match == 0: # Are you sure you want to continue connecting logging.debug(Got 'Are you sure...'; sending 'yes') -sub.sendline(yes) +session.sendline(yes) continue elif match == 1: # password: if password_prompt_count == 0: logging.debug(Got password prompt; sending '%s' % password) -sub.sendline(password) +session.sendline(password) password_prompt_count += 1 continue else: logging.debug(Got password prompt again) -sub.close() -return None +return False elif match == 2: # login: logging.debug(Got unexpected login prompt) -sub.close() -return None +return False elif match == 3: # Connection closed logging.debug(Got 'Connection closed') -sub.close() -return None +return False elif match == 4: # Connection refused logging.debug(Got 'Connection refused') -sub.close() -return None +return False elif match == 5: # Please wait logging.debug(Got 'Please wait') timeout = 30 continue elif match == 6: # prompt logging.debug(Got shell prompt -- logged in) -return sub +return session else: # match == None logging.debug(Timeout elapsed or process terminated) -sub.close() -return None +return False -def remote_scp(command, password, transfer_timeout=600, login_timeout=10): +def _remote_scp(session, password, transfer_timeout=600, login_timeout=10): -Run the given command using kvm_spawn and provide answers to the questions -asked. 
If transfer_timeout expires while waiting for the transfer to -complete, fail. If login_timeout expires while waiting for output from
[PATCH 04/13] KVM test: send username in remote_login()
In order for login over the serial console to work, remote_login() must send the username when it meets the username prompt. This patch makes the login fail if the username prompt is met twice. Signed-off-by: Jason Wang jasow...@redhat.com Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/kvm_utils.py | 16 1 files changed, 12 insertions(+), 4 deletions(-) diff --git a/client/tests/kvm/kvm_utils.py b/client/tests/kvm/kvm_utils.py index d4edbaa..0c40b55 100644 --- a/client/tests/kvm/kvm_utils.py +++ b/client/tests/kvm/kvm_utils.py @@ -451,7 +451,7 @@ def check_kvm_source_dir(source_dir): # The following are functions used for SSH, SCP and Telnet communication with # guests. -def _remote_login(session, password, prompt, timeout=10): +def _remote_login(session, username, password, prompt, timeout=10): Log into a remote host (guest) using SSH or Telnet. Wait for questions and provide answers. If timeout expires while waiting for output from the @@ -460,6 +460,7 @@ def _remote_login(session, password, prompt, timeout=10): @brief: Log into a remote host (guest) using SSH or Telnet. @param session: A kvm_expect or kvm_shell_session instance to operate on +@param username: The username to send in reply to a login prompt @param password: The password to send in reply to a password prompt @param prompt: The shell prompt that indicates a successful login @param timeout: The maximal time duration (in seconds) to wait for each @@ -469,6 +470,7 @@ def _remote_login(session, password, prompt, timeout=10): @return: True on success and False otherwise. 
password_prompt_count = 0 +login_prompt_count = 0 while True: (match, text) = session.read_until_last_line_matches( @@ -490,8 +492,14 @@ def _remote_login(session, password, prompt, timeout=10): logging.debug(Got password prompt again) return False elif match == 2: # login: -logging.debug(Got unexpected login prompt) -return False +if login_prompt_count == 0: +logging.debug(Got username prompt; sending '%s' % username) +session.sendline(username) +login_prompt_count += 1 +continue +else: +logging.debug(Got username prompt again) +return False elif match == 3: # Connection closed logging.debug(Got 'Connection closed') return False @@ -596,7 +604,7 @@ def remote_login(client, host, port, username, password, prompt, linesep=\n, logging.debug(Trying to login with command '%s' % cmd) session = kvm_subprocess.kvm_shell_session(cmd, linesep=linesep, prompt=prompt) -if _remote_login(session, password, prompt, timeout): +if _remote_login(session, username, password, prompt, timeout): return session else: session.close() -- 1.7.0.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 05/13] KVM test: remote_login(): make the login re suitable for serial console
From: Michael Goldish mgold...@redhat.com Current matching re ^\s*[Ll]ogin:\s*$ is not suitable for the serial console, so change it to [Ll]ogin:\s*$. Signed-off-by: Jason Wang jasow...@redhat.com Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/kvm_utils.py |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/client/tests/kvm/kvm_utils.py b/client/tests/kvm/kvm_utils.py index 0c40b55..354450c 100644 --- a/client/tests/kvm/kvm_utils.py +++ b/client/tests/kvm/kvm_utils.py @@ -474,7 +474,7 @@ def _remote_login(session, username, password, prompt, timeout=10): while True: (match, text) = session.read_until_last_line_matches( -[r[Aa]re you sure, r[Pp]assword:\s*$, r^\s*[Ll]ogin:\s*$, +[r[Aa]re you sure, r[Pp]assword:\s*$, r[Ll]ogin:\s*$, r[Cc]onnection.*closed, r[Cc]onnection.*refused, r[Pp]lease wait, prompt], timeout=timeout, internal_timeout=0.5) -- 1.7.0.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 06/13] KVM test: kvm_vm.py: redirect the serial console to a unix socket
From: Michael Goldish mgold...@redhat.com Based on Jason Wang's patch. Signed-off-by: Jason Wang jasow...@redhat.com Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/kvm_vm.py | 13 + 1 files changed, 13 insertions(+), 0 deletions(-) diff --git a/client/tests/kvm/kvm_vm.py b/client/tests/kvm/kvm_vm.py index 225f26a..1edecb9 100755 --- a/client/tests/kvm/kvm_vm.py +++ b/client/tests/kvm/kvm_vm.py @@ -206,6 +206,9 @@ class VM: def add_qmp_monitor(help, filename): return -qmp unix:'%s',server,nowait % filename +def add_serial(help, filename): +return -serial unix:'%s',server,nowait % filename + def add_mem(help, mem): return -m %s % mem @@ -314,6 +317,9 @@ class VM: else: qemu_cmd += add_human_monitor(help, monitor_filename) +# Add serial console redirection +qemu_cmd += add_serial(help, self.get_serial_console_filename()) + for image_name in kvm_utils.get_sub_dict_names(params, images): image_params = kvm_utils.get_sub_dict(params, image_name) if image_params.get(boot_drive) == no: @@ -774,6 +780,13 @@ class VM: kvm_utils.get_sub_dict_names(self.params, monitors)] +def get_serial_console_filename(self): + +Return the serial console filename. + +return /tmp/serial-%s % self.instance + + def get_testlog_filename(self): Return the testlog filename. -- 1.7.0.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 07/13] KVM test: kvm_vm.py: log serial console output and allow serial login
From: Michael Goldish mgold...@redhat.com Log serial console output to files in the debug dir. Also, add VM.serial_login() which attempts to log in via the serial console. Note: - Tests must NOT close() a serial console session, because it needs to remain open for the following tests to use. - Instead, tests must use session.sendline(exit) for serial console sessions: session = vm.serial_login() try: ... finally: session.sendline(exit) - Only one serial console session per VM is available at a time. Calling serial_login() twice for the same VM, without an exit between the calls, will probably fail and/or do bad stuff. Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/kvm_vm.py | 41 + 1 files changed, 41 insertions(+), 0 deletions(-) diff --git a/client/tests/kvm/kvm_vm.py b/client/tests/kvm/kvm_vm.py index 1edecb9..71df892 100755 --- a/client/tests/kvm/kvm_vm.py +++ b/client/tests/kvm/kvm_vm.py @@ -106,6 +106,7 @@ class VM: @param address_cache: A dict that maps MAC addresses to IP addresses self.process = None +self.serial_console = None self.redirs = {} self.vnc_port = 5900 self.uuid = None @@ -634,6 +635,15 @@ class VM: return False logging.debug(VM appears to be alive with PID %s, self.get_pid()) + +# Establish a session with the serial console -- requires a version +# of netcat that supports -U +self.serial_console = kvm_subprocess.kvm_shell_session( +nc -U %s % self.get_serial_console_filename(), +auto_close=False, +output_func=kvm_utils.log_line, +output_params=(serial-%s.log % name,)) + return True finally: @@ -707,6 +717,8 @@ class VM: self.pci_assignable.release_devs() if self.process: self.process.close() +if self.serial_console: +self.serial_console.close() for f in ([self.get_testlog_filename()] + self.get_monitor_filenames()): try: @@ -975,6 +987,35 @@ class VM: remote_path, local_path, timeout) +def serial_login(self, timeout=10): + +Log into the guest via the serial console. 
+If timeout expires while waiting for output from the guest (e.g. a +password prompt or a shell prompt) -- fail. + +@param timeout: Time (seconds) before giving up logging into the guest. +@return: kvm_spawn object on success and None on failure. + +username = self.params.get(username, ) +password = self.params.get(password, ) +prompt = self.params.get(shell_prompt, [\#\$]) +linesep = eval('%s' % self.params.get(shell_linesep, r\n)) +status_test_command = self.params.get(status_test_command, ) + +if self.serial_console: +self.serial_console.set_linesep(linesep) +self.serial_console.set_status_test_command(status_test_command) +else: +return None + +# Make sure we get a login prompt +self.serial_console.sendline() + +if kvm_utils._remote_login(self.serial_console, username, password, + prompt, timeout): +return self.serial_console + + def send_key(self, keystr): Send a key event to the VM. -- 1.7.0.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 08/13] KVM test: kvm.py: set log file dir for kvm_subprocess logging
From: Michael Goldish mgold...@redhat.com Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/kvm.py |4 1 files changed, 4 insertions(+), 0 deletions(-) diff --git a/client/tests/kvm/kvm.py b/client/tests/kvm/kvm.py index bab1e6f..0799cff 100644 --- a/client/tests/kvm/kvm.py +++ b/client/tests/kvm/kvm.py @@ -31,6 +31,10 @@ class kvm(test.test): logging.debug(%s = %s, key, params[key]) self.write_test_keyval({key: params[key]}) +# Set the log file dir for the logging mechanism used by kvm_subprocess +# (this must be done before unpickling env) +kvm_utils.set_log_file_dir(self.debugdir) + # Open the environment file logging.info(Unpickling env. You may see some harmless error messages.) -- 1.7.0.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 09/13] KVM test: kvm_subprocess.py: don't sanitize text before passing it to callbacks
From: Michael Goldish mgold...@redhat.com Converting the text to utf-8 seems to cause trouble when converting back (e.g. when writing to files). The logging system seems to be fine with unsanitized text, so let's not sanitize it. Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/kvm_subprocess.py |6 ++ 1 files changed, 2 insertions(+), 4 deletions(-) diff --git a/client/tests/kvm/kvm_subprocess.py b/client/tests/kvm/kvm_subprocess.py index 73edc5d..93a8429 100755 --- a/client/tests/kvm/kvm_subprocess.py +++ b/client/tests/kvm/kvm_subprocess.py @@ -688,9 +688,7 @@ class kvm_tail(kvm_spawn): def print_line(text): # Pre-pend prefix and remove trailing whitespace text = self.output_prefix + text.rstrip() -# Sanitize text -text = text.decode(utf-8, replace) -# Pass it to output_func +# Pass text to output_func try: params = self.output_params + (text,) self.output_func(*params) @@ -888,7 +886,7 @@ class kvm_expect(kvm_tail): if str.endswith(\n): str = str[:-1] for line in str.split(\n): -print_func(line.decode(utf-8, replace)) +print_func(line) data += newdata done = False -- 1.7.0.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 10/13] KVM test: log output of all shell sessions and SCP transfers
From: Michael Goldish mgold...@redhat.com Use kvm_utils.log_line() to log the output of all shell sessions and SCP transfers. Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/kvm_utils.py | 33 ++--- client/tests/kvm/kvm_vm.py| 15 --- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/client/tests/kvm/kvm_utils.py b/client/tests/kvm/kvm_utils.py index 354450c..12508b6 100644 --- a/client/tests/kvm/kvm_utils.py +++ b/client/tests/kvm/kvm_utils.py @@ -572,7 +572,7 @@ def _remote_scp(session, password, transfer_timeout=600, login_timeout=10): def remote_login(client, host, port, username, password, prompt, linesep=\n, - timeout=10): + log_filename=None, timeout=10): Log into a remote host (guest) using SSH/Telnet/Netcat. @@ -584,6 +584,7 @@ def remote_login(client, host, port, username, password, prompt, linesep=\n, @param prompt: Shell prompt (regular expression) @param linesep: The line separator to use when sending lines (e.g. '\\n' or '\\r\\n') +@param log_filename: If specified, log all output to this file @param timeout: The maximal time duration (in seconds) to wait for each step of the login procedure (i.e. the Are you sure prompt or the password prompt) @@ -601,16 +602,21 @@ def remote_login(client, host, port, username, password, prompt, linesep=\n, else: logging.error(Unknown remote shell client: %s % client) return + logging.debug(Trying to login with command '%s' % cmd) session = kvm_subprocess.kvm_shell_session(cmd, linesep=linesep, prompt=prompt) if _remote_login(session, username, password, prompt, timeout): +if log_filename: +session.set_output_func(log_line) +session.set_output_params((log_filename,)) return session else: session.close() -def remote_scp(command, password, transfer_timeout=600, login_timeout=10): +def remote_scp(command, password, log_filename=None, transfer_timeout=600, + login_timeout=10): Transfer file(s) to a remote host (guest) using SCP. 
@@ -619,6 +625,7 @@ def remote_scp(command, password, transfer_timeout=600, login_timeout=10): @param command: The command to execute (e.g. scp -r foobar r...@localhost:/tmp/). @param password: The password to send in reply to a password prompt. +@param log_filename: If specified, log all output to this file @param transfer_timeout: The time duration (in seconds) to wait for the transfer to complete. @param login_timeout: The maximal time duration (in seconds) to wait for @@ -629,7 +636,17 @@ def remote_scp(command, password, transfer_timeout=600, login_timeout=10): logging.debug(Trying to SCP with command '%s', timeout %ss, command, transfer_timeout) -session = kvm_subprocess.kvm_expect(command) + +if log_filename: +output_func = log_line +output_params = (log_filename,) +else: +output_func = None +output_params = () + +session = kvm_subprocess.kvm_expect(command, +output_func=output_func, +output_params=output_params) try: return _remote_scp(session, password, transfer_timeout, login_timeout) finally: @@ -637,7 +654,7 @@ def remote_scp(command, password, transfer_timeout=600, login_timeout=10): def scp_to_remote(host, port, username, password, local_path, remote_path, - timeout=600): + log_filename=None, timeout=600): Copy files to a remote host (guest). @@ -646,6 +663,7 @@ def scp_to_remote(host, port, username, password, local_path, remote_path, @param password: Password (if required) @param local_path: Path on the local machine where we are copying from @param remote_path: Path on the remote machine where we are copying to +@param log_filename: If specified, log all output to this file @param timeout: The time duration (in seconds) to wait for the transfer to complete. 
@@ -654,11 +672,11 @@ def scp_to_remote(host, port, username, password, local_path, remote_path, command = (scp -v -o UserKnownHostsFile=/dev/null -o PreferredAuthentications=password -r -P %s %s %...@%s:%s % (port, local_path, username, host, remote_path)) -return remote_scp(command, password, timeout) +return remote_scp(command, password, log_filename, timeout) def scp_from_remote(host, port, username, password, remote_path, local_path, -timeout=600): +log_filename=None, timeout=600): Copy files from a remote host (guest). @@ -667,6 +685,7 @@ def scp_from_remote(host, port, username,
[PATCH 11/13] KVM test: Enable the serial console during unattended installation
This patch enables the serial console during unattended installation for all linux guests. Signed-off-by: Jason Wang jasow...@redhat.com --- client/tests/kvm/tests_base.cfg.sample |8 1 files changed, 4 insertions(+), 4 deletions(-) diff --git a/client/tests/kvm/tests_base.cfg.sample b/client/tests/kvm/tests_base.cfg.sample index ce88235..ec871df 100644 --- a/client/tests/kvm/tests_base.cfg.sample +++ b/client/tests/kvm/tests_base.cfg.sample @@ -527,7 +527,7 @@ variants: pxe_initrd = initrd.img tftp = images/tftpboot extra_params += -bootp /pxelinux.0 -boot cn -kernel_args = ks=floppy nicdelay=60 +kernel_args = ks=floppy nicdelay=60 console=ttyS0,115200 console=tty0 variants: - 8.32: @@ -688,7 +688,7 @@ variants: pxe_initrd = initrd tftp = images/tftpboot extra_params += -bootp /pxelinux.0 -boot cn -kernel_args = autoyast=floppy +kernel_args = autoyast=floppy console=ttyS0,115200 console=tty0 post_install_delay = 10 variants: @@ -770,7 +770,7 @@ variants: pxe_image = linux pxe_initrd = initrd extra_params += -bootp /pxelinux.0 -boot cn -kernel_args = autoyast=floppy +kernel_args = autoyast=floppy console=ttyS0,115200 console=tty0 post_install_delay = 10 variants: @@ -862,7 +862,7 @@ variants: pxe_initrd = initrd.img tftp = images/tftpboot extra_params += -bootp /pxelinux.0 -boot cn -kernel_args = ks=floppy nicdelay=60 +kernel_args = ks=floppy nicdelay=60 console=ttyS0,115200 console=tty0 variants: - 3.9.i386: -- 1.7.0.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 12/13] KVM test: Enable the serial console for all linux guests
From: Jason Wang jasow...@redhat.com As we have the ability to dump the content from serial console or use a session through it, we need to redirect the console to serial through unattended files to make use of it. The patch also keeps tty0 according to the suggestion of Michael Goldish. Signed-off-by: Jason Wang jasow...@redhat.com --- client/tests/kvm/unattended/Fedora-10.ks |2 +- client/tests/kvm/unattended/Fedora-11.ks |2 +- client/tests/kvm/unattended/Fedora-12.ks |2 +- client/tests/kvm/unattended/Fedora-13.ks |2 +- client/tests/kvm/unattended/Fedora-8.ks |2 +- client/tests/kvm/unattended/Fedora-9.ks |2 +- client/tests/kvm/unattended/OpenSUSE-11.xml |1 + client/tests/kvm/unattended/RHEL-3-series.ks |2 +- client/tests/kvm/unattended/RHEL-4-series.ks |2 +- client/tests/kvm/unattended/RHEL-5-series.ks |2 +- client/tests/kvm/unattended/SLES-11.xml |1 + 11 files changed, 11 insertions(+), 9 deletions(-) diff --git a/client/tests/kvm/unattended/Fedora-10.ks b/client/tests/kvm/unattended/Fedora-10.ks index 43c236a..03163c3 100644 --- a/client/tests/kvm/unattended/Fedora-10.ks +++ b/client/tests/kvm/unattended/Fedora-10.ks @@ -11,7 +11,7 @@ firewall --enabled --ssh selinux --enforcing timezone --utc America/New_York firstboot --disable -bootloader --location=mbr +bootloader --location=mbr --append=console=ttyS0,115200 console=tty0 zerombr clearpart --all --initlabel autopart diff --git a/client/tests/kvm/unattended/Fedora-11.ks b/client/tests/kvm/unattended/Fedora-11.ks index bef3af7..443e2c3 100644 --- a/client/tests/kvm/unattended/Fedora-11.ks +++ b/client/tests/kvm/unattended/Fedora-11.ks @@ -10,7 +10,7 @@ firewall --enabled --ssh selinux --enforcing timezone --utc America/New_York firstboot --disable -bootloader --location=mbr +bootloader --location=mbr --append=console=ttyS0,115200 console=tty0 zerombr clearpart --all --initlabel diff --git a/client/tests/kvm/unattended/Fedora-12.ks b/client/tests/kvm/unattended/Fedora-12.ks index bef3af7..443e2c3 100644 --- 
a/client/tests/kvm/unattended/Fedora-12.ks +++ b/client/tests/kvm/unattended/Fedora-12.ks @@ -10,7 +10,7 @@ firewall --enabled --ssh selinux --enforcing timezone --utc America/New_York firstboot --disable -bootloader --location=mbr +bootloader --location=mbr --append=console=ttyS0,115200 console=tty0 zerombr clearpart --all --initlabel diff --git a/client/tests/kvm/unattended/Fedora-13.ks b/client/tests/kvm/unattended/Fedora-13.ks index 0be7d06..ef978e8 100644 --- a/client/tests/kvm/unattended/Fedora-13.ks +++ b/client/tests/kvm/unattended/Fedora-13.ks @@ -10,7 +10,7 @@ firewall --enabled --ssh selinux --enforcing timezone --utc America/New_York firstboot --disable -bootloader --location=mbr +bootloader --location=mbr --append=console=ttyS0,115200 console=tty0 zerombr clearpart --all --initlabel diff --git a/client/tests/kvm/unattended/Fedora-8.ks b/client/tests/kvm/unattended/Fedora-8.ks index cde85dd..3e9d387 100644 --- a/client/tests/kvm/unattended/Fedora-8.ks +++ b/client/tests/kvm/unattended/Fedora-8.ks @@ -11,7 +11,7 @@ firewall --enabled --ssh selinux --enforcing timezone --utc America/New_York firstboot --disable -bootloader --location=mbr +bootloader --location=mbr --append=console=ttyS0,115200 console=tty0 zerombr clearpart --all --initlabel autopart diff --git a/client/tests/kvm/unattended/Fedora-9.ks b/client/tests/kvm/unattended/Fedora-9.ks index cde85dd..3e9d387 100644 --- a/client/tests/kvm/unattended/Fedora-9.ks +++ b/client/tests/kvm/unattended/Fedora-9.ks @@ -11,7 +11,7 @@ firewall --enabled --ssh selinux --enforcing timezone --utc America/New_York firstboot --disable -bootloader --location=mbr +bootloader --location=mbr --append=console=ttyS0,115200 console=tty0 zerombr clearpart --all --initlabel autopart diff --git a/client/tests/kvm/unattended/OpenSUSE-11.xml b/client/tests/kvm/unattended/OpenSUSE-11.xml index 7dd44fa..64140bf 100644 --- a/client/tests/kvm/unattended/OpenSUSE-11.xml +++ b/client/tests/kvm/unattended/OpenSUSE-11.xml @@ -50,6 
+50,7 @@ moduleedd/module /initrd_module /initrd_modules +appendconsole=ttyS0,115200 console=tty0/append loader_typegrub/loader_type sections config:type=list/ /bootloader diff --git a/client/tests/kvm/unattended/RHEL-3-series.ks b/client/tests/kvm/unattended/RHEL-3-series.ks index 5321118..413890a 100644 --- a/client/tests/kvm/unattended/RHEL-3-series.ks +++ b/client/tests/kvm/unattended/RHEL-3-series.ks @@ -10,7 +10,7 @@ rootpw 123456 firewall --enabled --ssh timezone America/New_York firstboot --disable -bootloader --location=mbr +bootloader --location=mbr --append=console=ttyS0,115200 console=tty0 clearpart --all --initlabel autopart reboot diff --git a/client/tests/kvm/unattended/RHEL-4-series.ks b/client/tests/kvm/unattended/RHEL-4-series.ks index 159998b..213914d 100644 ---
[PATCH 13/13] KVM test: Fix a small bug on timedrift test
In timedrift.py, a prefix is added to indicate the kvm subprocess instances of the guest load processes. Since the ssh sessions are now being logged, this implies changing the kvm subprocess callback function. When this happens, the wrong parameters will be passed to the logging system causing an exception while formatting the message. Let's fix that. Signed-off-by: Lucas Meneghel Rodrigues l...@redhat.com Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/tests/timedrift.py |5 + 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/client/tests/kvm/tests/timedrift.py b/client/tests/kvm/tests/timedrift.py index 028b74c..a6d3076 100644 --- a/client/tests/kvm/tests/timedrift.py +++ b/client/tests/kvm/tests/timedrift.py @@ -90,6 +90,11 @@ def run_timedrift(test, params, env): load_session = vm.remote_login() if not load_session: raise error.TestFail(Could not log into guest) +# Set output func to None to stop it from being called so we +# can change the callback function and the parameters it takes +# with no problems +load_session.set_output_func(None) +load_session.set_output_params(()) load_session.set_output_prefix((guest load %d) % i) load_session.set_output_func(logging.debug) guest_load_sessions.append(load_session) -- 1.7.0.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V2 2/5] para virt interface of perf to support kvm guest os statistics collection in guest os
On Mon, 2010-06-21 at 15:00 +0300, Avi Kivity wrote: On 06/21/2010 12:31 PM, Zhang, Yanmin wrote: The 2nd patch is to change the definition of perf_event to facilitate perf attr copy when a hypercall happens. Signed-off-by: Zhang Yanminyanmin_zh...@linux.intel.com --- --- linux-2.6_tip0620/include/linux/perf_event.h2010-06-21 15:19:52.821999849 +0800 +++ linux-2.6_tip0620perfkvm/include/linux/perf_event.h 2010-06-21 16:53:49.283999849 +0800 @@ -188,7 +188,10 @@ struct perf_event_attr { __u64 sample_type; __u64 read_format; Assuming these flags are available to the guest? These flags are used by generic perf codes. To match with host kernel, we wish guest os also use the flags. - __u64 disabled : 1, /* off by default*/ + union { + __u64 flags; + struct { + __u64 disabled : 1, /* off by default*/ inherit: 1, /* children inherit it */ inherit is meaningless for a guest. Right. host kernel will reset it to 0 before create perf_event for guest os. Here we couldn't delete the flag as it's used by perf generic codes. I need separate the patch a little better. All definitions in include/linux/perf_event.h are mostly perf generic code related. I'm very careful to change it. pinned : 1, /* must always be on PMU */ We cannot allow a guest to pin a counter. Ok. I will reset it in function kvm_pv_perf_op_open. The other flags are also problematic. I'd like to see virt-specific flags (probably we'll only need kernel/user and nested_hv for nested virtualization). How about to add more comments around struct guest_perf_attr-flags that guest os developers should take a look at include/linux/perf_event.h? BTW, I will reset more flags to 0 in kvm_pv_perf_op_open. Something that is worrying is that we don't expose group information. perf will multiplex the events for us, but there will be a loss in accuracy. 
#ifdef CONFIG_HAVE_HW_BREAKPOINT #includeasm/hw_breakpoint.h #endif @@ -753,6 +752,20 @@ struct perf_event { perf_overflow_handler_t overflow_handler; + /* +* pointers used by kvm perf paravirt interface. +* +* 1) Used in host kernel and points to host_perf_shadow which +* has information about guest perf_event +*/ + void*host_perf_shadow; Can we have real types instead of void pointers? I just want perf generic codes have less dependency on KVM codes. + /* +* 2) Used in guest kernel and points to guest_perf_shadow which +* is used as a communication area with host kernel. Host kernel +* copies overflow data to it when an event overflows. +*/ + void*guest_perf_shadow; It's strange to see both guest and host parts in the same patch. Splitting to separate patches will really help review. It's a little hard to split the patches if they change the same file. Perhaps I could add more statements before the patch when I send it out. @@ -1626,9 +1629,22 @@ void perf_event_task_tick(struct task_st if (ctx ctx-nr_events ctx-nr_events != ctx-nr_active) rotate = 1; - perf_ctx_adjust_freq(cpuctx-ctx); - if (ctx) - perf_ctx_adjust_freq(ctx); +#ifdef CONFIG_KVM_PERF + if (kvm_para_available()) { + /* +* perf_ctx_adjust_freq causes lots of pmu-read which would +* trigger too many vmexit to host kernel. We disable it +* under para virt situation +*/ + adjust_freq = 0; + } +#endif Perhaps we can have a batch read interface which will read many counters at once. It's a good idea. But that will touch many perf generic codes which causes it's hard to maintain or follow future changes. This would reduce the number of exits. Also adjust the frequency less frequently. Here it depends on process scheduler frequency, CONFIG_HZ. 
+ + if (adjust_freq) { + perf_ctx_adjust_freq(cpuctx-ctx); + if (ctx) + perf_ctx_adjust_freq(ctx); + } -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM Test: Fix invalid literal bug in ioquit
- Michael Goldish mgold...@redhat.com wrote: From: Michael Goldish mgold...@redhat.com To: Feng Yang fy...@redhat.com Cc: autot...@test.kernel.org, kvm@vger.kernel.org Sent: Monday, June 21, 2010 7:19:58 PM GMT +08:00 Beijing / Chongqing / Hong Kong / Urumqi Subject: Re: [PATCH] KVM Test: Fix invalid literal bug in ioquit On 06/21/2010 01:07 PM, Feng Yang wrote: Sometime check_cmd could not finish in setting time. Then o=, so int(o) will cause ValueError: invalid literal for int() with base 10: '' So change to check return status. Signed-off-by: Feng Yang fy...@redhat.com --- client/tests/kvm/tests/ioquit.py |6 +++--- client/tests/kvm/tests_base.cfg.sample |2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/client/tests/kvm/tests/ioquit.py b/client/tests/kvm/tests/ioquit.py index 389a867..8126139 100644 --- a/client/tests/kvm/tests/ioquit.py +++ b/client/tests/kvm/tests/ioquit.py @@ -23,13 +23,13 @@ def run_ioquit(test, params, env): (s, o) = session.get_command_status_output(bg_cmd, timeout=60) check_cmd = params.get(check_cmd) (s, o) = session2.get_command_status_output(check_cmd, timeout=60) -if int(o) = 0: +if s: raise error.TestError(Fail to add IO workload for Guest OS) Please use 'if s != 0' because in case of a timeout s is None. Hi Michael, thanks for your comments! But here, I think 'if s:' is better. get_command_status_output(check_cmd, timeout=60) timeout should caused by workload is bigger in the guest. This just what we want. Should not throw an error here. Only 'if s:' match, we can say 'Fail to add IO workload for Guest OS'. Thanks! logging.info(Sleep for a while) time.sleep(random.randrange(30,100)) -(s, o) = session2.get_command_status_output(check_cmd, timeout=300) -if int(o) = 0: +(s, o) = session2.get_command_status_output(check_cmd, timeout=60) +if s: Same here. 
logging.info(IO workload finished before the VM was killed) logging.info(Kill the virtual machine) vm.process.close() diff --git a/client/tests/kvm/tests_base.cfg.sample b/client/tests/kvm/tests_base.cfg.sample index ce88235..0fd5543 100644 --- a/client/tests/kvm/tests_base.cfg.sample +++ b/client/tests/kvm/tests_base.cfg.sample @@ -411,7 +411,7 @@ variants: - ioquit: type = ioquit background_cmd = for i in 1 2 3 4; do (nohup dd if=/dev/urandom of=/tmp/file bs=102400 count=1000 ) done -check_cmd = ps -a |grep dd |wc -l +check_cmd = ps -a |grep dd login_timeout = 360 - qemu_img: -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V2 3/5] para virt interface of perf to support kvm guest os statistics collection in guest os
On Mon, 2010-06-21 at 15:33 +0300, Avi Kivity wrote: On 06/21/2010 12:31 PM, Zhang, Yanmin wrote: The 3rd patch is to implement para virt perf at host kernel. @@ -64,6 +73,85 @@ struct kvm_mmu_op_release_pt { #ifdef __KERNEL__ #includeasm/processor.h +/* + * In host kernel, perf_event-host_perf_shadow points to + * host_perf_shadow which records some information + * about the guest. + */ +struct host_perf_shadow { + /* guest perf_event id passed from guest os */ + int id; + /* +* Host kernel saves data into data member counter firstly. +* kvm will get data from this counter and calls kvm functions +* to copy or add data back to guets os before entering guest os +* next time +*/ + struct guest_perf_event counter; + /* guest_event_addr is gpa_t pointing to guest os guest_perf_event*/ + __u64 guest_event_addr; So just use gpa_t as the type. host_perf_shadow-guest_event_addr is a copy of guest_event_addr-guest_event_addr. As the latter's type is __u64 as the interface between guest os and host os, I use __u64 as the type of host_perf_shadow-guest_event_addr. + + /* +* Link to of kvm.kvm_arch.shadow_hash_table +*/ + struct list_head shadow_entry; + struct kvm_vcpu *vcpu; + + struct perf_event *host_event; + /* +* Below counter is to prevent malicious guest os to try to +* close/enable event at the same time. +*/ + atomic_t ref_counter; If events are made per-vcpu (like real hardware), races become impossible. This design is to deal with a task context perf collection in guest os. Scenario 1: 1) guest os starts to collect statistics of process A on vcpu 0; 2) process A is scheduled to vcpu 1. Then, the perf_event at host side need to be moved to VCPU 1 's thread. With the per KVM instance design, we needn't move host_perf_shadow among vcpus. Scenario 2: 1) guest os creates a perf_event at host side on vcpu 0; 2) malicious guest os calls close to delete the host perf_event on vcpu 1, but enables the perf_event on vcpu0 at the same time. 
When close thread runs to get the host_perf_shadow from the list, enable thread also gets it. Then, close thread deletes the perf_event, and enable thread will cause host kernel panic when using host_perf_shadow. +}; Please move this structure to include/linux/kvm_host.h. No need to spam kvm_para.h. Note it's not x86 specific (though you can leave arch enabling to arch maintainers). Ok. Originally, I wanted to do so, but I'm afraid other arch might be not happy. + +/* + * In guest kernel, perf_event-guest_shadow points to + * guest_perf_shadow which records some information + * about the guest. + */ +struct guest_perf_shadow { + /* guest perf_event id passed from guest os */ + int id; + /* +* Host kernel kvm saves data into data member counter +*/ + struct guest_perf_event counter; +}; Don't ordinary perf structures already have a counter ID which we can reuse? No. In the other hand, if we assume generic perf has, we couldn't use it, because generic perf id (actually there is no) is host kernel system-wide while here guest_perf_shadow-id is per kvm instance wide. + +/* + * guest_perf_attr is used when guest calls hypercall to + * open a new perf_event at host side. Mostly, it's a copy of + * perf_event_attr and deletes something not used by host kernel. + */ +struct guest_perf_attr { + __u32 type; + __u64 config; + __u64 sample_period; + __u64 sample_type; + __u64 read_format; + __u64 flags; + __u32 bp_type; + __u64 bp_addr; + __u64 bp_len; +}; This is really not a guest or host structure, it's part of the interface. So please rename it (and similar) kvm_pv_perf_*. Good idea. 
@@ -24,6 +24,7 @@ #includeasm/desc.h #includeasm/mtrr.h #includeasm/msr-index.h +#includeasm/perf_event.h #define KVM_MAX_VCPUS 64 #define KVM_MEMORY_SLOTS 32 @@ -360,6 +361,18 @@ struct kvm_vcpu_arch { /* fields used by HYPER-V emulation */ u64 hv_vapic; + + /* +* Fields used by PARAVIRT perf interface: +* +* kvm checks overflow_events before entering guest os, +* and copy data back to guest os. +* event_mutex is to avoid a race between NMI perf event overflow +* handler, event close, and enable/disable. +*/ + struct mutex event_mutex; No race can exist. The host NMI handler cannot take any mutex We use a mutex_trylock in NMI hanlder. If it can't get the lock, there is a NMI miss happening, but host kernel still updates perf_event-host_perf_shadow.counter, so the overflow data will be accumulated. so it must be immune to