[COMMIT master] KVM: Expand on help info to specify kvm intel and amd module names
From: Robert P. J. Day rpj...@crashcourse.ca Signed-off-by: Robert P. J. Day rpj...@crashcourse.ca Cc: Avi Kivity a...@redhat.com Signed-off-by: Andrew Morton a...@linux-foundation.org Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index a58504e..8600a09 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -50,6 +50,9 @@ config KVM_INTEL Provides support for KVM on Intel processors equipped with the VT extensions. + To compile this as a module, choose M here: the module + will be called kvm-intel. + config KVM_AMD tristate KVM for AMD processors support depends on KVM @@ -57,6 +60,9 @@ config KVM_AMD Provides support for KVM on AMD processors equipped with the AMD-V (SVM) extensions. + To compile this as a module, choose M here: the module + will be called kvm-amd. + config KVM_TRACE bool KVM trace support depends on KVM SYSFS -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: x86: check for cr3 validity in mmu_alloc_roots
From: Marcelo Tosatti mtosa...@redhat.com Verify the cr3 address stored in vcpu-arch.cr3 points to an existant memslot. If not, inject a triple fault. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 80c76f4..479e748 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1912,7 +1912,19 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) vcpu-arch.mmu.root_hpa = INVALID_PAGE; } -static void mmu_alloc_roots(struct kvm_vcpu *vcpu) +static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn) +{ + int ret = 0; + + if (!kvm_is_visible_gfn(vcpu-kvm, root_gfn)) { + set_bit(KVM_REQ_TRIPLE_FAULT, vcpu-requests); + ret = 1; + } + + return ret; +} + +static int mmu_alloc_roots(struct kvm_vcpu *vcpu) { int i; gfn_t root_gfn; @@ -1927,13 +1939,15 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) ASSERT(!VALID_PAGE(root)); if (tdp_enabled) direct = 1; + if (mmu_check_root(vcpu, root_gfn)) + return 1; sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, direct, ACC_ALL, NULL); root = __pa(sp-spt); ++sp-root_count; vcpu-arch.mmu.root_hpa = root; - return; + return 0; } direct = !is_paging(vcpu); if (tdp_enabled) @@ -1950,6 +1964,8 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) root_gfn = vcpu-arch.pdptrs[i] PAGE_SHIFT; } else if (vcpu-arch.mmu.root_level == 0) root_gfn = 0; + if (mmu_check_root(vcpu, root_gfn)) + return 1; sp = kvm_mmu_get_page(vcpu, root_gfn, i 30, PT32_ROOT_LEVEL, direct, ACC_ALL, NULL); @@ -1958,6 +1974,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) vcpu-arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; } vcpu-arch.mmu.root_hpa = __pa(vcpu-arch.mmu.pae_root); + return 0; } static void mmu_sync_roots(struct kvm_vcpu *vcpu) @@ -1976,7 +1993,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) for (i = 0; i 4; ++i) { hpa_t root = vcpu-arch.mmu.pae_root[i]; - if (root) { + if (root VALID_PAGE(root)) { root = PT64_BASE_ADDR_MASK; 
sp = page_header(root); mmu_sync_children(vcpu, sp); @@ -2311,9 +2328,11 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) goto out; spin_lock(vcpu-kvm-mmu_lock); kvm_mmu_free_some_pages(vcpu); - mmu_alloc_roots(vcpu); + r = mmu_alloc_roots(vcpu); mmu_sync_roots(vcpu); spin_unlock(vcpu-kvm-mmu_lock); + if (r) + goto out; kvm_x86_ops-set_cr3(vcpu, vcpu-arch.mmu.root_hpa); kvm_mmu_flush_tlb(vcpu); out: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ecc35c6..33f850b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4564,6 +4564,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm, void kvm_arch_flush_shadow(struct kvm *kvm) { kvm_mmu_zap_all(kvm); + kvm_reload_remote_mmus(kvm); } int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: s390: Unlink vcpu on destroy - v2
From: Carsten Otte co...@de.ibm.com This patch makes sure we do unlink a vcpu's sie control block from the system control area in kvm_arch_vcpu_destroy. This prevents illegal accesses to the sie control block from other virtual cpus after free. Reported-by: Mijo Safradin m...@linux.vnet.ibm.com Signed-off-by: Carsten Otte co...@de.ibm.com Signed-off-by: Christian Ehrhardt ehrha...@de.ibm.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 36c654d..628494a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -196,6 +196,10 @@ out_nokvm: void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 3, %s, free cpu); + if (vcpu-kvm-arch.sca-cpu[vcpu-vcpu_id].sda == + (__u64) vcpu-arch.sie_block) + vcpu-kvm-arch.sca-cpu[vcpu-vcpu_id].sda = 0; + smp_mb(); free_page((unsigned long)(vcpu-arch.sie_block)); kvm_vcpu_uninit(vcpu); kfree(vcpu); @@ -310,8 +314,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu-arch.sie_block-icpua = id; BUG_ON(!kvm-arch.sca); - BUG_ON(kvm-arch.sca-cpu[id].sda); - kvm-arch.sca-cpu[id].sda = (__u64) vcpu-arch.sie_block; + if (!kvm-arch.sca-cpu[id].sda) + kvm-arch.sca-cpu[id].sda = (__u64) vcpu-arch.sie_block; + else + BUG_ON(!kvm-vcpus[id]); /* vcpu does already exist */ vcpu-arch.sie_block-scaoh = (__u32)(((__u64)kvm-arch.sca) 32); vcpu-arch.sie_block-scaol = (__u32)(__u64)kvm-arch.sca; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Unprotect a page if #PF happens during NMI injection.
From: Gleb Natapov g...@redhat.com It is done for exception and interrupt already. Signed-off-by: Gleb Natapov g...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8b5ffbd..ac3d5ba 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1122,8 +1122,7 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) if (npt_enabled) svm_flush_tlb(svm-vcpu); else { - if (svm-vcpu.arch.interrupt.pending || - svm-vcpu.arch.exception.pending) + if (kvm_event_needs_reinjection(svm-vcpu)) kvm_mmu_unprotect_page_virt(svm-vcpu, fault_address); } return kvm_mmu_page_fault(svm-vcpu, fault_address, error_code); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f3ab27b..8981654 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2615,7 +2615,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) cr2 = vmcs_readl(EXIT_QUALIFICATION); KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, (u32)((u64)cr2 32), handler); - if (vcpu-arch.interrupt.pending || vcpu-arch.exception.pending) + if (kvm_event_needs_reinjection(vcpu)) kvm_mmu_unprotect_page_virt(vcpu, cr2); return kvm_mmu_page_fault(vcpu, cr2, error_code); } diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 39350b2..21203d4 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -30,4 +30,10 @@ static inline u8 kvm_pop_irq(struct kvm_vcpu *vcpu) clear_bit(word_index, vcpu-arch.irq_summary); return irq; } + +static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu) +{ + return vcpu-arch.exception.pending || vcpu-arch.interrupt.pending || + vcpu-arch.nmi_injected; +} #endif -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: s390: use hrtimer for clock wakeup from idle - v2
From: Christian Borntraeger borntrae...@de.ibm.com This patch reworks the s390 clock comparator wakeup to hrtimer. The clock comparator is a per-cpu value that is compared against the TOD clock. If ckc = TOD an external interrupt 1004 is triggered. Since the clock comparator and the TOD clock have a much higher resolution than jiffies we should use hrtimers to trigger the wakeup. This speeds up guest nanosleep for small values. Since hrtimers callbacks run in hard-irq context, I added a tasklet to do the actual work with enabled interrupts. Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com Signed-off-by: Carsten Otte co...@de.ibm.com Signed-off-by: Christian Ehrhardt ehrha...@de.ibm.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 54ea39f..a27d0d5 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -13,6 +13,8 @@ #ifndef ASM_KVM_HOST_H #define ASM_KVM_HOST_H +#include linux/hrtimer.h +#include linux/interrupt.h #include linux/kvm_host.h #include asm/debug.h #include asm/cpuid.h @@ -210,7 +212,8 @@ struct kvm_vcpu_arch { s390_fp_regs guest_fpregs; unsigned int guest_acrs[NUM_ACRS]; struct kvm_s390_local_interrupt local_int; - struct timer_list ckc_timer; + struct hrtimerckc_timer; + struct tasklet_struct tasklet; union { cpuid_t cpu_id; u64 stidp_data; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 4ed4c3a..a48830f 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -12,6 +12,8 @@ #include asm/lowcore.h #include asm/uaccess.h +#include linux/hrtimer.h +#include linux/interrupt.h #include linux/kvm_host.h #include linux/signal.h #include kvm-s390.h @@ -361,12 +363,10 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) return 0; } - sltime = (vcpu-arch.sie_block-ckc - now) / (0xf424ul / HZ) + 1; + sltime = ((vcpu-arch.sie_block-ckc - now)*125)9; - vcpu-arch.ckc_timer.expires = jiffies + sltime; 
- - add_timer(vcpu-arch.ckc_timer); - VCPU_EVENT(vcpu, 5, enabled wait timer:%llx jiffies, sltime); + hrtimer_start(vcpu-arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); + VCPU_EVENT(vcpu, 5, enabled wait via clock comparator: %llx ns, sltime); no_timer: spin_lock_bh(vcpu-arch.local_int.float_int-lock); spin_lock_bh(vcpu-arch.local_int.lock); @@ -389,21 +389,34 @@ no_timer: remove_wait_queue(vcpu-wq, wait); spin_unlock_bh(vcpu-arch.local_int.lock); spin_unlock_bh(vcpu-arch.local_int.float_int-lock); - del_timer(vcpu-arch.ckc_timer); + hrtimer_try_to_cancel(vcpu-arch.ckc_timer); return 0; } -void kvm_s390_idle_wakeup(unsigned long data) +void kvm_s390_tasklet(unsigned long parm) { - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; + struct kvm_vcpu *vcpu = (struct kvm_vcpu *) parm; - spin_lock_bh(vcpu-arch.local_int.lock); + spin_lock(vcpu-arch.local_int.lock); vcpu-arch.local_int.timer_due = 1; if (waitqueue_active(vcpu-arch.local_int.wq)) wake_up_interruptible(vcpu-arch.local_int.wq); - spin_unlock_bh(vcpu-arch.local_int.lock); + spin_unlock(vcpu-arch.local_int.lock); } +/* + * low level hrtimer wake routine. Because this runs in hardirq context + * we schedule a tasklet to do the real work. 
+ */ +enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) +{ + struct kvm_vcpu *vcpu; + + vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); + tasklet_schedule(vcpu-arch.tasklet); + + return HRTIMER_NORESTART; +} void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) { diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 86567e1..dc3d068 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -15,6 +15,7 @@ #include linux/compiler.h #include linux/err.h #include linux/fs.h +#include linux/hrtimer.h #include linux/init.h #include linux/kvm.h #include linux/kvm_host.h @@ -283,8 +284,10 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu-arch.sie_block-gmsor = vcpu-kvm-arch.guest_origin; vcpu-arch.sie_block-ecb = 2; vcpu-arch.sie_block-eca = 0xC1002001U; - setup_timer(vcpu-arch.ckc_timer, kvm_s390_idle_wakeup, -(unsigned long) vcpu); + hrtimer_init(vcpu-arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); + tasklet_init(vcpu-arch.tasklet, kvm_s390_tasklet, +(unsigned long) vcpu); + vcpu-arch.ckc_timer.function = kvm_s390_idle_wakeup; get_cpu_id(vcpu-arch.cpu_id); vcpu-arch.cpu_id.version = 0xff; return 0; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 00bbe69..748fee8 100644 ---
[COMMIT master] KVM: s390: optimize float int lock: spin_lock_bh --> spin_lock
From: Christian Borntraeger borntrae...@de.ibm.com The floating interrupt lock is only taken in process context. We can replace all spin_lock_bh with standard spin_lock calls. Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com Signed-off-by: Christian Ehrhardt ehrha...@de.ibm.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index a48830f..f04f530 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -301,13 +301,13 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) } if ((!rc) atomic_read(fi-active)) { - spin_lock_bh(fi-lock); + spin_lock(fi-lock); list_for_each_entry(inti, fi-list, list) if (__interrupt_is_deliverable(vcpu, inti)) { rc = 1; break; } - spin_unlock_bh(fi-lock); + spin_unlock(fi-lock); } if ((!rc) (vcpu-arch.sie_block-ckc @@ -368,7 +368,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) hrtimer_start(vcpu-arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); VCPU_EVENT(vcpu, 5, enabled wait via clock comparator: %llx ns, sltime); no_timer: - spin_lock_bh(vcpu-arch.local_int.float_int-lock); + spin_lock(vcpu-arch.local_int.float_int-lock); spin_lock_bh(vcpu-arch.local_int.lock); add_wait_queue(vcpu-arch.local_int.wq, wait); while (list_empty(vcpu-arch.local_int.list) @@ -377,18 +377,18 @@ no_timer: !signal_pending(current)) { set_current_state(TASK_INTERRUPTIBLE); spin_unlock_bh(vcpu-arch.local_int.lock); - spin_unlock_bh(vcpu-arch.local_int.float_int-lock); + spin_unlock(vcpu-arch.local_int.float_int-lock); vcpu_put(vcpu); schedule(); vcpu_load(vcpu); - spin_lock_bh(vcpu-arch.local_int.float_int-lock); + spin_lock(vcpu-arch.local_int.float_int-lock); spin_lock_bh(vcpu-arch.local_int.lock); } __unset_cpu_idle(vcpu); __set_current_state(TASK_RUNNING); remove_wait_queue(vcpu-wq, wait); spin_unlock_bh(vcpu-arch.local_int.lock); - spin_unlock_bh(vcpu-arch.local_int.float_int-lock); + spin_unlock(vcpu-arch.local_int.float_int-lock); 
hrtimer_try_to_cancel(vcpu-arch.ckc_timer); return 0; } @@ -455,7 +455,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) if (atomic_read(fi-active)) { do { deliver = 0; - spin_lock_bh(fi-lock); + spin_lock(fi-lock); list_for_each_entry_safe(inti, n, fi-list, list) { if (__interrupt_is_deliverable(vcpu, inti)) { list_del(inti-list); @@ -466,7 +466,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) } if (list_empty(fi-list)) atomic_set(fi-active, 0); - spin_unlock_bh(fi-lock); + spin_unlock(fi-lock); if (deliver) { __do_deliver_interrupt(vcpu, inti); kfree(inti); @@ -531,7 +531,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, mutex_lock(kvm-lock); fi = kvm-arch.float_int; - spin_lock_bh(fi-lock); + spin_lock(fi-lock); list_add_tail(inti-list, fi-list); atomic_set(fi-active, 1); sigcpu = find_first_bit(fi-idle_mask, KVM_MAX_VCPUS); @@ -548,7 +548,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, if (waitqueue_active(li-wq)) wake_up_interruptible(li-wq); spin_unlock_bh(li-lock); - spin_unlock_bh(fi-lock); + spin_unlock(fi-lock); mutex_unlock(kvm-lock); return 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index dc3d068..36c654d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -318,11 +318,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, spin_lock_init(vcpu-arch.local_int.lock); INIT_LIST_HEAD(vcpu-arch.local_int.list); vcpu-arch.local_int.float_int = kvm-arch.float_int; - spin_lock_bh(kvm-arch.float_int.lock); + spin_lock(kvm-arch.float_int.lock); kvm-arch.float_int.local_int[id] = vcpu-arch.local_int; init_waitqueue_head(vcpu-arch.local_int.wq); vcpu-arch.local_int.cpuflags = vcpu-arch.sie_block-cpuflags; - spin_unlock_bh(kvm-arch.float_int.lock); + spin_unlock(kvm-arch.float_int.lock); rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) diff --git a/arch/s390/kvm/priv.c
[COMMIT master] KVM: s390: Verify memory in kvm run
From: Carsten Otte co...@de.ibm.com This check verifies that the guest we're trying to run in KVM_RUN has some memory assigned to it. It enters an endless exception loop if this is not the case. Reported-by: Mijo Safradin m...@linux.vnet.ibm.com Signed-off-by: Carsten Otte co...@de.ibm.com Signed-off-by: Christian Ehrhardt ehrha...@de.ibm.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 628494a..10bccd1 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -487,6 +487,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu_load(vcpu); + /* verify, that memory has been registered */ + if (!vcpu-kvm-arch.guest_memsize) { + vcpu_put(vcpu); + return -EINVAL; + } + if (vcpu-sigset_active) sigprocmask(SIG_SETMASK, vcpu-sigset, sigsaved); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Do not allow interrupt injection from userspace if there is a pending event.
From: Gleb Natapov g...@redhat.com The exception will immediately close the interrupt window. Signed-off-by: Gleb Natapov g...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 33f850b..d9396a7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3101,8 +3101,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu, kvm_run-ready_for_interrupt_injection = 1; else kvm_run-ready_for_interrupt_injection = - (kvm_arch_interrupt_allowed(vcpu) -!kvm_cpu_has_interrupt(vcpu)); + kvm_arch_interrupt_allowed(vcpu) + !kvm_cpu_has_interrupt(vcpu) + !kvm_event_needs_reinjection(vcpu); } static void vapic_enter(struct kvm_vcpu *vcpu) -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: skip_emulated_instruction() decode instruction if size is not known
From: Gleb Natapov g...@redhat.com Signed-off-by: Gleb Natapov g...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ac3d5ba..1315ce0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -228,7 +228,9 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); if (!svm-next_rip) { - printk(KERN_DEBUG %s: NOP\n, __func__); + if (emulate_instruction(vcpu, vcpu-run, 0, 0, EMULTYPE_SKIP) != + EMULATE_DONE) + printk(KERN_DEBUG %s: NOP\n, __func__); return; } if (svm-next_rip - kvm_rip_read(vcpu) MAX_INST_SIZE) @@ -1868,11 +1870,8 @@ static int task_switch_interception(struct vcpu_svm *svm, if (reason != TASK_SWITCH_GATE || int_type == SVM_EXITINTINFO_TYPE_SOFT || (int_type == SVM_EXITINTINFO_TYPE_EXEPT -(int_vec == OF_VECTOR || int_vec == BP_VECTOR))) { - if (emulate_instruction(svm-vcpu, kvm_run, 0, 0, - EMULTYPE_SKIP) != EMULATE_DONE) - return 0; - } +(int_vec == OF_VECTOR || int_vec == BP_VECTOR))) + skip_emulated_instruction(svm-vcpu); return kvm_task_switch(svm-vcpu, tss_selector, reason); } -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Disable CR8 intercept if tpr patching is active
From: Gleb Natapov g...@redhat.com Signed-off-by: Gleb Natapov g...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7037afa..44e87a5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3138,7 +3138,10 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) if (!kvm_x86_ops-update_cr8_intercept) return; - max_irr = kvm_lapic_find_highest_irr(vcpu); + if (!vcpu-arch.apic-vapic_addr) + max_irr = kvm_lapic_find_highest_irr(vcpu); + else + max_irr = -1; if (max_irr != -1) max_irr = 4; @@ -3245,10 +3248,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_x86_ops-enable_irq_window(vcpu); if (kvm_lapic_enabled(vcpu)) { - if (!vcpu-arch.apic-vapic_addr) - update_cr8_intercept(vcpu); - else - kvm_lapic_sync_to_vapic(vcpu); + update_cr8_intercept(vcpu); + kvm_lapic_sync_to_vapic(vcpu); } up_read(vcpu-kvm-slots_lock); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Do not re-execute INTn instruction.
From: Gleb Natapov g...@redhat.com Re-inject event instead. This is what Intel suggest. Also use correct instruction length when re-injecting soft fault/interrupt. Signed-off-by: Gleb Natapov g...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4347cc3..b5b3a72 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -319,6 +319,8 @@ struct kvm_vcpu_arch { struct kvm_pio_request pio; void *pio_data; + u8 event_exit_inst_len; + struct kvm_queued_exception { bool pending; bool has_error_code; @@ -328,6 +330,7 @@ struct kvm_vcpu_arch { struct kvm_queued_interrupt { bool pending; + bool soft; u8 nr; } interrupt; @@ -511,7 +514,7 @@ struct kvm_x86_ops { u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); void (*patch_hypercall)(struct kvm_vcpu *vcpu, unsigned char *hypercall_addr); - void (*set_irq)(struct kvm_vcpu *vcpu, int vec); + void (*set_irq)(struct kvm_vcpu *vcpu); void (*set_nmi)(struct kvm_vcpu *vcpu); void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, bool has_error_code, u32 error_code); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1315ce0..377c4f1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2310,13 +2310,13 @@ static void svm_queue_irq(struct kvm_vcpu *vcpu, unsigned nr) SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; } -static void svm_set_irq(struct kvm_vcpu *vcpu, int irq) +static void svm_set_irq(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); nested_svm_intr(svm); - svm_queue_irq(vcpu, irq); + svm_queue_irq(vcpu, vcpu-arch.interrupt.nr); } static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) @@ -2418,7 +2418,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) case SVM_EXITINTINFO_TYPE_EXEPT: /* In case of software exception do not reinject an exception vector, but re-execute and instruction instead */ - if (vector == BP_VECTOR || vector == 
OF_VECTOR) + if (kvm_exception_is_soft(vector)) break; if (exitintinfo SVM_EXITINTINFO_VALID_ERR) { u32 err = svm-vmcb-control.exit_int_info_err; @@ -2428,7 +2428,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) kvm_queue_exception(svm-vcpu, vector); break; case SVM_EXITINTINFO_TYPE_INTR: - kvm_queue_interrupt(svm-vcpu, vector); + kvm_queue_interrupt(svm-vcpu, vector, false); break; default: break; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8981654..29b49f0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -801,8 +801,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, return; } - if (nr == BP_VECTOR || nr == OF_VECTOR) { - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); + if (kvm_exception_is_soft(nr)) { + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, +vmx-vcpu.arch.event_exit_inst_len); intr_info |= INTR_TYPE_SOFT_EXCEPTION; } else intr_info |= INTR_TYPE_HARD_EXCEPTION; @@ -2445,9 +2446,11 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); } -static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) +static void vmx_inject_irq(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + uint32_t intr; + int irq = vcpu-arch.interrupt.nr; KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler); @@ -2462,8 +2465,14 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) kvm_rip_write(vcpu, vmx-rmode.irq.rip - 1); return; } - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, - irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); + intr = irq | INTR_INFO_VALID_MASK; + if (vcpu-arch.interrupt.soft) { + intr |= INTR_TYPE_SOFT_INTR; + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, +vmx-vcpu.arch.event_exit_inst_len); + } else + intr |= INTR_TYPE_EXT_INTR; + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); } static void vmx_inject_nmi(struct kvm_vcpu *vcpu) @@ -3024,6 +3033,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 
GUEST_INTR_STATE_NMI); break; case
[COMMIT master] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
From: Avi Kivity a...@redhat.com Conflicts: arch/x86/kvm/x86.c Signed-off-by: Avi Kivity a...@redhat.com -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Always request IRQ/NMI window if an interrupt is pending
From: Gleb Natapov g...@redhat.com Currently they are not requested if there is pending exception. Signed-off-by: Gleb Natapov g...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e395ca4..efba9bc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3148,8 +3148,11 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) kvm_x86_ops-update_cr8_intercept(vcpu, tpr, max_irr); } -static void inject_irq(struct kvm_vcpu *vcpu) +static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + if (vcpu-guest_debug KVM_GUESTDBG_SINGLESTEP) + kvm_x86_ops-set_interrupt_shadow(vcpu, 0); + /* try to reinject previous events if any */ if (vcpu-arch.nmi_injected) { kvm_x86_ops-set_nmi(vcpu); @@ -3177,26 +3180,11 @@ static void inject_irq(struct kvm_vcpu *vcpu) } } -static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - bool req_int_win = !irqchip_in_kernel(vcpu-kvm) - kvm_run-request_interrupt_window; - - if (vcpu-guest_debug KVM_GUESTDBG_SINGLESTEP) - kvm_x86_ops-set_interrupt_shadow(vcpu, 0); - - inject_irq(vcpu); - - /* enable NMI/IRQ window open exits if needed */ - if (vcpu-arch.nmi_pending) - kvm_x86_ops-enable_nmi_window(vcpu); - else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) - kvm_x86_ops-enable_irq_window(vcpu); -} - static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; + bool req_int_win = !irqchip_in_kernel(vcpu-kvm) + kvm_run-request_interrupt_window; if (vcpu-requests) if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, vcpu-requests)) @@ -3250,6 +3238,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) else inject_pending_irq(vcpu, kvm_run); + /* enable NMI/IRQ window open exits if needed */ + if (vcpu-arch.nmi_pending) + kvm_x86_ops-enable_nmi_window(vcpu); + else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) + kvm_x86_ops-enable_irq_window(vcpu); + if (kvm_lapic_enabled(vcpu)) { if 
(!vcpu-arch.apic-vapic_addr) update_cr8_intercept(vcpu); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Move exit due to NMI handling into vmx_complete_interrupts()
From: Gleb Natapov g...@redhat.com To save us one reading of VM_EXIT_INTR_INFO. Signed-off-by: Gleb Natapov g...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 29b49f0..fe2ce2b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3261,8 +3261,17 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) int type; bool idtv_info_valid; - idtv_info_valid = idt_vectoring_info VECTORING_INFO_VALID_MASK; exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + + /* We need to handle NMIs before interrupts are enabled */ + if ((exit_intr_info INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR + (exit_intr_info INTR_INFO_VALID_MASK)) { + KVMTRACE_0D(NMI, vmx-vcpu, handler); + asm(int $2); + } + + idtv_info_valid = idt_vectoring_info VECTORING_INFO_VALID_MASK; + if (cpu_has_virtual_nmis()) { unblock_nmi = (exit_intr_info INTR_INFO_UNBLOCK_NMI) != 0; vector = exit_intr_info INTR_INFO_VECTOR_MASK; @@ -3363,7 +3372,6 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx) static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 intr_info; /* Record the guest's net vcpu time for enforced NMI injections. */ if (unlikely(!cpu_has_virtual_nmis() vmx-soft_vnmi_blocked)) @@ -3490,15 +3498,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) asm(mov %0, %%ds; mov %0, %%es : : r(__USER_DS)); vmx-launched = 1; - intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - - /* We need to handle NMIs before interrupts are enabled */ - if ((intr_info INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR - (intr_info INTR_INFO_VALID_MASK)) { - KVMTRACE_0D(NMI, vcpu, handler); - asm(int $2); - } - vmx_complete_interrupts(vmx); } -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Do not migrate pending software interrupts.
From: Gleb Natapov g...@redhat.com INTn will be re-executed after migration. If we wanted to migrate pending software interrupt we would need to migrate interrupt type and instruction length too, but we do not have all required info on SVM, so SVM-VMX migration would need to re-execute INTn anyway. To make it simple never migrate pending soft interrupt. Signed-off-by: Gleb Natapov g...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index efba9bc..7037afa 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3575,7 +3575,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, memset(sregs-interrupt_bitmap, 0, sizeof sregs-interrupt_bitmap); - if (vcpu-arch.interrupt.pending) + if (vcpu-arch.interrupt.pending !vcpu-arch.interrupt.soft) set_bit(vcpu-arch.interrupt.nr, (unsigned long *)sregs-interrupt_bitmap); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH -tip] x86: kvm replace MSR_IA32_TIME_STAMP_COUNTER with MSR_IA32_TSC of msr-index.h
Use standard msr-index.h's MSR declaration. MSR_IA32_TSC is better than MSR_IA32_TIME_STAMP_COUNTER as it also solves 80 column issue. Signed-off-by: Jaswinder Singh Rajput jaswinderraj...@gmail.com --- arch/x86/include/asm/kvm_host.h |2 -- arch/x86/kvm/svm.c |4 ++-- arch/x86/kvm/vmx.c |4 ++-- arch/x86/kvm/x86.c |5 ++--- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f0faf58..824f5e6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -750,8 +750,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); } -#define MSR_IA32_TIME_STAMP_COUNTER0x010 - #define TSS_IOPB_BASE_OFFSET 0x66 #define TSS_BASE_SIZE 0x68 #define TSS_IOPB_SIZE (65536 / 8) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1821c20..f149d17 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1889,7 +1889,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) struct vcpu_svm *svm = to_svm(vcpu); switch (ecx) { - case MSR_IA32_TIME_STAMP_COUNTER: { + case MSR_IA32_TSC: { u64 tsc; rdtscll(tsc); @@ -1979,7 +1979,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) struct vcpu_svm *svm = to_svm(vcpu); switch (ecx) { - case MSR_IA32_TIME_STAMP_COUNTER: { + case MSR_IA32_TSC: { u64 tsc; rdtscll(tsc); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bb48133..c8d3234 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -897,7 +897,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) case MSR_EFER: return kvm_get_msr_common(vcpu, msr_index, pdata); #endif - case MSR_IA32_TIME_STAMP_COUNTER: + case MSR_IA32_TSC: data = guest_read_tsc(); break; case MSR_IA32_SYSENTER_CS: @@ -957,7 +957,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) case MSR_IA32_SYSENTER_ESP: vmcs_writel(GUEST_SYSENTER_ESP, 
data); break; - case MSR_IA32_TIME_STAMP_COUNTER: + case MSR_IA32_TSC: rdtscll(host_tsc); guest_write_tsc(data, host_tsc); break; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8e4a0ef..db0fd7d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -465,7 +465,7 @@ static u32 msrs_to_save[] = { #ifdef CONFIG_X86_64 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif - MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, + MSR_IA32_TSC, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA }; @@ -637,8 +637,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) /* Keep irq disabled to prevent changes to the clock */ local_irq_save(flags); - kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER, - vcpu-hv_clock.tsc_timestamp); + kvm_get_msr(v, MSR_IA32_TSC, vcpu-hv_clock.tsc_timestamp); ktime_get_ts(ts); local_irq_restore(flags); -- 1.6.0.6 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHv5 1/3] virtio: find_vqs/del_vqs virtio operations
Am Wednesday 13 May 2009 21:08:58 schrieb Michael S. Tsirkin: This replaces find_vq/del_vq with find_vqs/del_vqs virtio operations, and updates all drivers. This is needed for MSI support, because MSI needs to know the total number of vectors upfront. [...] --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -227,6 +227,38 @@ static void kvm_del_vq(struct virtqueue *vq) KVM_S390_VIRTIO_RING_ALIGN)); } +static void vp_del_vqs(struct virtio_device *vdev) s/vp_del_vqs/kvm_del_vqs/ : drivers/s390/kvm/kvm_virtio.c: In function 'kvm_find_vqs': drivers/s390/kvm/kvm_virtio.c:258: error: implicit declaration of function 'kvm_del_vqs' drivers/s390/kvm/kvm_virtio.c: At top level: drivers/s390/kvm/kvm_virtio.c:274: error: 'kvm_del_vqs' undeclared here (not in a function) make[2]: *** [drivers/s390/kvm/kvm_virtio.o] Error 1 +{ + struct virtqueue *vq, *n; + + list_for_each_entry_safe(vq, n, vdev-vqs, list) + kvm_del_vq(vq); +} + +static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[]) +{ + struct kvm_device *kdev = to_kvmdev(vdev); + int i; + + /* We must have this many virtqueues. */ + if (nvqs kdev-desc-num_vq) + return -ENOENT; + + for (i = 0; i nvqs; ++i) { + vqs[i] = kvm_find_vq(vdev, i, callbacks[i], names[i]); + if (IS_ERR(vqs[i])) + goto error; + } + return 0; + +error: + kvm_del_vqs(vdev); + return PTR_ERR(vqs[i]); +} + /* * The config ops structure as defined by virtio config */ @@ -238,8 +270,8 @@ static struct virtio_config_ops kvm_vq_configspace_ops = { .get_status = kvm_get_status, .set_status = kvm_set_status, .reset = kvm_reset, - .find_vq = kvm_find_vq, - .del_vq = kvm_del_vq, + .find_vqs = kvm_find_vqs, + .del_vqs = kvm_del_vqs, }; /* --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h [...] needs an #ifdef __KERNEL__ +#include linux/err.h #include linux/virtio.h [...] 
+ int err = vdev-config-find_vqs(vdev, 1, vq, callbacks, names); + if (err 0) + return ERR_PTR(err); Otherwise ERR_PTR does not compile. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kvm-autotest: The automation plans?
- jason wang jasow...@redhat.com wrote: sudhir kumar 写道: Hi Uri/Lucas, Do you have any plans for enhancing kvm-autotest? I was looking mainly on the following 2 aspects: (1). we have standalone migration only. Is there any plans of enhancing kvm-autotest so that we can trigger migration while a workload is running? Something like this: Start a workload(may be n instances of it). let the test execute for some time. Trigger migration. Log into the target. Check if the migration is succesful Check if the test results are consistent. We have some patches of ping pong migration and workload adding. The migration is based on public bridge and workload adding is based on running benchmark in the background of guest. (2). How can we run N parallel instances of a test? Will the current configuration be easily able to support it? Please provide your thoughts on the above features. The parallelized instances could be easily achieved through job.parallel() of autotest framework, and that is what we have used in our tests. We have make some helper routines such as get_free_port to be reentrant through file lock. We'll probably have to use file locks anyway when we work with TAP, but in VM.create(), not in get_free_port(), because we also want to prevent parallel qemu instances from choosing the same TAP device. I'm not sure how qemu handles this internally, and I'd rather be on the safe side. Do you release the file lock inside get_free_port or only after running qemu? We've implemented following test cases: timedrift(already sent here), savevm/loadvm, suspend/resume, jumboframe, migration between two machines and others. We will sent it here for review in the following weeks. There are some other things could be improved: 1) Current kvm_test.cfg.sample/kvm_test.cfg is transparent to autotest server UI. This would make it hard to configure the tests in the server side. 
During our test, we have merged it into control and make it could be configured by editing control file function of autotest server side web UI. Would it not suffice to just modify the configuration, instead of completely define it, inside the control file? This is possible using parse_string(). For example: cfg = kvm_config.config(kvm_tests.cfg) cfg.parse_string(only weekly) cfg.parse_string(only Fedora RHEL Windows) cfg.parse_string( variants: - 1: only ide - 2: Fedora: no rtl8139 ) list = cfg.get_list() (get_list() returns the test dictionaries.) The advantage here is that we can have a standard kvm_tests.cfg that we all agree on and only rather small environment-specific modifications are made in the control file. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Best choice for copy/clone/snapshot
Ross Boylan wrote: Thanks for all the info. I have one follow up. On Wed, 2009-05-13 at 10:07 +0300, Avi Kivity wrote: As I install software onto a system I want to preserve its state--just the disk state---at various points so I can go back. What is the best way to do this? LVM snapshots. Read up on the 'lvcreate -s' command and option. I may have been unclear. I meant as I install software on the VM. Since some of them are running Windows, they can't do LVM. I am running LVM on my host Linux system. Or are you suggesting that I put the image files on a snapshottable partition? Over time the snapshot seems likely to accumulate a lot of original sectors that don't involve the disk image I care about. Or do you mean I should back each virtual disk with an LVM volume? That does seem cleaner; I've just been following the docs and they use regular files. They say I can't just use a raw partition, but maybe kvm-img -f qcow2 /dev/MyVolumeGroup/Volume10 ? You can certainly use a raw partition, for example qemu-system-x86_64 -drive file=/dev/vg0/guest1,cache=none Does that give better performance? That is the highest performing option, especially with cache=none. The one drawback I see is that I'd have to really take the space I wanted, rather than having it only notionally reserved for a file. Yes, that's a drawback, and there's currently no way around it. I'm not sure how growing the logical volume would interact with qcow... It should work, but I wouldn't recommend it. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: event injection MACROs
Dong, Eddie wrote: OK. Also back to Gleb's question, the reason I want to do that is to simplify event generation mechanism in current KVM. Today KVM use additional layer of exception/nmi/interrupt such as vcpu.arch.exception.pending, vcpu-arch.interrupt.pending vcpu-arch.nmi_injected. All those additional layer is due to compete of VM_ENTRY_INTR_INFO_FIELD write to inject the event. Both SVM VMX has only one resource to inject the virtual event but KVM generates 3 catagory of events in parallel which further requires additional logic to dictate among them. I thought of using a queue to hold all pending events (in a common format), sort it by priority, and inject the head. One example is that exception has higher priority than NMI/IRQ injection in current code which is not true in reality. I don't think it matters in practice, since the guest will see it as a timing issue. NMIs and IRQs are asynchronous (even those generated by the guest through the local APIC). Another issue is that an failed event from previous injection say IRQ or NMI may be discarded if an virtual exception happens in the EXIT handling now. With the patch of generic double fault handling, this case should be handled as normally. Discarding an exception is usually okay as it will be regenerated. I don't think we discard interrupts or NMIs. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [ANNOUNCE] qemu-kvm-0.10.4
Mark McLoughlin wrote: - There will be no stable releases, as such, of the kernel module. You should use upstream linux releases instead - e.g. the latest stable release is 2.6.29.2 Actually, I do plan to release kvm-kmod-2.6.30 (and kvm-kmod-2.6.30.x). -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [KVM PATCH v7 2/3] kvm: add support for irqfd via eventfd-notification interface
Gregory Haskins wrote: KVM provides a complete virtual system environment for guests, including support for injecting interrupts modeled after the real exception/interrupt facilities present on the native platform (such as the IDT on x86). Virtual interrupts can come from a variety of sources (emulated devices, pass-through devices, etc) but all must be injected to the guest via the KVM infrastructure. This patch adds a new mechanism to inject a specific interrupt to a guest using a decoupled eventfd mechnanism: Any legal signal on the irqfd (using eventfd semantics from either userspace or kernel) will translate into an injected interrupt in the guest at the next available interrupt window. r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 3db5d8d..dfc4bcc 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -415,6 +415,7 @@ struct kvm_trace_rec { #define KVM_CAP_ASSIGN_DEV_IRQ 29 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 +#define KVM_CAP_EVENTFD 31 Let's keep a fine granularity and call it IRQFD. + +int +kvm_deassign_irqfd(struct kvm *kvm, int fd) +{ + struct _irqfd *irqfd, *tmp; + + mutex_lock(kvm-lock); + + /* +* linear search isn't brilliant, but this should be a infrequent +* operation and the list should not grow very large +*/ + list_for_each_entry_safe(irqfd, tmp, kvm-irqfds, list) { + if (irqfd-fd != fd) + continue; Please fget() the new fd and compare the filps; fds aren't meaningful in the kernel. You can also drop _irqfd::fd. It may also be useful to compare the gsi, this allows a make-before-break switchover: - guest reroutes irq to a different gsi - associate irqfd with new gsi - disassociate irqfd from old gsi + + irqfd_release(irqfd); + mutex_unlock(kvm-lock); + return 0; Don't return, userspace may have multiple associations? 
-- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kvm-autotest: The automation plans?
On Wed, May 13, 2009 at 11:30 PM, Michael Goldish mgold...@redhat.com wrote: - sudhir kumar smalik...@gmail.com wrote: Hi Uri/Lucas, Do you have any plans for enhancing kvm-autotest? I was looking mainly on the following 2 aspects: (1). we have standalone migration only. Is there any plans of enhancing kvm-autotest so that we can trigger migration while a workload is running? Something like this: Start a workload(may be n instances of it). let the test execute for some time. Trigger migration. Log into the target. Check if the migration is succesful Check if the test results are consistent. Yes, we have plans to implement such functionality. It shouldn't be hard, but we need to give it some thought in order to implement it as elegantly as possible. I completely agree here. (2). How can we run N parallel instances of a test? Will the current configuration be easily able to support it? I currently have some experimental patches that allow running of several parallel queues of tests. But what exactly do you mean by Please post them. N parallel instances of a test? Do you mean N queues? Please provide an example so I can get a better idea. I wanted a parallelism in 2 degrees. Let me try with an example. The following test only raw.*ide.*default.*smp2.*RHEL5.3.i386.*migrate.dbench is just one instance and will create one VM with given specifications and execute migrate and dbench. So I am thinking how can we trigger n similar tests execution in parallel. I feel job.parallel() is meant for that but is kvm_tests.cfg good enough to be used under such a scenario? However we have most of the stuff non static(as getting the free vnc port, etc) but still we have some variables which are static. For ex. vm name, migration port etc. So what are your thoughts on it. In this scenario my system will be having N VMs, all running the same set of testcases. On the other hand I was looking for something like this as well. 
only raw.*ide.*default.*smp2.*RHEL5.3.i386.*migrate.dbench.dbench_instancesN.bonnie Thus all the tests will be executed in normal way except dbench. There should be running N instances of dbench and when over simply run bonnie and exit. I hope my demand to kvm-autotest is not too much but for an effective and rigorous testing of kvm such a framework is necessary. I am bit new to autotest framework and have very little knowledge of the server side. I will start spending some time on looking at the available features. Hope I was clear this time. -- Sudhir Kumar -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
XP SMP using a lot of CPU
I had a similar problem some weeks ago. Finally I found out that my VM running WinXP was working on a non-acpi system (maybe I started kvm with -no-acpi option during the installation). In the Device Manager there has to be the entry Computer-ACPI Multiprocessor PC. Otherwise the VM produced 100% real cpu load on my machines (the fans were running on highest speed level). I just started the WinXP installation in repair mode and this did fix the problem. I hope this helps! regards Johannes On Wed, May 13, 2009 at 2:41 AM, Ross Boylan r...@biostat.ucsf.edu wrote: I just installed XP into a new VM, specifying -smp 2 for the machine. According to top, it's using nearly 200% of a cpu even when I'm not doing anything. Is this real CPU useage, or just a reporting problem (just as my disk image is big according to ls, but isn't really)? If it's real, is there anything I can do about it? kvm 0.7.2 on Debian Lenny (but 2.6.29 kernel), amd64. Xeon chips; 32 bit version of XP pro installed, now fully patched (including the Windows Genuine Advantage stuff, though I cancelled it when it wanted to run). Task manager in XP shows virtually no CPU useage. Please cc me on responses. Thanks for any assistance. -- Ross Boylan wk: (415) 514-8146 185 Berry St #5700 r...@biostat.ucsf.edu Dept of Epidemiology and Biostatistics fax: (415) 514-8150 University of California, San Francisco San Francisco, CA 94107-1739 hm: (415) 550-1062 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kvm-autotest: The automation plans?
On Thu, May 14, 2009 at 12:22 PM, jason wang jasow...@redhat.com wrote: sudhir kumar 写道: Hi Uri/Lucas, Do you have any plans for enhancing kvm-autotest? I was looking mainly on the following 2 aspects: (1). we have standalone migration only. Is there any plans of enhancing kvm-autotest so that we can trigger migration while a workload is running? Something like this: Start a workload(may be n instances of it). let the test execute for some time. Trigger migration. Log into the target. Check if the migration is succesful Check if the test results are consistent. We have some patches of ping pong migration and workload adding. The migration is based on public bridge and workload adding is based on running benchmark in the background of guest. Cool. I would like to have look on them. So how do you manage the background process/thread? (2). How can we run N parallel instances of a test? Will the current configuration be easily able to support it? Please provide your thoughts on the above features. The parallelized instances could be easily achieved through job.parallel() of autotest framework, and that is what we have used in our tests. We have make some helper routines such as get_free_port to be reentrant through file lock. We've implemented following test cases: timedrift(already sent here), savevm/loadvm, suspend/resume, jumboframe, migration between two machines and others. We will sent it here for review in the following weeks. There are some other things could be improved: 1) Current kvm_test.cfg.sample/kvm_test.cfg is transparent to autotest server UI. This would make it hard to configure the tests in the server side. During our test, we have merged it into control and make it could be configured by editing control file function of autotest server side web UI. Not much clue here. But I would like to keep the control file as simple as possible and as much independent of test scenarios as possible. 
kvm_tests.cfg should be the right file untill and unless it is impossible to do by using it. 2) Public bridge support: I've sent a patch(TAP network support in kvm-autotest), this patch needs external DHCP server and requires nmap support. I don't know whether the method of original kvm_runtes_old(DHCP server of private bridge) is preferable. The old approach is better. All might not be able to run an external DHCP server for running the test. I do not see any issue with the old approach. -- Sudhir Kumar -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][KVM-AUTOTEST] TAP network support in kvm-autotest
Hi Jason, We already have patches that implement similar functionality here in TLV, as mentioned in the to-do list (item #4 under 'Framework'). They're not yet committed upstream because they're still quite fresh. Still, your patch looks good and is quite similar to mine. The main difference is that I use MAC/IP address pools specified by the user, instead of random MACs with arp/nmap to detect the matching IP addresses. I will post my patch to the mailing list soon, but it will come together with quite a few other patches that I haven't posted yet, so please be patient. Comments/questions: Why do you use nmap in addition to arp? In what cases will arp not suffice? I'm a little put off by the fact that nmap imposes an additional requirement on the host. Three hosts I've tried don't come with nmap installed by default. Please see additional comments below. - Jason Wang jasow...@redhat.com wrote: Hi All: This patch tries to add tap network support in kvm-autotest. Multiple nics connected to different bridges could be achieved through this script. Public bridge is important for testing real network traffic and migration. The patch gives each nic with randomly generated mac address. The ip address required in the test could be dynamically probed through nmap/arp. Only the ip address of first NIC is used through the test. Example: nics = nic1 nic2 network = bridge bridge = switch ifup =/etc/qemu-ifup-switch ifdown =/etc/qemu-ifdown-switch This would make the virtual machine have two nics both of which are connected to a bridge with the name of 'switch'. Ifup/ifdown scripts are also specified. Another Example: nics = nic1 nic2 network = bridge bridge = switch bridge_nic2 = virbr0 ifup =/etc/qemu-ifup-switch ifup_nic2 = /etc/qemu-ifup-virbr0 This would makes the virtual machine have two nics: nic1 are connected to bridge 'switch' and nci2 are connected to bridge 'virbr0'. 
Public mode and user mode nic could also be mixed: nics = nic1 nic2 network = bridge network_nic2 = user Looking forward for comments and suggestions. From: jason jasow...@redhat.com Date: Wed, 13 May 2009 16:15:28 +0800 Subject: [PATCH] Add tap networking support. --- client/tests/kvm_runtest_2/kvm_utils.py |7 +++ client/tests/kvm_runtest_2/kvm_vm.py| 74 ++- 2 files changed, 69 insertions(+), 12 deletions(-) diff --git a/client/tests/kvm_runtest_2/kvm_utils.py b/client/tests/kvm_runtest_2/kvm_utils.py index be8ad95..0d1f7f8 100644 --- a/client/tests/kvm_runtest_2/kvm_utils.py +++ b/client/tests/kvm_runtest_2/kvm_utils.py @@ -773,3 +773,10 @@ def md5sum_file(filename, size=None): size -= len(data) f.close() return o.hexdigest() + +def random_mac(): +mac=[0x00,0x16,0x30, + random.randint(0x00,0x09), + random.randint(0x00,0x09), + random.randint(0x00,0x09)] +return ':'.join(map(lambda x: %02x %x,mac)) Random MAC addresses will not necessarily work everywhere, as far as I know. That's why I prefer user specified MAC/IP address ranges. diff --git a/client/tests/kvm_runtest_2/kvm_vm.py b/client/tests/kvm_runtest_2/kvm_vm.py index fab839f..ea7dab6 100644 --- a/client/tests/kvm_runtest_2/kvm_vm.py +++ b/client/tests/kvm_runtest_2/kvm_vm.py @@ -105,6 +105,10 @@ class VM: self.qemu_path = qemu_path self.image_dir = image_dir self.iso_dir = iso_dir +self.macaddr = [] +for nic_name in kvm_utils.get_sub_dict_names(params,nics): +macaddr = kvm_utils.random_mac() +self.macaddr.append(macaddr) def verify_process_identity(self): Make sure .pid really points to the original qemu process. 
@@ -189,9 +193,25 @@ class VM: for nic_name in kvm_utils.get_sub_dict_names(params, nics): nic_params = kvm_utils.get_sub_dict(params, nic_name) qemu_cmd += -net nic,vlan=%d % vlan +net = nic_params.get(network) +if net == bridge: +qemu_cmd += ,macaddr=%s % self.macaddr[vlan] if nic_params.get(nic_model): qemu_cmd += ,model=%s % nic_params.get(nic_model) -qemu_cmd += -net user,vlan=%d % vlan +if net == bridge: +qemu_cmd += -net tap,vlan=%d % vlan +ifup = nic_params.get(ifup) +if ifup: +qemu_cmd += ,script=%s % ifup +else: +qemu_cmd += ,script=/etc/qemu-ifup Why not just leave 'script' out if the user doesn't specify 'ifup'? There's no good reason to prefer /etc/qemu-ifup to /etc/kvm-ifup or anything else, so I think it's best to leave it up to qemu if the user has no preference. It's also slightly shorter. +ifdown = nic_params.get(ifdown) +if ifdown: +qemu_cmd += ,downscript=%s % ifdown +
Re: kvm-autotest: The automation plans?
- sudhir kumar smalik...@gmail.com wrote: On Thu, May 14, 2009 at 12:22 PM, jason wang jasow...@redhat.com wrote: sudhir kumar 写道: Hi Uri/Lucas, Do you have any plans for enhancing kvm-autotest? I was looking mainly on the following 2 aspects: (1). we have standalone migration only. Is there any plans of enhancing kvm-autotest so that we can trigger migration while a workload is running? Something like this: Start a workload(may be n instances of it). let the test execute for some time. Trigger migration. Log into the target. Check if the migration is succesful Check if the test results are consistent. We have some patches of ping pong migration and workload adding. The migration is based on public bridge and workload adding is based on running benchmark in the background of guest. Cool. I would like to have look on them. So how do you manage the background process/thread? (2). How can we run N parallel instances of a test? Will the current configuration be easily able to support it? Please provide your thoughts on the above features. The parallelized instances could be easily achieved through job.parallel() of autotest framework, and that is what we have used in our tests. We have make some helper routines such as get_free_port to be reentrant through file lock. We've implemented following test cases: timedrift(already sent here), savevm/loadvm, suspend/resume, jumboframe, migration between two machines and others. We will sent it here for review in the following weeks. There are some other things could be improved: 1) Current kvm_test.cfg.sample/kvm_test.cfg is transparent to autotest server UI. This would make it hard to configure the tests in the server side. During our test, we have merged it into control and make it could be configured by editing control file function of autotest server side web UI. Not much clue here. But I would like to keep the control file as simple as possible and as much independent of test scenarios as possible. 
kvm_tests.cfg should be the right file untill and unless it is impossible to do by using it. 2) Public bridge support: I've sent a patch(TAP network support in kvm-autotest), this patch needs external DHCP server and requires nmap support. I don't know whether the method of original kvm_runtes_old(DHCP server of private bridge) is preferable. The old approach is better. All might not be able to run an external DHCP server for running the test. I do not see any issue with the old approach. We're taking more of a minimalist approach in kvm_runtest_2: the framework should handle only the things directly related to testing. Configuring and running a DHCP server is and should be beyond the scope of the KVM-Autotest framework. To emulate the old behavior, you can just start the DHCP server yourself locally. If you wish, maybe we can bundle example scripts with the framework that will do this for the user, but they should not be an integral part of the framework in my opinion. -- Sudhir Kumar -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv6 0/4] virtio: MSI-X support
Here's the latest draft of virtio patches. This is on top of Rusty's recent virtqueue list + name patch, which is included in series for completeness. Changelog: changes since v5: fix build on s390 (only patch 2/4 modified) Michael S. Tsirkin (3): virtio: find_vqs/del_vqs virtio operations virtio_pci: split up vp_interrupt virtio_pci: optional MSI-X support Rusty Russell (1): virtio: add names to virtqueue struct, mapping from devices to queues. drivers/block/virtio_blk.c |6 +- drivers/char/hw_random/virtio-rng.c |6 +- drivers/char/virtio_console.c | 26 ++-- drivers/lguest/lguest_device.c | 41 +- drivers/net/virtio_net.c| 45 ++--- drivers/s390/kvm/kvm_virtio.c | 43 +- drivers/virtio/virtio.c |2 + drivers/virtio/virtio_balloon.c | 27 ++-- drivers/virtio/virtio_pci.c | 306 ++- drivers/virtio/virtio_ring.c| 25 +++- include/linux/virtio.h | 12 +- include/linux/virtio_config.h | 45 - include/linux/virtio_pci.h | 10 +- include/linux/virtio_ring.h |3 +- net/9p/trans_virtio.c |2 +- 15 files changed, 465 insertions(+), 134 deletions(-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv6 1/4] virtio: add names to virtqueue struct, mapping from devices to queues.
From: Rusty Russell ru...@rustcorp.com.au Add a linked list of all virtqueues for a virtio device: this helps for debugging and is also needed for upcoming interface change. Also, add a name field for clearer debug messages. Signed-off-by: Rusty Russell ru...@rustcorp.com.au --- including this Rusty's patch here for completeness. drivers/block/virtio_blk.c |2 +- drivers/char/hw_random/virtio-rng.c |2 +- drivers/char/virtio_console.c |4 ++-- drivers/lguest/lguest_device.c |5 +++-- drivers/net/virtio_net.c|6 +++--- drivers/s390/kvm/kvm_virtio.c |7 --- drivers/virtio/virtio.c |2 ++ drivers/virtio/virtio_balloon.c |4 ++-- drivers/virtio/virtio_pci.c |5 +++-- drivers/virtio/virtio_ring.c| 25 +++-- include/linux/virtio.h | 12 include/linux/virtio_config.h |6 -- include/linux/virtio_ring.h |3 ++- net/9p/trans_virtio.c |2 +- 14 files changed, 55 insertions(+), 30 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 5d34764..8f7c956 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -224,7 +224,7 @@ static int virtblk_probe(struct virtio_device *vdev) sg_init_table(vblk-sg, vblk-sg_elems); /* We expect one virtqueue, for output. */ - vblk-vq = vdev-config-find_vq(vdev, 0, blk_done); + vblk-vq = vdev-config-find_vq(vdev, 0, blk_done, requests); if (IS_ERR(vblk-vq)) { err = PTR_ERR(vblk-vq); goto out_free_vblk; diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 86e83f8..2aeafce 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -94,7 +94,7 @@ static int virtrng_probe(struct virtio_device *vdev) int err; /* We expect a single virtqueue. 
*/ - vq = vdev-config-find_vq(vdev, 0, random_recv_done); + vq = vdev-config-find_vq(vdev, 0, random_recv_done, input); if (IS_ERR(vq)) return PTR_ERR(vq); diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index ff6f5a4..58684e4 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -202,13 +202,13 @@ static int __devinit virtcons_probe(struct virtio_device *dev) /* Find the input queue. */ /* FIXME: This is why we want to wean off hvc: we do nothing * when input comes in. */ - in_vq = vdev-config-find_vq(vdev, 0, hvc_handle_input); + in_vq = vdev-config-find_vq(vdev, 0, hvc_handle_input, input); if (IS_ERR(in_vq)) { err = PTR_ERR(in_vq); goto free; } - out_vq = vdev-config-find_vq(vdev, 1, NULL); + out_vq = vdev-config-find_vq(vdev, 1, NULL, output); if (IS_ERR(out_vq)) { err = PTR_ERR(out_vq); goto free_in_vq; diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index df44d96..4babed8 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -228,7 +228,8 @@ extern void lguest_setup_irq(unsigned int irq); * function. */ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, unsigned index, - void (*callback)(struct virtqueue *vq)) + void (*callback)(struct virtqueue *vq), + const char *name) { struct lguest_device *ldev = to_lgdev(vdev); struct lguest_vq_info *lvq; @@ -263,7 +264,7 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, /* OK, tell virtio_ring.c to set up a virtqueue now we know its size * and we've got a pointer to its pages. 
*/ vq = vring_new_virtqueue(lvq-config.num, LGUEST_VRING_ALIGN, -vdev, lvq-pages, lg_notify, callback); +vdev, lvq-pages, lg_notify, callback, name); if (!vq) { err = -ENOMEM; goto unmap; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 4d1d479..be3b734 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -906,20 +906,20 @@ static int virtnet_probe(struct virtio_device *vdev) vi-mergeable_rx_bufs = true; /* We expect two virtqueues, receive then send. */ - vi-rvq = vdev-config-find_vq(vdev, 0, skb_recv_done); + vi-rvq = vdev-config-find_vq(vdev, 0, skb_recv_done, input); if (IS_ERR(vi-rvq)) { err = PTR_ERR(vi-rvq); goto free; } - vi-svq = vdev-config-find_vq(vdev, 1, skb_xmit_done); + vi-svq = vdev-config-find_vq(vdev, 1, skb_xmit_done, output); if (IS_ERR(vi-svq)) {
[PATCHv6 4/4] virtio_pci: optional MSI-X support
This implements optional MSI-X support in virtio_pci. MSI-X is used whenever the host supports at least 2 MSI-X vectors: 1 for configuration changes and 1 for virtqueues. Per-virtqueue vectors are allocated if enough vectors available. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- drivers/virtio/virtio_pci.c | 227 +++ include/linux/virtio_pci.h | 10 ++- 2 files changed, 217 insertions(+), 20 deletions(-) diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 951e673..65627a4 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -42,6 +42,26 @@ struct virtio_pci_device /* a list of queues so we can dispatch IRQs */ spinlock_t lock; struct list_head virtqueues; + + /* MSI-X support */ + int msix_enabled; + int intx_enabled; + struct msix_entry *msix_entries; + /* Name strings for interrupts. This size should be enough, +* and I'm too lazy to allocate each name separately. */ + char (*msix_names)[256]; + /* Number of available vectors */ + unsigned msix_vectors; + /* Vectors allocated */ + unsigned msix_used_vectors; +}; + +/* Constants for MSI-X */ +/* Use first vector for configuration changes, second and the rest for + * virtqueues Thus, we need at least 2 vectors for MSI. */ +enum { + VP_MSIX_CONFIG_VECTOR = 0, + VP_MSIX_VQ_VECTOR = 1, }; struct virtio_pci_vq_info @@ -60,6 +80,9 @@ struct virtio_pci_vq_info /* the list node for the virtqueues list */ struct list_head node; + + /* MSI-X vector (or none) */ + unsigned vector; }; /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. 
*/ @@ -109,7 +132,8 @@ static void vp_get(struct virtio_device *vdev, unsigned offset, void *buf, unsigned len) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - void __iomem *ioaddr = vp_dev-ioaddr + VIRTIO_PCI_CONFIG + offset; + void __iomem *ioaddr = vp_dev-ioaddr + + VIRTIO_PCI_CONFIG(vp_dev) + offset; u8 *ptr = buf; int i; @@ -123,7 +147,8 @@ static void vp_set(struct virtio_device *vdev, unsigned offset, const void *buf, unsigned len) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - void __iomem *ioaddr = vp_dev-ioaddr + VIRTIO_PCI_CONFIG + offset; + void __iomem *ioaddr = vp_dev-ioaddr + + VIRTIO_PCI_CONFIG(vp_dev) + offset; const u8 *ptr = buf; int i; @@ -221,7 +246,121 @@ static irqreturn_t vp_interrupt(int irq, void *opaque) return vp_vring_interrupt(irq, opaque); } -/* the config-find_vq() implementation */ +static void vp_free_vectors(struct virtio_device *vdev) { + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + int i; + + if (vp_dev-intx_enabled) { + free_irq(vp_dev-pci_dev-irq, vp_dev); + vp_dev-intx_enabled = 0; + } + + for (i = 0; i vp_dev-msix_used_vectors; ++i) + free_irq(vp_dev-msix_entries[i].vector, vp_dev); + vp_dev-msix_used_vectors = 0; + + if (vp_dev-msix_enabled) { + /* Disable the vector used for configuration */ + iowrite16(VIRTIO_MSI_NO_VECTOR, + vp_dev-ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + /* Flush the write out to device */ + ioread16(vp_dev-ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + + vp_dev-msix_enabled = 0; + pci_disable_msix(vp_dev-pci_dev); + } +} + +static int vp_enable_msix(struct pci_dev *dev, struct msix_entry *entries, + int *options, int noptions) +{ + int i; + for (i = 0; i noptions; ++i) + if (!pci_enable_msix(dev, entries, options[i])) + return options[i]; + return -EBUSY; +} + +static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + const char *name = dev_name(vp_dev-vdev.dev); + unsigned i, v; + int err = -ENOMEM; 
+ /* We want at most one vector per queue and one for config changes. +* Fallback to separate vectors for config and a shared for queues. +* Finally fall back to regular interrupts. */ + int options[] = { max_vqs + 1, 2 }; + int nvectors = max(options[0], options[1]); + + vp_dev-msix_entries = kmalloc(nvectors * sizeof *vp_dev-msix_entries, + GFP_KERNEL); + if (!vp_dev-msix_entries) + goto error_entries; + vp_dev-msix_names = kmalloc(nvectors * sizeof *vp_dev-msix_names, +GFP_KERNEL); + if (!vp_dev-msix_names) + goto error_names; + + for (i = 0; i nvectors; ++i) +
Re: [KVM PATCH v7 3/3] kvm: add iofd support
Gregory Haskins wrote: iofd is a mechanism to register PIO/MMIO regions to trigger an eventfd signal when written to. Userspace can register any arbitrary address with a corresponding eventfd. Please start a separate patchset for this so I can merge irqfd. diff --git a/include/linux/kvm.h b/include/linux/kvm.h index dfc4bcc..99b6e45 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -292,6 +292,17 @@ struct kvm_guest_debug { struct kvm_guest_debug_arch arch; }; +#define KVM_IOFD_FLAG_DEASSIGN (1 0) +#define KVM_IOFD_FLAG_PIO (1 1) + +struct kvm_iofd { + __u64 addr; + __u32 len; + __u32 fd; + __u32 flags; + __u8 pad[12]; +}; + Please add a data match capability. virtio uses a write with the data containing the queue ID, and we want a separate event for each queue. * kvm trace categories @@ -508,6 +519,7 @@ struct kvm_irqfd { #define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) #define KVM_ASSIGN_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) #define KVM_DEASSIGN_IRQFD _IOW(KVMIO, 0x77, __u32) +#define KVM_IOFD _IOW(KVMIO, 0x78, struct kvm_iofd) Too general a name. It's not doing IO, just sending out notifications. Why have assign/deassign for irqfd and a single ioctl for iofd? The rest looks good. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kvm-autotest: The automation plans?
- sudhir kumar smalik...@gmail.com wrote: On Wed, May 13, 2009 at 11:30 PM, Michael Goldish mgold...@redhat.com wrote: - sudhir kumar smalik...@gmail.com wrote: Hi Uri/Lucas, Do you have any plans for enhancing kvm-autotest? I was looking mainly on the following 2 aspects: (1). we have standalone migration only. Is there any plans of enhancing kvm-autotest so that we can trigger migration while a workload is running? Something like this: Start a workload(may be n instances of it). let the test execute for some time. Trigger migration. Log into the target. Check if the migration is succesful Check if the test results are consistent. Yes, we have plans to implement such functionality. It shouldn't be hard, but we need to give it some thought in order to implement it as elegantly as possible. I completely agree here. (2). How can we run N parallel instances of a test? Will the current configuration be easily able to support it? I currently have some experimental patches that allow running of several parallel queues of tests. But what exactly do you mean by Please post them. N parallel instances of a test? Do you mean N queues? Please provide an example so I can get a better idea. I wanted a parallelism in 2 degrees. Let me try with an example. The following test only raw.*ide.*default.*smp2.*RHEL5.3.i386.*migrate.dbench is just one instance and will create one VM with given specifications and execute migrate and dbench. So I am thinking how can we trigger n similar tests execution in parallel. I feel job.parallel() is meant for that but is kvm_tests.cfg good enough to be used under such a scenario? However we have most of the stuff non static(as getting the free vnc port, etc) but still we have some variables which are static. For ex. vm name, migration port etc. So what are your thoughts on it. I think generally kvm_tests.cfg is flexible enough, and can easily be modified to define whatever you like. 
Note, however, that the config file parser module is only responsible for producing a list of dictionaries which define the tests to run. It doesn't care much about parallelism -- this is up to the control file and the rest of the framework. If you're not familiar with the format of config files, please refer to http://www.linux-kvm.org/page/KVM-Autotest/Test_Config_File and http://www.linux-kvm.org/page/KVM-Autotest/Parameters In this scenario my system will be having N VMs, all running the same set of testcases. I thought you said one VM running migrate and dbench in parallel. I'm not sure I follow. On the other hand I was looking for something like this as well. only raw.*ide.*default.*smp2.*RHEL5.3.i386.*migrate.dbench.dbench_instancesN.bonnie Thus all the tests will be executed in normal way except dbench. There should be running N instances of dbench and when over simply run bonnie and exit. This seems like two tests to me: dbench with dbench (several instances), and then another unrelated bonnie test. Also note that the variants you select with 'only' must be defined before they can be selected. Look at the examples in the wiki as well as real config files. I hope my demand to kvm-autotest is not too much but for an effective and rigorous testing of kvm such a framework is necessary. I am bit new to autotest framework and have very little knowledge of the server side. I will start spending some time on looking at the available features. Hope I was clear this time. Regarding parallelism: Generally two types can be implemented. 1. Several independent test execution queues: in this case there are several queues that don't interfere with each other. Each queue works with its own VMs. This is useful for saving time by running tests in parallel on capable hosts. This can be implemented using job.parallel() and is already running in TLV. I will try to post the patches soon. 
This can probably also be implemented from the server, if it can treat a single physical host as if it were several, thus running several independent copies of the Autotest client on it. 2. Several tests on a single VM, which is what you were referring to, if I understood correctly: in this case several threads work with the same VMs and abuse them in parallel -- one thread can run dbench while the other runs migration on the same VM. This is possible using threads, and the syntax in the config file can be something like 'types = dbench migration' instead of what we currently use -- 'type = dbench'. However, we have to think whether we really just want to run tests in parallel. In the migration-dbench case, for example, we'd like to make sure dbench starts running before we migrate. So maybe it's wiser to just run some load inside the migration test, instead of the dbench test. We should carefully consider all options. Thanks, Michael -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a
Re: [KVM PATCH v7 2/3] kvm: add support for irqfd via eventfd-notification interface
Gregory Haskins wrote: KVM provides a complete virtual system environment for guests, including support for injecting interrupts modeled after the real exception/interrupt facilities present on the native platform (such as the IDT on x86). Virtual interrupts can come from a variety of sources (emulated devices, pass-through devices, etc) but all must be injected to the guest via the KVM infrastructure. This patch adds a new mechanism to inject a specific interrupt to a guest using a decoupled eventfd mechnanism: Any legal signal on the irqfd (using eventfd semantics from either userspace or kernel) will translate into an injected interrupt in the guest at the next available interrupt window. + +static void +irqfd_inject(struct work_struct *work) +{ + struct _irqfd *irqfd = container_of(work, struct _irqfd, work); + struct kvm *kvm = irqfd-kvm; + I think you need to -read() from the irqfd, otherwise the count will never clear. + mutex_lock(kvm-lock); + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 1); + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 0); + mutex_unlock(kvm-lock); +} -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [KVM PATCH v7 2/3] kvm: add support for irqfd via eventfd-notification interface
Avi Kivity wrote: Gregory Haskins wrote: KVM provides a complete virtual system environment for guests, including support for injecting interrupts modeled after the real exception/interrupt facilities present on the native platform (such as the IDT on x86). Virtual interrupts can come from a variety of sources (emulated devices, pass-through devices, etc) but all must be injected to the guest via the KVM infrastructure. This patch adds a new mechanism to inject a specific interrupt to a guest using a decoupled eventfd mechnanism: Any legal signal on the irqfd (using eventfd semantics from either userspace or kernel) will translate into an injected interrupt in the guest at the next available interrupt window. r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 3db5d8d..dfc4bcc 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -415,6 +415,7 @@ struct kvm_trace_rec { #define KVM_CAP_ASSIGN_DEV_IRQ 29 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 +#define KVM_CAP_EVENTFD 31 Let's keep a fine granularity and call it IRQFD. Yeah, the iofd stuff is still immature and is not likely to be ready at the same time anyway. The CAP bits are cheap enough as it is, so not sure what I was thinking. Will fix. + +int +kvm_deassign_irqfd(struct kvm *kvm, int fd) +{ +struct _irqfd *irqfd, *tmp; + +mutex_lock(kvm-lock); + +/* + * linear search isn't brilliant, but this should be a infrequent + * operation and the list should not grow very large + */ +list_for_each_entry_safe(irqfd, tmp, kvm-irqfds, list) { +if (irqfd-fd != fd) +continue; Please fget() the new fd and compare the filps; fds aren't meaningful in the kernel. You can also drop _irqfd::fd. I like this as a second option... It may also be useful to compare the gsi, this allows a make-before-break switchover: ...but I like this best. Good idea. 
- guest reroutes irq to a different gsi - associate irqfd with new gsi - disassociate irqfd from old gsi + +irqfd_release(irqfd); +mutex_unlock(kvm-lock); +return 0; Don't return, userspace may have multiple associations? Parse error. Can you elaborate? -Greg signature.asc Description: OpenPGP digital signature
Re: [KVM PATCH v7 3/3] kvm: add iofd support
Avi Kivity wrote: Gregory Haskins wrote: iofd is a mechanism to register PIO/MMIO regions to trigger an eventfd signal when written to. Userspace can register any arbitrary address with a corresponding eventfd. Please start a separate patchset for this so I can merge irqfd. Ack. Will spin a new split series with your irqfd review changes diff --git a/include/linux/kvm.h b/include/linux/kvm.h index dfc4bcc..99b6e45 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -292,6 +292,17 @@ struct kvm_guest_debug { struct kvm_guest_debug_arch arch; }; +#define KVM_IOFD_FLAG_DEASSIGN (1 0) +#define KVM_IOFD_FLAG_PIO (1 1) + +struct kvm_iofd { +__u64 addr; +__u32 len; +__u32 fd; +__u32 flags; +__u8 pad[12]; +}; + Please add a data match capability. virtio uses a write with the data containing the queue ID, and we want a separate event for each queue. How about u64 cookie ? * kvm trace categories @@ -508,6 +519,7 @@ struct kvm_irqfd { #define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) #define KVM_ASSIGN_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) #define KVM_DEASSIGN_IRQFD _IOW(KVMIO, 0x77, __u32) +#define KVM_IOFD _IOW(KVMIO, 0x78, struct kvm_iofd) Too general a name. It's not doing IO, just sending out notifications. Hmm...good point. I was trying to reflect [MM/P]IO-FD. How about IOSIGNALFD Why have assign/deassign for irqfd and a single ioctl for iofd? Heh.. irqfd liked two because the deassign only needed a u32. iofd needed more or less the same structure for both so I guess I thought I would be slick and condense the vectors. Will fix so they are symmetrical. The rest looks good. I will also submit a patch to fix the io_bus stuff so that registrations can gracefully fail instead of BUG_ON(), and to provide an unregister function. Thanks Avi, -Greg signature.asc Description: OpenPGP digital signature
Re: [PATCH] don't use a 32-bit type as offset argument.
Glauber Costa wrote: In the call path of kvm_get_dirty_pages_log_range(), its caller kvm_get_dirty_bitmap_cb() passes the target_phys_addr_t both as start_addr and the offset. So, using int will make dirty tracking over 4G fail completely. Applied, thanks. Of course we should be using qemu types in here, so please don't get me started on this. The whole file is wrong already ;) These callbacks are called from libkvm, which doesn't know much about qemu. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [KVM PATCH v7 2/3] kvm: add support for irqfd via eventfd-notification interface
Gregory Haskins wrote: Please fget() the new fd and compare the filps; fds aren't meaningful in the kernel. You can also drop _irqfd::fd. I like this as a second option... It may also be useful to compare the gsi, this allows a make-before-break switchover: ...but I like this best. Good idea. I thought of comparing both. - guest reroutes irq to a different gsi - associate irqfd with new gsi - disassociate irqfd from old gsi + +irqfd_release(irqfd); +mutex_unlock(kvm-lock); +return 0; Don't return, userspace may have multiple associations? Parse error. Can you elaborate? You break out of the look when you match your irqfd. But there may be multiple matches. Granted, it doesn't make much sense to hook the same fd to the same gsi multiple times (it may make sense to hook multiple fds to a single gsi, or maybe a single fd to multiple gsis), but it pays to have a consistent do-what-I-said-even-if-it-doesn't-make-sense interface. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [KVM PATCH v7 3/3] kvm: add iofd support
Gregory Haskins wrote: +#define KVM_IOFD_FLAG_PIO (1 1) + +struct kvm_iofd { +__u64 addr; +__u32 len; +__u32 fd; +__u32 flags; +__u8 pad[12]; +}; + Please add a data match capability. virtio uses a write with the data containing the queue ID, and we want a separate event for each queue. How about u64 cookie ? Sure, and a bit in flags to enable it. * kvm trace categories @@ -508,6 +519,7 @@ struct kvm_irqfd { #define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) #define KVM_ASSIGN_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) #define KVM_DEASSIGN_IRQFD _IOW(KVMIO, 0x77, __u32) +#define KVM_IOFD _IOW(KVMIO, 0x78, struct kvm_iofd) Too general a name. It's not doing IO, just sending out notifications. Hmm...good point. I was trying to reflect [MM/P]IO-FD. How about IOSIGNALFD Okay. Why have assign/deassign for irqfd and a single ioctl for iofd? Heh.. irqfd liked two because the deassign only needed a u32. iofd needed more or less the same structure for both so I guess I thought I would be slick and condense the vectors. Will fix so they are symmetrical. Yeah. You could have both use just one, or both use two. Not sure which is better. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: i8042.c: No controller found - no keyboard when I type in BIOS
Tomasz Chmielewski schrieb: The keyboard is not present after I reboot the guest and usually type before Linux is started. It does not always happen. Observed with kvm-83, kvm-84, kvm-85 on multiple KVM hosts (different hardware). Anyone else seeing this? If you're not sure, do something like: Looks like I'm not alone here with this issue: http://osdir.com/ml/fedora-virt/2009-04/msg00066.html -- Tomasz Chmielewski http://wpkg.org -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [KVM PATCH v7 2/3] kvm: add support for irqfd via eventfd-notification interface
Avi Kivity wrote: Gregory Haskins wrote: Please fget() the new fd and compare the filps; fds aren't meaningful in the kernel. You can also drop _irqfd::fd. I like this as a second option... It may also be useful to compare the gsi, this allows a make-before-break switchover: ...but I like this best. Good idea. I thought of comparing both. Ah, ok. I misunderstood. We can do that. - guest reroutes irq to a different gsi - associate irqfd with new gsi - disassociate irqfd from old gsi + +irqfd_release(irqfd); +mutex_unlock(kvm-lock); +return 0; Don't return, userspace may have multiple associations? Parse error. Can you elaborate? You break out of the look when you match your irqfd. But there may be multiple matches. Granted, it doesn't make much sense to hook the same fd to the same gsi multiple times (it may make sense to hook multiple fds to a single gsi, or maybe a single fd to multiple gsis), but it pays to have a consistent do-what-I-said-even-if-it-doesn't-make-sense interface. Ack, will do. -Greg signature.asc Description: OpenPGP digital signature
Re: i8042.c: No controller found - no keyboard when I type in BIOS
Tomasz Chmielewski schrieb: Tomasz Chmielewski schrieb: The keyboard is not present after I reboot the guest and usually type before Linux is started. It does not happen always. Observed with kvm-83, kvm-84, kvm-85 on multiple KVM hosts (different hardware). Anyone else seeing this? If you're not sure, do something like: Looks I'm not alone here with this issue: http://osdir.com/ml/fedora-virt/2009-04/msg00066.html This seems to be a qemu-related problem (I found more confirmations on the internet); reposting the question to the qemu-devel list. -- Tomasz Chmielewski http://wpkg.org -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: event injection MACROs
Avi Kivity wrote: Dong, Eddie wrote: OK. Also back to Gleb's question, the reason I want to do that is to simplify event generation mechanism in current KVM. Today KVM use additional layer of exception/nmi/interrupt such as vcpu.arch.exception.pending, vcpu-arch.interrupt.pending vcpu-arch.nmi_injected. All those additional layer is due to compete of VM_ENTRY_INTR_INFO_FIELD write to inject the event. Both SVM VMX has only one resource to inject the virtual event but KVM generates 3 catagory of events in parallel which further requires additional logic to dictate among them. I thought of using a queue to hold all pending events (in a common format), sort it by priority, and inject the head. The SDM Table 5-4 requires to merge 2 events together, i.e. convert to #DF/ Triple fault or inject serially when 2 events happens no matter NMI, IRQ or exception. As if considering above events merging activity, that is a single element queue. We could have either: 1) A pure SW queue that will be flush to HW register later (VM_ENTRY_INTR_INFO_FIELD), 2) Direct use HW register. 
A potential benefit is that it can avoid duplicated code and potential bugs in current code as following patch shows if I understand correctly: --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2599,7 +2599,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) cr2 = vmcs_readl(EXIT_QUALIFICATION); KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, (u32)((u64)cr2 32), handler); - if (vcpu-arch.interrupt.pending || vcpu-arch.exception.pending ) + if (vcpu-arch.interrupt.pending || vcpu-arch.exception.pending || vcpu-arch.nmi_injected) kvm_mmu_unprotect_page_virt(vcpu, cr2); return kvm_mmu_page_fault(vcpu, cr2, error_code); } If using above merged SW queue or HW direct register, we can do like following: --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2599,7 +2599,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) cr2 = vmcs_readl(EXIT_QUALIFICATION); KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, (u32)((u64)cr2 32), handler); - if (vcpu-arch.interrupt.pending || vcpu-arch.exception.pending ) + if (vmcs_read(VM_ENTRY_INTR_INFO_FIELD) INTR_INFO_VALID_MASK) kvm_mmu_unprotect_page_virt(vcpu, cr2); return kvm_mmu_page_fault(vcpu, cr2, error_code); } Either way are OK and up to you. BTW Xen uses HW register directly to representing an pending event. One example is that exception has higher priority than NMI/IRQ injection in current code which is not true in reality. I don't think it matters in practice, since the guest will see it as a timing issue. NMIs and IRQs are asynchronous (even those generated by the guest through the local APIC). Yes. But also cause IRQ injection be delayed which may have side effect. For example if guest exception handler is very longer or if guest VCPU fall into recursive #GP. Within current logic, a guest IRQ event from KDB (IPI) running on VCPU0, as an example, can't force the dead loop VCPU1 into KDB since it is recursively #GP. 
Another issue is that an failed event from previous injection say IRQ or NMI may be discarded if an virtual exception happens in the EXIT handling now. With the patch of generic double fault handling, this case should be handled as normally. Discarding an exception is usually okay as it will be regenerated. I don't think we discard interrupts or NMIs. In reality (Running OS in guest), it doesn't happen so far. But architecturally, it could. For example KVM injects an IRQ, but VM Resume get #PF and back to KVM with IDT_VECTORING valid. Then KVM will put back the failed IRQ to interrupt queue. But if #PF handling generates another exception, then the interrupt queue won't be able to be injected, since KVM inject exception first. And the interrupt queue is discarded at next VM Exit. Overal, I think this is mostly for simplification but may benefit future a lot. Especially with Gleb's recent cleanup, it soulds to be much easier to do than before. I may make mistake here, will like to see more comments. thx, eddie -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: event injection MACROs
On Thu, May 14, 2009 at 09:43:33PM +0800, Dong, Eddie wrote: Avi Kivity wrote: Dong, Eddie wrote: OK. Also back to Gleb's question, the reason I want to do that is to simplify event generation mechanism in current KVM. Today KVM use additional layer of exception/nmi/interrupt such as vcpu.arch.exception.pending, vcpu-arch.interrupt.pending vcpu-arch.nmi_injected. All those additional layer is due to compete of VM_ENTRY_INTR_INFO_FIELD write to inject the event. Both SVM VMX has only one resource to inject the virtual event but KVM generates 3 catagory of events in parallel which further requires additional logic to dictate among them. I thought of using a queue to hold all pending events (in a common format), sort it by priority, and inject the head. The SDM Table 5-4 requires to merge 2 events together, i.e. convert to #DF/ Triple fault or inject serially when 2 events happens no matter NMI, IRQ or exception. As if considering above events merging activity, that is a single element queue. I don't know how you got to this conclusion from you previous statement. See explanation to table 5-2 for instate where it is stated that interrupt should be held pending if there is exception with higher priority. Should be held pending where? In the queue, like we do. Note that low prio exceptions are just dropped since they will be regenerated. We could have either: 1) A pure SW queue that will be flush to HW register later (VM_ENTRY_INTR_INFO_FIELD), 2) Direct use HW register. We have three event sources 1) exceptions 2) IRQ 3) NMI. We should have queue of three elements sorted by priority. On each entry we should inject an event with highest priority. And remove it from queue on exit. 
A potential benefit is that it can avoid duplicated code and potential bugs in current code as following patch shows if I understand correctly: --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2599,7 +2599,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) cr2 = vmcs_readl(EXIT_QUALIFICATION); KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, (u32)((u64)cr2 32), handler); - if (vcpu-arch.interrupt.pending || vcpu-arch.exception.pending ) + if (vcpu-arch.interrupt.pending || vcpu-arch.exception.pending || vcpu-arch.nmi_injected) kvm_mmu_unprotect_page_virt(vcpu, cr2); return kvm_mmu_page_fault(vcpu, cr2, error_code); } This fix is already in Avi's tree (not yet pushed). Either way are OK and up to you. BTW Xen uses HW register directly to representing an pending event. In this particular case I don't mind to use HW register either, but I don't see any advantage. One example is that exception has higher priority than NMI/IRQ injection in current code which is not true in reality. I don't think it matters in practice, since the guest will see it as a timing issue. NMIs and IRQs are asynchronous (even those generated by the guest through the local APIC). Yes. But also cause IRQ injection be delayed which may have side effect. For example if guest exception handler is very longer or if guest VCPU fall into recursive #GP. Within current logic, a guest IRQ event from KDB (IPI) running on VCPU0, as an example, can't force the dead loop VCPU1 into KDB since it is recursively #GP. If one #GP causes another #GP this is a #DF. If CPU has a chance to executes something in between KVM will have a chance to inject NMI. Another issue is that an failed event from previous injection say IRQ or NMI may be discarded if an virtual exception happens in the EXIT handling now. With the patch of generic double fault handling, this case should be handled as normally. Discarding an exception is usually okay as it will be regenerated. 
I don't think we discard interrupts or NMIs. In reality (Running OS in guest), it doesn't happen so far. But architecturally, it could. For example KVM injects an IRQ, but VM Resume get #PF and back to KVM with IDT_VECTORING valid. Then KVM will put back the failed IRQ to interrupt queue. But if #PF handling generates another exception, then the interrupt queue won't be able to be injected, since KVM inject exception first. And the interrupt queue is discarded at next VM Exit. I acknowledge the presence of the bug although I was not able to write a test case to cause it yet, but it is easy to fix this without changing code too much. Unified event queue and clearing of only injected event on exit should do the trick. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Unicode Error
Hi all. I'm a newbie on the list. I have deployed a system here, with a Ubuntu Server running KVM. Well, when I run the virt-clone command, I get this error: CMD: virt-clone -o vm01 -n VMUbuntu-2 -f /virt/ubuntu-2.img RESULT: Traceback (most recent call last): File /usr/lib/python2.6/logging/__init__.py, line 773, in emit stream.write(fs % msg.encode(UTF-8)) UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 14: ordinal not in range(128) I don't know if this is an issue with Ubuntu, libvirt (!)... Can someone point a way to fix this issue... Thanks... Gilberto Nunes Ferreira TI Selbetti Gestão de Documentos Telefone: +55 (47) 3441-6004 Celular: +55 (47) 8861-6672 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Unicode Error
On Thu, May 14, 2009 at 9:16 AM, Gilberto Nunes gilberto.nu...@selbetti.com.br wrote: Hi all I'm newbie on list. I have deploy a system here, with a Ubuntu Server running KVM. Well, when I run virt-clone command, I get this error: CMD: virt-clone -o vm01 -n VMUbuntu-2 -f /virt/ubuntu-2.img RESULT: Traceback (most recent call last): File /usr/lib/python2.6/logging/__init__.py, line 773, in emit stream.write(fs % msg.encode(UTF-8)) UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 14: ordinal not in range(128) I don't know if this is a issue of Ubuntu, libvirt (!)... Someone can point a way to fix this issue... it seems that at some point in libvirt (which is mostly written in Python), it transcodes some info between ascii and UTF8. some of that info isn't valid 7-bit ASCII, probably some name. it's safer to use only ascii valid strings, both in names and paths. of course, it should be reported as a bug to the libvirt people (http://libvirt.org/bugs.html) -- Javier -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: XP smp using a lot of CPU
On Wed, May 13, 2009 at 09:56:18AM +0300, Avi Kivity wrote: Ross Boylan wrote: I just installed XP into a new VM, specifying -smp 2 for the machine. According to top, it's using nearly 200% of a cpu even when I'm not doing anything. Is this real CPU useage, or just a reporting problem (just as my disk image is big according to ls, but isn't really)? If it's real, is there anything I can do about it? kvm 0.7.2 on Debian Lenny (but 2.6.29 kernel), amd64. Xeon chips; 32 bit version of XP pro installed, now fully patched (including the Windows Genuine Advantage stuff, though I cancelled it when it wanted to run). Task manager in XP shows virtually no CPU useage. Please cc me on responses. I'm guessing Windows uses a pio port to sleep, which kvm doesn't support. Can you provide kvm_stat output? Could this be what has happened to Windows 2000 as well? (kvm-Bugs-2314737) Task manager in the guest shows both CPUs idle, but on the host it shows 200% CPU almost constantly. ucwb-0119:/home/kmshanah/kvm/kvm-85# ./kvm_stat -1 efer_reload0 0 exits 5454894602 4839 fpu_reload 5311150 5 halt_exits200719 1 halt_wakeup 200218 1 host_state_reload 1951410204 1869 hypercalls 0 0 insn_emulation1391377570 1393 insn_emulation_fail 52 0 invlpg 92034019 1 io_exits 1042421930 694 irq_exits 1545221935 1874 irq_injections 450100320 448 irq_window 225291775 245 kvm_request_irq0 0 largepages 0 0 mmio_exits484805 0 mmu_cache_miss 26346459 4 mmu_flooded 21532314 4 mmu_pde_zapped886970 0 mmu_pte_updated 52882039 4 mmu_pte_write 70044961 6 mmu_recycled1102 0 mmu_shadow_zapped 26384127 4 mmu_unsync 7671 0 mmu_unsync_global 0 0 nmi_injections 0 0 nmi_window 0 0 pf_fixed 421444565 167 pf_guest34545643 1 remote_tlb_flush 125039581 9 request_nmi0 0 signal_exits 1 0 tlb_flush 749126829 284 Regards, Kevin. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: event injection MACROs
Gleb Natapov wrote: On Thu, May 14, 2009 at 09:43:33PM +0800, Dong, Eddie wrote: Avi Kivity wrote: Dong, Eddie wrote: OK. Also back to Gleb's question, the reason I want to do that is to simplify event generation mechanism in current KVM. Today KVM use additional layer of exception/nmi/interrupt such as vcpu.arch.exception.pending, vcpu-arch.interrupt.pending vcpu-arch.nmi_injected. All those additional layer is due to compete of VM_ENTRY_INTR_INFO_FIELD write to inject the event. Both SVM VMX has only one resource to inject the virtual event but KVM generates 3 catagory of events in parallel which further requires additional logic to dictate among them. I thought of using a queue to hold all pending events (in a common format), sort it by priority, and inject the head. The SDM Table 5-4 requires to merge 2 events together, i.e. convert to #DF/ Triple fault or inject serially when 2 events happens no matter NMI, IRQ or exception. As if considering above events merging activity, that is a single element queue. I don't know how you got to this conclusion from you previous statement. See explanation to table 5-2 for instate where it is stated that interrupt should be held pending if there is exception with higher priority. Should be held pending where? In the queue, like we do. Note that low prio exceptions are just dropped since they will be regenerated. I have different understanding here. My understanding is that held means NO INTA in HW, i.e. LAPIC still hold this IRQ. We could have either: 1) A pure SW queue that will be flush to HW register later (VM_ENTRY_INTR_INFO_FIELD), 2) Direct use HW register. We have three event sources 1) exceptions 2) IRQ 3) NMI. We should have queue of three elements sorted by priority. On each entry we should Table 5-4 alreadys says NMI/IRQ is BENIGN. inject an event with highest priority. And remove it from queue on exit. The problem is that we have to decide to inject only one of above 3, and discard the rest. 
Whether priority them or merge (to one event as Table 5-4) is another story. A potential benefit is that it can avoid duplicated code and potential bugs in current code as following patch shows if I understand correctly: --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2599,7 +2599,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) cr2 = vmcs_readl(EXIT_QUALIFICATION); KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, (u32)((u64)cr2 32), handler); - if (vcpu-arch.interrupt.pending || vcpu-arch.exception.pending ) + if (vcpu-arch.interrupt.pending || vcpu-arch.exception.pending || vcpu-arch.nmi_injected) kvm_mmu_unprotect_page_virt(vcpu, cr2); return kvm_mmu_page_fault(vcpu, cr2, error_code); } This fix is already in Avi's tree (not yet pushed). Either way are OK and up to you. BTW Xen uses HW register directly to representing an pending event. In this particular case I don't mind to use HW register either, but I don't see any advantage. One example is that exception has higher priority than NMI/IRQ injection in current code which is not true in reality. I don't think it matters in practice, since the guest will see it as a timing issue. NMIs and IRQs are asynchronous (even those generated by the guest through the local APIC). Yes. But also cause IRQ injection be delayed which may have side effect. For example if guest exception handler is very longer or if guest VCPU fall into recursive #GP. Within current logic, a guest IRQ event from KDB (IPI) running on VCPU0, as an example, can't force the dead loop VCPU1 into KDB since it is recursively #GP. If one #GP causes another #GP this is a #DF. If CPU has a chance to Means another #GP in next instruction i.e. Beginning of #GP handler in guest. No #DF here. executes something in between KVM will have a chance to inject NMI. Could have no chance in some cases though not very common. 
Another issue is that an failed event from previous injection say IRQ or NMI may be discarded if an virtual exception happens in the EXIT handling now. With the patch of generic double fault handling, this case should be handled as normally. Discarding an exception is usually okay as it will be regenerated. I don't think we discard interrupts or NMIs. In reality (Running OS in guest), it doesn't happen so far. But architecturally, it could. For example KVM injects an IRQ, but VM Resume get #PF and back to KVM with IDT_VECTORING valid. Then KVM will put back the failed IRQ to interrupt queue. But if #PF handling generates another exception, then the interrupt queue won't be able to be injected, since KVM inject exception first. And the interrupt queue is discarded at next VM Exit. I acknowledge the presence of the bug
Re: [PATCH v4 resend 5/6] VT-d: cleanup iommu_flush_iotlb_psi and flush_unmaps
On Thu, 2009-05-14 at 10:32 +0800, Yu Zhao wrote: Make iommu_flush_iotlb_psi() and flush_unmaps() more readable. This doesn't apply any more. -- David WoodhouseOpen Source Technology Centre david.woodho...@intel.com Intel Corporation -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: XP smp using a lot of CPU
On Thu, 2009-05-14 at 12:19 +0200, Johannes Schlatow wrote: I had a similar problem some weeks ago. Finally I found out that my VM running WinXP was working on a non-acpi system (maybe I started kvm with -no-acpi option during the installation). In the Device Manager there has to be the entry Computer-ACPI Multiprocessor PC. Otherwise the VM produced 100% real cpu load on my machines (the fans were running on highest speed level). I just started the WinXP installation in repair mode and this did fix the problem. I hope this helps! regards Johannes That may be it: I was running with -no-acpi. Various docs recommended this for Windows performance, but your comment reminded me that acpi is (I think) required for multiprocessors. I'll be in where I can check on this later today. Thanks. Ross On Wed, May 13, 2009 at 2:41 AM, Ross Boylan r...@biostat.ucsf.edu wrote: I just installed XP into a new VM, specifying -smp 2 for the machine. According to top, it's using nearly 200% of a cpu even when I'm not doing anything. Is this real CPU useage, or just a reporting problem (just as my disk image is big according to ls, but isn't really)? If it's real, is there anything I can do about it? kvm 0.7.2 on Debian Lenny (but 2.6.29 kernel), amd64. Xeon chips; 32 bit version of XP pro installed, now fully patched (including the Windows Genuine Advantage stuff, though I cancelled it when it wanted to run). Task manager in XP shows virtually no CPU useage. Please cc me on responses. Thanks for any assistance. 
-- Ross Boylan wk: (415) 514-8146 185 Berry St #5700 r...@biostat.ucsf.edu Dept of Epidemiology and Biostatistics fax: (415) 514-8150 University of California, San Francisco San Francisco, CA 94107-1739 hm: (415) 550-1062 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: event injection MACROs
On Thu, May 14, 2009 at 10:34:11PM +0800, Dong, Eddie wrote: Gleb Natapov wrote: On Thu, May 14, 2009 at 09:43:33PM +0800, Dong, Eddie wrote: Avi Kivity wrote: Dong, Eddie wrote: OK. Also back to Gleb's question, the reason I want to do that is to simplify event generation mechanism in current KVM. Today KVM use additional layer of exception/nmi/interrupt such as vcpu.arch.exception.pending, vcpu-arch.interrupt.pending vcpu-arch.nmi_injected. All those additional layer is due to compete of VM_ENTRY_INTR_INFO_FIELD write to inject the event. Both SVM VMX has only one resource to inject the virtual event but KVM generates 3 catagory of events in parallel which further requires additional logic to dictate among them. I thought of using a queue to hold all pending events (in a common format), sort it by priority, and inject the head. The SDM Table 5-4 requires to merge 2 events together, i.e. convert to #DF/ Triple fault or inject serially when 2 events happens no matter NMI, IRQ or exception. As if considering above events merging activity, that is a single element queue. I don't know how you got to this conclusion from you previous statement. See explanation to table 5-2 for instate where it is stated that interrupt should be held pending if there is exception with higher priority. Should be held pending where? In the queue, like we do. Note that low prio exceptions are just dropped since they will be regenerated. I have different understanding here. My understanding is that held means NO INTA in HW, i.e. LAPIC still hold this IRQ. And what if INTA already happened and CPU is ready to fetch IDT for interrupt vector and at this very moment CPU faults? We could have either: 1) A pure SW queue that will be flush to HW register later (VM_ENTRY_INTR_INFO_FIELD), 2) Direct use HW register. We have three event sources 1) exceptions 2) IRQ 3) NMI. We should have queue of three elements sorted by priority. On each entry we should Table 5-4 alreadys says NMI/IRQ is BENIGN. 
Table 5-2 applies here not table 5-4 I think. inject an event with highest priority. And remove it from queue on exit. The problem is that we have to decide to inject only one of above 3, and discard the rest. Whether priority them or merge (to one event as Table 5-4) is another story. Only a small number of event are merged into #DF. Most handled serially (SDM does not define what serially means unfortunately), so I don't understand where discard the rest is come from. We can discard exception since it will be regenerated anyway, but IRQ and NMI is another story. SDM says that IRQ should be held pending (once again not much explanation here), nothing about NMI. A potential benefit is that it can avoid duplicated code and potential bugs in current code as following patch shows if I understand correctly: --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2599,7 +2599,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) cr2 = vmcs_readl(EXIT_QUALIFICATION); KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, (u32)((u64)cr2 32), handler); - if (vcpu-arch.interrupt.pending || vcpu-arch.exception.pending ) + if (vcpu-arch.interrupt.pending || vcpu-arch.exception.pending || vcpu-arch.nmi_injected) kvm_mmu_unprotect_page_virt(vcpu, cr2); return kvm_mmu_page_fault(vcpu, cr2, error_code); } This fix is already in Avi's tree (not yet pushed). Either way are OK and up to you. BTW Xen uses HW register directly to representing an pending event. In this particular case I don't mind to use HW register either, but I don't see any advantage. One example is that exception has higher priority than NMI/IRQ injection in current code which is not true in reality. I don't think it matters in practice, since the guest will see it as a timing issue. NMIs and IRQs are asynchronous (even those generated by the guest through the local APIC). Yes. But also cause IRQ injection be delayed which may have side effect. 
For example if guest exception handler is very longer or if guest VCPU fall into recursive #GP. Within current logic, a guest IRQ event from KDB (IPI) running on VCPU0, as an example, can't force the dead loop VCPU1 into KDB since it is recursively #GP. If one #GP causes another #GP this is a #DF. If CPU has a chance to Means another #GP in next instruction i.e. Beginning of #GP handler in guest. No #DF here. In this case we will enter guest with NMI windows open request and should exit immediately before first instruction of #GP handler. At this moment KVM will be able to inject NMI. executes something in between KVM will have a chance to inject NMI. Could have no chance
Re: [PATCH] kvm: user: include arch specific headers from $(KERNELDIR)
On Thursday 14 May 2009, Avi Kivity wrote: There aren't the real kernel headers, just cheap copies carried in qemu-kvm.git which have been appropriately postprocessed. We do this since the kvm external module can run on a much older kernel, so there is no natural place to find it headers. Sorry for the confusion on my part. I was aware of the sanitized kernel headers, but was mislead by the line kerneldir=/lib/modules/$(uname -r)/build in kvm/user/configure. What I didn't realize is that this always gets overridden by kvm/configure. Maybe we can change the default in kvm/user/configure to something more sensible: --- [PATCH] kvm: user: fix default kerneldir calling ./configure in kvm/user sets the kerneldir to the currently running kernel, which is incorrect for user code. This changes the default to the sanitized header files from the kvm/kernel directory. Signed-off-by: Arnd Bergmann a...@arndb.de diff --git a/kvm/user/configure b/kvm/user/configure index efb8705..858a519 100755 --- a/kvm/user/configure +++ b/kvm/user/configure @@ -1,7 +1,7 @@ #!/bin/bash prefix=/usr/local -kerneldir=/lib/modules/$(uname -r)/build +kerneldir=$(dirname $0)/../kernel cc=gcc ld=ld objcopy=objcopy -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [KVM PATCH v7 2/3] kvm: add support for irqfd via eventfd-notification interface
Avi Kivity wrote: Gregory Haskins wrote: KVM provides a complete virtual system environment for guests, including support for injecting interrupts modeled after the real exception/interrupt facilities present on the native platform (such as the IDT on x86). Virtual interrupts can come from a variety of sources (emulated devices, pass-through devices, etc) but all must be injected to the guest via the KVM infrastructure. This patch adds a new mechanism to inject a specific interrupt to a guest using a decoupled eventfd mechanism: Any legal signal on the irqfd (using eventfd semantics from either userspace or kernel) will translate into an injected interrupt in the guest at the next available interrupt window. + +static void +irqfd_inject(struct work_struct *work) +{ +struct _irqfd *irqfd = container_of(work, struct _irqfd, work); +struct kvm *kvm = irqfd->kvm; + I think you need to ->read() from the irqfd, otherwise the count will never clear. Yeah, and this is a disadvantage to using eventfd vs a custom anon-fd implementation. However, the count is really only there for deciding whether to sleep a traditional eventfd recipient which doesn't really apply in this application. I suppose we could try to invoke the read method (or add a new method to eventfd to allow it to be cleared independent of the f_ops->read() (ala eventfd_signal() vs f_ops->write()). I'm not convinced we really need to worry about it, though. IMO we can just let the count accumulate. But if you insist this loose end should be addressed, perhaps Davide has some thoughts on how to best do this? -Greg +mutex_lock(&kvm->lock); +kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); +kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); +mutex_unlock(&kvm->lock); +} signature.asc Description: OpenPGP digital signature
Re: [PATCH] kvm: user: include arch specific headers from $(KERNELDIR)
Arnd Bergmann wrote: On Thursday 14 May 2009, Avi Kivity wrote: There aren't the real kernel headers, just cheap copies carried in qemu-kvm.git which have been appropriately postprocessed. We do this since the kvm external module can run on a much older kernel, so there is no natural place to find it headers. Sorry for the confusion on my part. I was aware of the sanitized kernel headers, but was mislead by the line kerneldir=/lib/modules/$(uname -r)/build in kvm/user/configure. What I didn't realize is that this always gets overridden by kvm/configure. Maybe we can change the default in kvm/user/configure to something more sensible: --- [PATCH] kvm: user: fix default kerneldir calling ./configure in kvm/user sets the kerneldir to the currently running kernel, which is incorrect for user code. This changes the default to the sanitized header files from the kvm/kernel directory. Signed-off-by: Arnd Bergmann a...@arndb.de diff --git a/kvm/user/configure b/kvm/user/configure index efb8705..858a519 100755 --- a/kvm/user/configure +++ b/kvm/user/configure @@ -1,7 +1,7 @@ #!/bin/bash prefix=/usr/local -kerneldir=/lib/modules/$(uname -r)/build +kerneldir=$(dirname $0)/../kernel cc=gcc ld=ld objcopy=objcopy I usually add a readlink -f in there due to my innate fear of relative directories and cd. btw, these are my plans for kvm/user: - convert the tests to be loadable with qemu -kernel; we lose the simplicity of kvmctl so I'm not 100% sure it's a good idea. On the other hand some of the tests are useful for tcg. - kill kvmtrace (replaced by the standard ftrace tools, whatever they are; maybe create a new repo if kvm specific tools are needed) -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm: user: include arch specific headers from $(KERNELDIR)
On Thursday 14 May 2009, Avi Kivity wrote: I usually add a readlink -f in there due to my innate fear of relative directories and cd. There is one already in the only place where this gets used: KERNELDIR=$(readlink -f $kerneldir) It also gets shown in the configure --help output, but I suppose showing the relative path there may be helpful because of its brevity. Arnd -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
profiling virtio (blk in this case)
The Host<->Guest calls in virtio_blk (using the generic virtio kick/notify) are as follows: Guest->Host --- do_virtblk_request calls kick in the guest side causing handle_output to be called on the host side. Host->Guest --- virtio_blk_rw_complete calls notify in the host side causing block_done to be called on the guest side My question has to do with the timing of the calls. Which would be the correct drawing 1. Overlapping: kick || handle_output || 2. Disjoint: kick || handle_output|| In other words: if I do g1 = get_cpu_cycles kick g2 = get_cpu_cycles and h1 = get_cpu_cycles handle_output h2 = get_cpu_cycles would (g2-g1) + (h2-h1) count some cycles twice? Same question for notify and block_done. Thanks very much, Eran -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] bios: Fix MADT corruption and RSDT size when using -acpitable
Anthony Liguori wrote: Vincent Minet wrote: External ACPI tables are counted twice for the RSDT size and the load address for the first external table is in the MADT (interrupt override entries are overwritten). Signed-off-by: Vincent Minet vinc...@vincent-minet.net Beth, I think you had a patch attempting to address the same issue. It was a bit more involved though. Which is the proper fix and are they both to the same problem? They are for 2 different bases. My patch was for qemu's bochs bios and this is for qemu-kvm/kvm/bios/rombios32.c. They are pretty divergent in this area of setting up the ACPI tables. My patch is still needed for the qemu base. I hope we'll be getting to one base soon :-) Assuming the intent of the code was for MAX_RSDT_ENTRIES to include external_tables, this patch looks correct. I think one additional check would be needed (in my patch) to make sure that the code doesn't exceed MAX_RSDT_ENTRIES when the external tables are being loaded. My patch also puts all the code that calculates madt_size in the same place, at the beginning of the table layout. I believe this is neater and will avoid problems like this one in the future. As much as possible, I think it best to get all the tables layed out, then fill them in. If for some reason this is not acceptable, we need to add a big note that no tables should be layed out after the madt because the madt may grow further down in the code and overwrite the other table. 
Regards, Anthony Liguori --- kvm/bios/rombios32.c |3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/kvm/bios/rombios32.c b/kvm/bios/rombios32.c index cbd5f15..289361b 100755 --- a/kvm/bios/rombios32.c +++ b/kvm/bios/rombios32.c @@ -1626,7 +1626,7 @@ void acpi_bios_init(void) addr = base_addr = ram_size - ACPI_DATA_SIZE; rsdt_addr = addr; rsdt = (void *)(addr); -rsdt_size = sizeof(*rsdt) + external_tables * 4; +rsdt_size = sizeof(*rsdt); addr += rsdt_size; fadt_addr = addr; @@ -1787,6 +1787,7 @@ void acpi_bios_init(void) } int_override++; madt_size += sizeof(struct madt_int_override); +addr += sizeof(struct madt_int_override); } acpi_build_table_header((struct acpi_table_header *)madt, APIC, madt_size, 1); -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html diff --git a/kvm/bios/rombios32.c b/kvm/bios/rombios32.c index cbd5f15..23835b6 100755 --- a/kvm/bios/rombios32.c +++ b/kvm/bios/rombios32.c @@ -1626,7 +1626,7 @@ void acpi_bios_init(void) addr = base_addr = ram_size - ACPI_DATA_SIZE; rsdt_addr = addr; rsdt = (void *)(addr); -rsdt_size = sizeof(*rsdt) + external_tables * 4; +rsdt_size = sizeof(*rsdt); addr += rsdt_size; fadt_addr = addr; @@ -1665,6 +1665,7 @@ void acpi_bios_init(void) addr = (addr + 7) ~7; madt_addr = addr; +madt = (void *)(addr); madt_size = sizeof(*madt) + sizeof(struct madt_processor_apic) * MAX_CPUS + #ifdef BX_QEMU @@ -1672,7 +1673,11 @@ void acpi_bios_init(void) #else sizeof(struct madt_io_apic); #endif -madt = (void *)(addr); +for ( i = 0; i 16; i++ ) { +if ( PCI_ISA_IRQ_MASK (1U i) ) { +madt_size += sizeof(struct madt_int_override); +} +} addr += madt_size; #ifdef BX_QEMU @@ -1786,7 +1791,6 @@ void acpi_bios_init(void) continue; } int_override++; -madt_size += sizeof(struct madt_int_override); } acpi_build_table_header((struct acpi_table_header *)madt, APIC, madt_size, 1); @@ -1868,17 
+1872,6 @@ void acpi_bios_init(void) acpi_build_table_header((struct acpi_table_header *)hpet, HPET, sizeof(*hpet), 1); #endif - -acpi_additional_tables(); /* resets cfg to required entry */ -for(i = 0; i external_tables; i++) { -uint16_t len; -if(acpi_load_table(i, addr, len) 0) -BX_PANIC(Failed to load ACPI table from QEMU\n); -rsdt-table_offset_entry[nb_rsdt_entries++] = cpu_to_le32(addr); -addr += len; -if(addr = ram_size) -BX_PANIC(ACPI table overflow\n); -} #endif /* RSDT */ @@ -1891,6 +1884,16 @@ void acpi_bios_init(void) // rsdt-table_offset_entry[nb_rsdt_entries++] = cpu_to_le32(hpet_addr); if (nb_numa_nodes 0) rsdt-table_offset_entry[nb_rsdt_entries++] = cpu_to_le32(srat_addr); +acpi_additional_tables(); /* resets cfg to required entry */ +for(i = 0; i external_tables; i++) { +uint16_t len; +if(acpi_load_table(i, addr, len) 0) +BX_PANIC(Failed to load ACPI table from QEMU\n); +
[KVM PATCH v8] kvm: add support for irqfd via eventfd-notification interface
KVM provides a complete virtual system environment for guests, including support for injecting interrupts modeled after the real exception/interrupt facilities present on the native platform (such as the IDT on x86). Virtual interrupts can come from a variety of sources (emulated devices, pass-through devices, etc) but all must be injected to the guest via the KVM infrastructure. This patch adds a new mechanism to inject a specific interrupt to a guest using a decoupled eventfd mechnanism: Any legal signal on the irqfd (using eventfd semantics from either userspace or kernel) will translate into an injected interrupt in the guest at the next available interrupt window. Signed-off-by: Gregory Haskins ghask...@novell.com --- arch/x86/kvm/Makefile|2 arch/x86/kvm/x86.c |1 include/linux/kvm.h | 11 +++ include/linux/kvm_host.h |4 + virt/kvm/eventfd.c | 198 ++ virt/kvm/kvm_main.c | 11 +++ 6 files changed, 226 insertions(+), 1 deletions(-) create mode 100644 virt/kvm/eventfd.c diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index b43c4ef..4d50904 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -3,7 +3,7 @@ # common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ -coalesced_mmio.o irq_comm.o) +coalesced_mmio.o irq_comm.o eventfd.o) ifeq ($(CONFIG_KVM_TRACE),y) common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c6d3ff3..1d062eb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1026,6 +1026,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_REINJECT_CONTROL: case KVM_CAP_IRQ_INJECT_STATUS: case KVM_CAP_ASSIGN_DEV_IRQ: + case KVM_CAP_IRQFD: r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 3db5d8d..a1ecc6a 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -415,6 +415,7 @@ struct kvm_trace_rec { #define KVM_CAP_ASSIGN_DEV_IRQ 29 /* Another bug in KVM_SET_USER_MEMORY_REGION 
fixed: */ #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 +#define KVM_CAP_IRQFD 31 #ifdef KVM_CAP_IRQ_ROUTING @@ -454,6 +455,15 @@ struct kvm_irq_routing { #endif +#define KVM_IRQFD_FLAG_DEASSIGN (1 0) + +struct kvm_irqfd { + __u32 fd; + __u32 gsi; + __u32 flags; + __u8 pad[20]; +}; + /* * ioctls for VM fds */ @@ -498,6 +508,7 @@ struct kvm_irq_routing { #define KVM_ASSIGN_SET_MSIX_ENTRY \ _IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry) #define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) +#define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) /* * ioctls for vcpu fds diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2b8df0c..dc91610 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -134,6 +134,7 @@ struct kvm { struct list_head vm_list; struct kvm_io_bus mmio_bus; struct kvm_io_bus pio_bus; + struct list_head irqfds; struct kvm_vm_stat stat; struct kvm_arch arch; atomic_t users_count; @@ -525,4 +526,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {} #endif +int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags); +void kvm_irqfd_release(struct kvm *kvm); + #endif diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c new file mode 100644 index 000..884df16 --- /dev/null +++ b/virt/kvm/eventfd.c @@ -0,0 +1,198 @@ +/* + * kvm eventfd support - use eventfd objects to signal various KVM events + * + * Copyright 2009 Novell. All Rights Reserved. + * + * Author: + * Gregory Haskins ghask...@novell.com + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include linux/kvm_host.h +#include linux/workqueue.h +#include linux/syscalls.h +#include linux/wait.h +#include linux/poll.h +#include linux/file.h +#include linux/list.h + +/* + * + * irqfd: Allows an fd to be used to inject an interrupt to the guest + * + * Credit goes to Avi Kivity for the original idea. + *
[PATCH v8] qemu-kvm: add irqfd support
irqfd lets you create an eventfd based file-desriptor to inject interrupts to a kvm guest. We associate one gsi per fd for fine-grained routing. Signed-off-by: Gregory Haskins ghask...@novell.com --- kvm/libkvm/libkvm.c | 57 +++ kvm/libkvm/libkvm.h | 26 +++ 2 files changed, 83 insertions(+), 0 deletions(-) diff --git a/kvm/libkvm/libkvm.c b/kvm/libkvm/libkvm.c index ba0a5d1..ccab985 100644 --- a/kvm/libkvm/libkvm.c +++ b/kvm/libkvm/libkvm.c @@ -34,6 +34,7 @@ #include string.h #include errno.h #include sys/ioctl.h +#include sys/eventfd.h #include inttypes.h #include libkvm.h @@ -1444,3 +1445,59 @@ int kvm_assign_set_msix_entry(kvm_context_t kvm, return ret; } #endif + +#ifdef KVM_CAP_IRQFD +static int _kvm_irqfd(kvm_context_t kvm, int fd, int gsi, int flags) +{ + int r; + struct kvm_irqfd data = { + .fd= fd, + .gsi = gsi, + .flags = flags, + }; + + r = ioctl(kvm-vm_fd, KVM_IRQFD, data); + if (r == -1) + r = -errno; + return r; +} + +int kvm_create_irqfd(kvm_context_t kvm, int gsi, int flags) +{ + int r; + int fd; + + if (!kvm_check_extension(kvm, KVM_CAP_IRQFD)) + return -ENOENT; + + fd = eventfd(0, 0); + if (fd 0) + return -errno; + + r = _kvm_irqfd(kvm, fd, gsi, 0); + if (r 0) { + close(fd); + return -errno; + } + + return fd; +} + +int kvm_destroy_irqfd(kvm_context_t kvm, int fd, int gsi, int flags) +{ + return _kvm_irqfd(kvm, fd, gsi, KVM_IRQFD_FLAG_DEASSIGN); +} + +#else /* KVM_CAP_IRQFD */ + +int kvm_create_irqfd(kvm_context_t kvm, int gsi, int flags) +{ + return -ENOENT; +} + +int kvm_destroy_irqfd(kvm_context_t kvm, int fd, int gsi, int flags) +{ + return -ENOENT; +} + +#endif /* KVM_CAP_IRQFD */ diff --git a/kvm/libkvm/libkvm.h b/kvm/libkvm/libkvm.h index 4821a1e..3ccbe3d 100644 --- a/kvm/libkvm/libkvm.h +++ b/kvm/libkvm/libkvm.h @@ -856,6 +856,32 @@ int kvm_commit_irq_routes(kvm_context_t kvm); */ int kvm_get_irq_route_gsi(kvm_context_t kvm); +/*! 
+ * \brief Create a file descriptor for injecting interrupts + * + * Creates an eventfd based file-descriptor that maps to a specific GSI + * in the guest. eventfd compliant signaling (write() from userspace, or + * eventfd_signal() from kernelspace) will cause the GSI to inject + * itself into the guest at the next available window. + * + * \param kvm Pointer to the current kvm_context + * \param gsi GSI to assign to this fd + * \param flags reserved, must be zero + */ +int kvm_create_irqfd(kvm_context_t kvm, int gsi, int flags); + +/*! + * \brief Destroy an irqfd file descriptor + * + * Destroys a file descriptor previously opened with kvm_create_irqfd() + * + * \param kvm Pointer to the current kvm_context + * \param fd fd to close + * \param gsi GSI to close + * \param flags reserved, must be zero + */ +int kvm_destroy_irqfd(kvm_context_t kvm, int fd, int gsi, int flags); + #ifdef KVM_CAP_DEVICE_MSIX int kvm_assign_set_msix_nr(kvm_context_t kvm, struct kvm_assigned_msix_nr *msix_nr); -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: Status of pci passthrough work?
Amit, I am trying to use PVDMA. I've downloaded a kernel snapshot from your kvm git, but I couldn't download a snapshot or the repo from your kvm-userspace tree. I tried to launch the VM using kvm-85 user space but it hangs before loading it. Should it work with kvm-85 user space? Do you have the userspace patches for PVDMA? Thanks, Pablo -Original Message- From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On Behalf Of Amit Shah Sent: Tuesday, December 16, 2008 12:07 PM To: xming Cc: Thomas Fjellstrom; kvm@vger.kernel.org Subject: Re: Status of pci passthrough work? Hello, - xming xming...@gmail.com wrote: When can we expect pvdma updates? Is it ever going to be merged into mainline kvm? The pvdma tree at http://git.kernel.org/?p=linux/kernel/git/amit/kvm.git;a=shortlog;h=pvdm a is based off an older Linux version. It's usable; but not ported to newer kernel versions. I can't say when I'll get around doing it. In the meanwhile if someone else is interested, drop me a line. Amit. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] Re: Question about KVM and PC speaker
malc wrote: On Wed, 13 May 2009, Sebastian Herbszt wrote: Jan Kiszka wrote: Moreover, does sound work at all with your qemu? The image I tried [1] issues two beeps after loading (obviously via direct hw access) - a good way to check general support. Note that one reason for broken host sound with qemu can be OSS. For that reason I always configure my qemu with --audio-drv-list=alsa. Thats a good hint :) Seems i used to compile qemu without --audio-drv-list. Since dsound and fmod drivers don't compile here (i likely miss some libs in my mingw), i used sdl. Don't do that. Here's a nice tutorial Kazu made that will probably help you: http://www.h7.dion.ne.jp/~qemu-win/Audio-en.html So you're saying the use of sdl for audio is not recommended? Now i can hear those two beeps with the image you suggested. Tho those are coming thru my sound card and not the hosts pc speaker (even with -soundhw pcspk, but maybe that option means something different). And it will always come through your soundcard. pcspk is not a passthrough thing. Thanks for the clarification. With INT 10h AH=0Eh i now can hear a beep too, but it doesn't stop and qemu somewhat freezes. Huh? With this INT 10h function qemu should beep once, but it does loop the beep infinitely. Normally i can exit qemu by clicking on the [x] window close icon, but while it does endlessly beep that doesn't work (vista says process doesn't respond). Using quit in the monitor window doesn't work either. - Sebastian -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: XP smp using a lot of CPU
On Wed, 2009-05-13 at 09:56 +0300, Avi Kivity wrote: Ross Boylan wrote: I just installed XP into a new VM, specifying -smp 2 for the machine. According to top, it's using nearly 200% of a cpu even when I'm not doing anything. Is this real CPU useage, or just a reporting problem (just as my disk image is big according to ls, but isn't really)? If it's real, is there anything I can do about it? kvm 0.7.2 on Debian Lenny (but 2.6.29 kernel), amd64. Xeon chips; 32 bit version of XP pro installed, now fully patched (including the Windows Genuine Advantage stuff, though I cancelled it when it wanted to run). Task manager in XP shows virtually no CPU useage. Please cc me on responses. I'm guessing Windows uses a pio port to sleep, which kvm doesn't support. Can you provide kvm_stat output? markov:~# kvm_stat -1 efer_reload0 0 exits9921384 566 fpu_reload267970 0 halt_exits 1 0 halt_wakeup3 0 host_state_reload402605017 hypercalls 0 0 insn_emulation 1329455 0 insn_emulation_fail 154 0 invlpg176773 0 io_exits 3818270 0 irq_exits1434046 566 irq_injections326730 0 irq_window164827 0 largepages 0 0 mmio_exits 35892 0 mmu_cache_miss 29760 0 mmu_flooded19908 0 mmu_pde_zapped 15557 0 mmu_pte_updated82088 0 mmu_pte_write 97990 0 mmu_recycled 0 0 mmu_shadow_zapped 43276 0 mmu_unsync 891 0 mmu_unsync_global 0 0 nmi_injections 0 0 nmi_window 0 0 pf_fixed 1231164 0 pf_guest 276083 0 remote_tlb_flush 115606 0 request_irq0 0 request_nmi0 0 signal_exits 5 0 tlb_flush 960198 0 This is with the VM displaying the XP It is now safe to turn off your computer. CPU remains about 200% from kvm. Invoked with sudo vdeq kvm -net nic,vlan=1,macaddr=52:54:a0:12:01:00 \ -net vde,vlan=1,sock=/var/run/vde2/tap0.ctl \ -std-vga -hda XP.raw \ -boot c \ -soundhw es1370 -localtime -no-acpi -m 1G -smp 2 Next I'll trying fiddling with acpi. 
-- Ross Boylan wk: (415) 514-8146 185 Berry St #5700 r...@biostat.ucsf.edu Dept of Epidemiology and Biostatistics fax: (415) 514-8150 University of California, San Francisco San Francisco, CA 94107-1739 hm: (415) 550-1062 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RFC: convert KVMTRACE to event traces
Convert custom marker based KVMTRACE to event trace. Applies on top of git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-x86.git See Documentation/trace/events.txt and commit 7ce7e4249921d5073e764f7ff7ad83cfa9894bd7 if you're interested in playing with event traces. Index: linux-2.6-x86-2/arch/x86/kvm/vmx.c === --- linux-2.6-x86-2.orig/arch/x86/kvm/vmx.c +++ linux-2.6-x86-2/arch/x86/kvm/vmx.c @@ -25,6 +25,7 @@ #include linux/highmem.h #include linux/sched.h #include linux/moduleparam.h +#include trace/events/kvm/x86-arch.h #include kvm_cache_regs.h #include x86.h @@ -2406,7 +2407,7 @@ static void vmx_inject_irq(struct kvm_vc { struct vcpu_vmx *vmx = to_vmx(vcpu); - KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler); + trace_kvm_inj_virq(irq); ++vcpu-stat.irq_injections; if (vcpu-arch.rmode.active) { @@ -2631,8 +2632,8 @@ static int handle_exception(struct kvm_v if (vm_need_ept()) BUG(); cr2 = vmcs_readl(EXIT_QUALIFICATION); - KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, - (u32)((u64)cr2 32), handler); + trace_kvm_page_fault(cr2, error_code); + if (vcpu-arch.interrupt.pending || vcpu-arch.exception.pending) kvm_mmu_unprotect_page_virt(vcpu, cr2); return kvm_mmu_page_fault(vcpu, cr2, error_code); @@ -2679,7 +2680,6 @@ static int handle_external_interrupt(str struct kvm_run *kvm_run) { ++vcpu-stat.irq_exits; - KVMTRACE_1D(INTR, vcpu, vmcs_read32(VM_EXIT_INTR_INFO), handler); return 1; } @@ -2727,7 +2727,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcp static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - unsigned long exit_qualification; + unsigned long exit_qualification, val; int cr; int reg; @@ -2736,25 +2736,23 @@ static int handle_cr(struct kvm_vcpu *vc reg = (exit_qualification 8) 15; switch ((exit_qualification 4) 3) { case 0: /* mov to cr */ - KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, - (u32)kvm_register_read(vcpu, reg), - (u32)((u64)kvm_register_read(vcpu, reg) 32), - handler); + val = kvm_register_read(vcpu, reg); + 
trace_kvm_cr_write(cr, val); switch (cr) { case 0: - kvm_set_cr0(vcpu, kvm_register_read(vcpu, reg)); + kvm_set_cr0(vcpu, val); skip_emulated_instruction(vcpu); return 1; case 3: - kvm_set_cr3(vcpu, kvm_register_read(vcpu, reg)); + kvm_set_cr3(vcpu, val); skip_emulated_instruction(vcpu); return 1; case 4: - kvm_set_cr4(vcpu, kvm_register_read(vcpu, reg)); + kvm_set_cr4(vcpu, val); skip_emulated_instruction(vcpu); return 1; case 8: - kvm_set_cr8(vcpu, kvm_register_read(vcpu, reg)); + kvm_set_cr8(vcpu, val); skip_emulated_instruction(vcpu); if (irqchip_in_kernel(vcpu-kvm)) return 1; @@ -2767,23 +2765,19 @@ static int handle_cr(struct kvm_vcpu *vc vcpu-arch.cr0 = ~X86_CR0_TS; vmcs_writel(CR0_READ_SHADOW, vcpu-arch.cr0); vmx_fpu_activate(vcpu); - KVMTRACE_0D(CLTS, vcpu, handler); skip_emulated_instruction(vcpu); return 1; case 1: /*mov from cr*/ switch (cr) { case 3: kvm_register_write(vcpu, reg, vcpu-arch.cr3); - KVMTRACE_3D(CR_READ, vcpu, (u32)cr, - (u32)kvm_register_read(vcpu, reg), - (u32)((u64)kvm_register_read(vcpu, reg) 32), - handler); + trace_kvm_cr_read(cr, vcpu-arch.cr3); skip_emulated_instruction(vcpu); return 1; case 8: - kvm_register_write(vcpu, reg, kvm_get_cr8(vcpu)); - KVMTRACE_2D(CR_READ, vcpu, (u32)cr, - (u32)kvm_register_read(vcpu, reg), handler); + val = kvm_get_cr8(vcpu); + kvm_register_write(vcpu, cr, val); + trace_kvm_cr_read(cr, val); skip_emulated_instruction(vcpu); return 1; } @@
[PATCH] Allow to override sync source
In order to allow sync'ing the kmod dir against arbitrary kernels trees, extend the sync script to accept alternative paths and adjust the Makefile accordingly. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- Makefile |3 ++- sync | 14 +++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 1e0420e..dad5f0b 100644 --- a/Makefile +++ b/Makefile @@ -17,6 +17,7 @@ ORIGMODDIR = $(patsubst %/build,%/kernel,$(KERNELDIR)) rpmrelease = devel +KVM_VERSION = kvm-devel LINUX = ./linux-2.6 ifeq ($(EXT_CONFIG_KVM_TRACE),y) @@ -38,7 +39,7 @@ include $(MAKEFILE_PRE) .PHONY: sync sync: - ./sync $(KVM_VERSION) + ./sync -v $(KVM_VERSION) -l $(LINUX) install: mkdir -p $(DESTDIR)/$(INSTALLDIR) diff --git a/sync b/sync index 4a89296..2e53a31 100755 --- a/sync +++ b/sync @@ -1,6 +1,7 @@ #!/usr/bin/python import sys, os, glob, os.path, shutil, re +from optparse import OptionParser glob = glob.glob @@ -8,12 +9,19 @@ def cmd(c): if os.system(c) != 0: raise Exception('command execution failed: ' + c) -version = 'kvm-devel' -if len(sys.argv) = 2: -version = sys.argv[1] +parser = OptionParser(usage='usage: %prog [-v version][-l linuxkernel]') +parser.add_option('-v', action='store', type='string', dest='version') +parser.add_option('-l', action='store', type='string', dest='linux') +(options, args) = parser.parse_args() +version = 'kvm-devel' linux = 'linux-2.6' +if options.version: +version = options.version +if options.linux: +linux = options.linux + _re_cache = {} def re_cache(regexp): signature.asc Description: OpenPGP digital signature
[PATCH v3] kvm: x86: Allow PIT emulation without speaker port
The in-kernel speaker emulation is only a dummy and also unneeded from the performance point of view. Rather, it takes user space support to generate sound output on the host, e.g. console beeps. To allow this, introduce KVM_CREATE_PIT2 which controls in-kernel speaker port emulation via a flag passed along the new IOCTL. It also leaves room for future extensions of the PIT configuration interface. Changes in v3: - increase padding in kvm_pit_config to 64 bytes (as requested by Avi) Changes in v2: - Use extensible KVM_CREATE_PIT2 Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- arch/x86/kvm/i8254.c | 14 -- arch/x86/kvm/i8254.h |2 +- arch/x86/kvm/x86.c | 12 +++- include/linux/kvm.h | 10 ++ 4 files changed, 30 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 4d6f0d2..584e3d3 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -560,7 +560,7 @@ static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask) } } -struct kvm_pit *kvm_create_pit(struct kvm *kvm) +struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) { struct kvm_pit *pit; struct kvm_kpit_state *pit_state; @@ -586,11 +586,13 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm) pit-dev.private = pit; kvm_io_bus_register_dev(kvm-pio_bus, pit-dev); - pit-speaker_dev.read = speaker_ioport_read; - pit-speaker_dev.write = speaker_ioport_write; - pit-speaker_dev.in_range = speaker_in_range; - pit-speaker_dev.private = pit; - kvm_io_bus_register_dev(kvm-pio_bus, pit-speaker_dev); + if (flags KVM_PIT_SPEAKER_DUMMY) { + pit-speaker_dev.read = speaker_ioport_read; + pit-speaker_dev.write = speaker_ioport_write; + pit-speaker_dev.in_range = speaker_in_range; + pit-speaker_dev.private = pit; + kvm_io_bus_register_dev(kvm-pio_bus, pit-speaker_dev); + } kvm-arch.vpit = pit; pit-kvm = kvm; diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index bbd863f..b267018 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -50,7 +50,7 @@ 
struct kvm_pit { void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu); void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val); -struct kvm_pit *kvm_create_pit(struct kvm *kvm); +struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags); void kvm_free_pit(struct kvm *kvm); void kvm_pit_reset(struct kvm_pit *pit); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 44e87a5..c6e7896 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1026,6 +1026,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_REINJECT_CONTROL: case KVM_CAP_IRQ_INJECT_STATUS: case KVM_CAP_ASSIGN_DEV_IRQ: + case KVM_CAP_PIT2: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -1829,6 +1830,7 @@ long kvm_arch_vm_ioctl(struct file *filp, union { struct kvm_pit_state ps; struct kvm_memory_alias alias; + struct kvm_pit_config pit_config; } u; switch (ioctl) { @@ -1889,12 +1891,20 @@ long kvm_arch_vm_ioctl(struct file *filp, } break; case KVM_CREATE_PIT: + u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY; + goto create_pit; + case KVM_CREATE_PIT2: + r = -EFAULT; + if (copy_from_user(u.pit_config, argp, + sizeof(struct kvm_pit_config))) + goto out; + create_pit: mutex_lock(kvm-lock); r = -EEXIST; if (kvm-arch.vpit) goto create_pit_unlock; r = -ENOMEM; - kvm-arch.vpit = kvm_create_pit(kvm); + kvm-arch.vpit = kvm_create_pit(kvm, u.pit_config.flags); if (kvm-arch.vpit) r = 0; create_pit_unlock: diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 3db5d8d..5575409 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -70,6 +70,14 @@ struct kvm_irqchip { } chip; }; +/* for KVM_CREATE_PIT2 */ +struct kvm_pit_config { + __u32 flags; + __u32 pad[15]; +}; + +#define KVM_PIT_SPEAKER_DUMMY 1 + #define KVM_EXIT_UNKNOWN 0 #define KVM_EXIT_EXCEPTION1 #define KVM_EXIT_IO 2 @@ -415,6 +423,7 @@ struct kvm_trace_rec { #define KVM_CAP_ASSIGN_DEV_IRQ 29 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 +#define KVM_CAP_PIT2 31 #ifdef 
KVM_CAP_IRQ_ROUTING @@ -498,6 +507,7 @@ struct kvm_irq_routing { #define KVM_ASSIGN_SET_MSIX_ENTRY \ _IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry) #define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO,
[PATCH v4] qemu-kvm: Make PC speaker emulation aware of in-kernel PIT
When using the in-kernel PIT the speaker emulation has to synchronize the PIT state with KVM. Enhance the existing speaker sound device and allow it to take over port 0x61 by using KVM_CREATE_PIT2 where available. This unbreaks -soundhw pcspk in KVM mode. Changes in v4: - preserve full PIT state across read-modify-write - update kvm.h Changes in v3: - re-added incorrectly dropped kvm_enabled checks Changes in v2: - rebased over qemu-kvm and KVM_CREATE_PIT2 - refactored hooks in pcspk Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/pcspk.c | 48 kvm/kernel/include/linux/kvm.h | 10 kvm/libkvm/libkvm-x86.c| 26 +++--- 3 files changed, 76 insertions(+), 8 deletions(-) diff --git a/hw/pcspk.c b/hw/pcspk.c index ec1d0c6..c0b8347 100644 --- a/hw/pcspk.c +++ b/hw/pcspk.c @@ -27,6 +27,8 @@ #include isa.h #include audio/audio.h #include qemu-timer.h +#include i8254.h +#include qemu-kvm.h #define PCSPK_BUF_LEN 1792 #define PCSPK_SAMPLE_RATE 32000 @@ -48,6 +50,43 @@ typedef struct { static const char *s_spk = pcspk; static PCSpkState pcspk_state; +#ifdef USE_KVM_PIT +static void kvm_get_pit_ch2(PITState *pit, +struct kvm_pit_state *inkernel_state) +{ +struct kvm_pit_state pit_state; + +if (kvm_enabled() qemu_kvm_pit_in_kernel()) { +kvm_get_pit(kvm_context, pit_state); +pit-channels[2].mode = pit_state.channels[2].mode; +pit-channels[2].count = pit_state.channels[2].count; +pit-channels[2].count_load_time = pit_state.channels[2].count_load_time; +pit-channels[2].gate = pit_state.channels[2].gate; +if (inkernel_state) { +memcpy(inkernel_state, pit_state, sizeof(*inkernel_state)); +} +} +} + +static void kvm_set_pit_ch2(PITState *pit, +struct kvm_pit_state *inkernel_state) +{ +if (kvm_enabled() qemu_kvm_pit_in_kernel()) { +inkernel_state-channels[2].mode = pit-channels[2].mode; +inkernel_state-channels[2].count = pit-channels[2].count; +inkernel_state-channels[2].count_load_time = +pit-channels[2].count_load_time; +inkernel_state-channels[2].gate = pit-channels[2].gate; 
+kvm_set_pit(kvm_context, inkernel_state); +} +} +#else +static inline void kvm_get_pit_ch2(PITState *pit, + kvm_pit_state *inkernel_state) { } +static inline void kvm_set_pit_ch2(PITState *pit, + kvm_pit_state *inkernel_state) { } +#endif + static inline void generate_samples(PCSpkState *s) { unsigned int i; @@ -72,6 +111,8 @@ static void pcspk_callback(void *opaque, int free) PCSpkState *s = opaque; unsigned int n; +kvm_get_pit_ch2(s-pit, NULL); + if (pit_get_mode(s-pit, 2) != 3) return; @@ -121,6 +162,8 @@ static uint32_t pcspk_ioport_read(void *opaque, uint32_t addr) PCSpkState *s = opaque; int out; +kvm_get_pit_ch2(s-pit, NULL); + s-dummy_refresh_clock ^= (1 4); out = pit_get_out(s-pit, 2, qemu_get_clock(vm_clock)) 5; @@ -129,9 +172,12 @@ static uint32_t pcspk_ioport_read(void *opaque, uint32_t addr) static void pcspk_ioport_write(void *opaque, uint32_t addr, uint32_t val) { +struct kvm_pit_state inkernel_state; PCSpkState *s = opaque; const int gate = val 1; +kvm_get_pit_ch2(s-pit, inkernel_state); + s-data_on = (val 1) 1; pit_set_gate(s-pit, 2, gate); if (s-voice) { @@ -139,6 +185,8 @@ static void pcspk_ioport_write(void *opaque, uint32_t addr, uint32_t val) s-play_pos = 0; AUD_set_active_out(s-voice, gate s-data_on); } + +kvm_set_pit_ch2(s-pit, inkernel_state); } void pcspk_init(PITState *pit) diff --git a/kvm/kernel/include/linux/kvm.h b/kvm/kernel/include/linux/kvm.h index f5e9d66..5b4b90c 100644 --- a/kvm/kernel/include/linux/kvm.h +++ b/kvm/kernel/include/linux/kvm.h @@ -110,6 +110,14 @@ struct kvm_irqchip { } chip; }; +/* for KVM_CREATE_PIT2 */ +struct kvm_pit_config { + __u32 flags; + __u32 pad[15]; +}; + +#define KVM_PIT_SPEAKER_DUMMY 1 + #define KVM_EXIT_UNKNOWN 0 #define KVM_EXIT_EXCEPTION1 #define KVM_EXIT_IO 2 @@ -455,6 +463,7 @@ struct kvm_trace_rec { #define KVM_CAP_ASSIGN_DEV_IRQ 29 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 +#define KVM_CAP_PIT2 31 #ifdef KVM_CAP_IRQ_ROUTING @@ -538,6 
+547,7 @@ struct kvm_irq_routing { #define KVM_ASSIGN_SET_MSIX_ENTRY \ _IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry) #define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) +#define KVM_CREATE_PIT2 _IOW(KVMIO, 0x76, struct kvm_pit_config) /* * ioctls for vcpu fds diff --git a/kvm/libkvm/libkvm-x86.c
KVM VT-d2?
Does KVM already take advantage of Intel VT-d2 features, e.g. interrupt remapping support? Has anyone verified how it improves interrupt delivery for PCI pass-through devices? Thanks, Anna -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[KVM-AUTOTEST PATCH] Instead of trying to remove kvm modules with rmmod, use modprobe -r, as it handles module dependencies better
In certain situations, trying to do a rmmod on the kvm modules might generate errors, as there is a chain of dependencies involved. Instead, let's use modprobe -r, as it handles dependencies. Signed-off-by: Lucas Meneghel Rodrigues mrodr...@redhat.com diff --git a/client/tests/kvm_runtest_2/kvm_install.py b/client/tests/kvm_runtes index dbf8401..c27c6c7 100755 --- a/client/tests/kvm_runtest_2/kvm_install.py +++ b/client/tests/kvm_runtest_2/kvm_install.py @@ -192,9 +192,9 @@ def __load_kvm_modules(): #utils.system(pkill qemu 1/dev/null 21, ignore_status=True) utils.system(pkill qemu, ignore_status=True) #if utils.system(grep kvm_%s /proc/modules 1/dev/null % vendor, ignore_s -utils.system(/sbin/rmmod kvm_%s % vendor, ignore_status=True) +utils.system(/sbin/modprobe -r kvm_%s % vendor, ignore_status=True) #if utils.system(grep kvm /proc/modules 1/dev/null, ignore_status=True) -utils.system(/sbin/rmmod kvm, ignore_status=True) +utils.system(/sbin/modprobe -r kvm, ignore_status=True) if utils.system(grep kvm /proc/modules 1/dev/null, ignore_status=True) = message = Failed to remove old KVM modules -- Lucas Meneghel Rodrigues Software Engineer (QE) Red Hat - Emerging Technologies -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: KVM VT-d2?
We have verified that VT-d2 features work with PCI passthrough on KVM. To enable it, you need to turn on interrupt remapping in kernel config. Interrupt remapping is a security/isolation feature where interrupt delivery is qualified with device's bus/device/function in interrupt remapping table entry when source ID checking is turned on. It does not directly inject interrupts to the guest OS. -Original Message- From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On Behalf Of Fischer, Anna Sent: Thursday, May 14, 2009 2:53 PM To: kvm@vger.kernel.org Subject: KVM VT-d2? Does KVM already take advantage of Intel VT-d2 features, e.g. interrupt remapping support? Has anyone verified how it improves interrupt delivery for PCI pass-through devices? Thanks, Anna -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] Re: Question about KVM and PC speaker
On Thu, 14 May 2009, Sebastian Herbszt wrote: malc wrote: On Wed, 13 May 2009, Sebastian Herbszt wrote: Jan Kiszka wrote: Moreover, does sound work at all with your qemu? The image I tried [1] issues two beeps after loading (obviously via direct hw access) - a good way to check general support. Note that one reason for broken host sound with qemu can be OSS. For that reason I always configure my qemu with --audio-drv-list=alsa. Thats a good hint :) Seems i used to compile qemu without --audio-drv-list. Since dsound and fmod drivers don't compile here (i likely miss some libs in my mingw), i used sdl. Don't do that. Here's a nice tutorial Kazu made that will probably help you: http://www.h7.dion.ne.jp/~qemu-win/Audio-en.html So you're saying the use of sdl for audio is not recommended? Yes. Now i can hear those two beeps with the image you suggested. Tho those are coming thru my sound card and not the hosts pc speaker (even with -soundhw pcspk, but maybe that option means something different). And it will always come through your soundcard. pcspk is not a passthrough thing. Thanks for the clarification. With INT 10h AH=0Eh i now can hear a beep too, but it doesn't stop and qemu somewhat freezes. Huh? With this INT 10h function qemu should beep once, but it does loop the beep infinitely. Normally i can exit qemu by clicking on the [x] window close icon, but while it does endlessly beep that doesn't work (vista says process doesn't respond). Using quit in the monitor window doesn't work either. Can you post some .com file sparing me from writing the code for one myself? -- mailto:av1...@comtv.ru -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: KVM VT-d2?
I thought that one use case of VT-d2 interrupt remapping was to be able to safely and more efficiently deliver interrupts to the CPU that runs the particular VCPU of the guest that owns the I/O device that issues the interrupt. Shouldn't there at least be some performance (e.g. latency) improvement doing the remapping and checking in HW with a predefined table rather than multiplexing this in software in the hypervisor layer? -Original Message- From: Kay, Allen M [mailto:allen.m@intel.com] Sent: 14 May 2009 15:02 To: Fischer, Anna; kvm@vger.kernel.org Subject: RE: KVM VT-d2? We have verified VT-d2 features works with PCI passthrough on KVM. To enable it, you need to turn on interrupt remapping in kernel config. Interrupt remapping is a security/isolation feature where interrupt delivery is qualified with device's bus/device/function in interrupt remapping table entry when source ID checking is turn on. It does not directly inject interrupt to the guest OS. -Original Message- From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On Behalf Of Fischer, Anna Sent: Thursday, May 14, 2009 2:53 PM To: kvm@vger.kernel.org Subject: KVM VT-d2? Does KVM already take advantage of Intel VT-d2 features, e.g. interrupt remapping support? Has anyone verified how it improves interrupt delivery for PCI pass-through devices? Thanks, Anna -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
virtio_net with RSS?
Are there any plans to enhance virtio_net with receive-side scaling capabilities, so that an SMP guest OS can balance its network processing load more equally across multiple CPUs? Thanks, Anna -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: KVM VT-d2?
In both interrupt remapping and no interrupt remapping cases, interrupts from the passthrough device are still delivered to the host kernel. KVM then injects the interrupt to guest via vlapic-vioapic-vmcs path. The value add from interrupt remapping is that a new source ID field in interrupt remapping table entry is checked before the interrupt is delivered to the host kernel. This prevents malicious guests with PCI passthrough devices to generate DOS attacks via DMA writes to the apic area. -Original Message- From: Fischer, Anna [mailto:anna.fisc...@hp.com] Sent: Thursday, May 14, 2009 4:12 PM To: Kay, Allen M Cc: kvm@vger.kernel.org Subject: RE: KVM VT-d2? I thought that one use case of VT-d2 interrupt remapping was to be able to safely and more efficiently deliver interrupts to the CPU that runs the particular VCPU of the guest that owns the I/O device that issues the interrupt. Shouldn't there at least be some performance (e.g. latency) improvement doing the remapping and checking in HW with a predefined table rather than multiplexing this in software in the hypervisor layer? -Original Message- From: Kay, Allen M [mailto:allen.m@intel.com] Sent: 14 May 2009 15:02 To: Fischer, Anna; kvm@vger.kernel.org Subject: RE: KVM VT-d2? We have verified VT-d2 features works with PCI passthrough on KVM. To enable it, you need to turn on interrupt remapping in kernel config. Interrupt remapping is a security/isolation feature where interrupt delivery is qualified with device's bus/device/function in interrupt remapping table entry when source ID checking is turn on. It does not directly inject interrupt to the guest OS. -Original Message- From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On Behalf Of Fischer, Anna Sent: Thursday, May 14, 2009 2:53 PM To: kvm@vger.kernel.org Subject: KVM VT-d2? Does KVM already take advantage of Intel VT-d2 features, e.g. interrupt remapping support? 
Has anyone verified how it improves interrupt delivery for PCI pass-through devices? Thanks, Anna -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-2638990 ] Segfault 284
Bugs item #2638990, was opened at 2009-02-25 23:35 Message generated for change (Settings changed) made by sf-robot You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2638990group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Closed Resolution: None Priority: 6 Private: No Submitted By: David Rasche (drasche2) Assigned to: Nobody/Anonymous (nobody) Summary: Segfault 284 Initial Comment: Host (2) Intel Xeon (E5430) Quad Core Processors (2.66GHz) 16G mem Host OS: Ubuntu 8.10 (64bit) kvm-72 libvirt 0.4.4 Guest OS Win2k3 Server (32 bit) After running for 8 to 48 hours, Win2k3 guest system crashes with no warning. Syslog shows the following segmentation fault: Feb 25 16:12:02 host-b kernel: [448190.415857] kvm[25511]: segfault at 284 ip 0043 386f sp 7fff97fa3a70 error 4 in kvm[40+19e000] this error has been confirmed on 2 different machines with exactly the same setup. We are running KVM through libvirt with the following xml setup. 
domain type='kvm' nameexchange/name uuide8d93082-c1db-426c-9ad3-ae651095ceb5/uuid memory4096000/memory currentMemory4096000/currentMemory vcpu3/vcpu os typehvm/type boot dev='hd'/ /os features acpi/ /features clock offset='localtime'/ on_poweroffdestroy/on_poweroff on_rebootrestart/on_reboot on_crashdestroy/on_crash devices emulator/usr/bin/kvm/emulator disk type='file' device='disk' source file='/mnt/vg0/lvol3/exchange.qcow2'/ target dev='hda' bus='ide'/ /disk disk type='block' device='disk' source dev='/dev/vg1/lv_exchdb'/ target dev='hdb' bus='ide'/ /disk disk type='file' device='cdrom' target dev='hdc' bus='ide'/ readonly/ /disk disk type='block' device='disk' source dev='/dev/vg2/lv_exchlog'/ target dev='hdd' bus='ide'/ /disk interface type='bridge' mac address='00:0c:29:cf:71:e4'/ source bridge='br0'/ /interface input type='tablet' bus='usb'/ input type='mouse' bus='ps2'/ graphics type='vnc' port='5900' listen='127.0.0.1'/ /devices /domain -- Comment By: SourceForge Robot (sf-robot) Date: 2009-05-15 02:20 Message: This Tracker item was closed automatically by the system. It was previously set to a Pending status, and the original submitter did not respond within 14 days (the time period specified by the administrator of this Tracker). -- Comment By: Simon Jagoe (ivanvimes) Date: 2009-04-30 18:47 Message: I'll start it up in gdb and watch it for the error. I'll post it when (if) it happens again. My server was up for at least a week before this occurred, so I may not be able to get it immediately. 
-- Comment By: Avi Kivity (avik) Date: 2009-04-30 18:36 Message: Please generate a core dump and post a stack trace: $ gdb /path/to/qemu core (gdb) backtrace -- Comment By: Simon Jagoe (ivanvimes) Date: 2009-04-30 18:26 Message: Sorry about the formatting of the kvm commandline I posted, I have re-formatted it so that sourceforge does not automagically wrap it: /usr/bin/kvm -S -M pc -m 1024 -smp 1 -name partridge \ -monitor pty -boot c \ -drive file=/dev/hare/partridge_root,if=ide,index=0,boot=on \ -drive file=/dev/hare/partridge_var,if=ide,index=1 \ -drive file=/dev/hare/partridge_opt,if=ide,index=2 \ -drive file=/dev/hare/partridge_home,if=ide,index=3 \ -net nic,macaddr=00:16:3e:30:99:7c,vlan=0 \ -net tap,fd=17,script=,vlan=0,ifname=vnet3 \ -serial none -parallel none -usb -vnc 127.0.0.1:0 -- Comment By: Simon Jagoe (ivanvimes) Date: 2009-04-30 18:24 Message: Thanks for the reply. The libvirt XML I posted calls kvm as follows: /usr/bin/kvm -S -M pc -m 1024 -smp 1 -name partridge -monitor pty -boot c \ -drive file=/dev/hare/partridge_root,if=ide,index=0,boot=on \ -drive file=/dev/hare/partridge_var,if=ide,index=1 \ -drive file=/dev/hare/partridge_opt,if=ide,index=2 \ -drive file=/dev/hare/partridge_home,if=ide,index=3 \ -net nic,macaddr=00:16:3e:30:99:7c,vlan=0 -net tap,fd=17,script=,vlan=0,ifname=vnet3 \ -serial none -parallel none -usb -vnc 127.0.0.1:0 -- Comment By: Brian Jackson (iggy_cav) Date: 2009-04-30 15:47 Message: For those of us that don't use/speak libvirt, what does command line is it using? -- Comment By: Simon Jagoe (ivanvimes) Date: 2009-04-30 08:09 Message: I am running an
Re: [KVM PATCH v7 2/3] kvm: add support for irqfd via eventfd-notification interface
On Thu, 14 May 2009, Gregory Haskins wrote: Avi Kivity wrote: Gregory Haskins wrote: KVM provides a complete virtual system environment for guests, including support for injecting interrupts modeled after the real exception/interrupt facilities present on the native platform (such as the IDT on x86). Virtual interrupts can come from a variety of sources (emulated devices, pass-through devices, etc) but all must be injected to the guest via the KVM infrastructure. This patch adds a new mechanism to inject a specific interrupt to a guest using a decoupled eventfd mechanism: Any legal signal on the irqfd (using eventfd semantics from either userspace or kernel) will translate into an injected interrupt in the guest at the next available interrupt window. + +static void +irqfd_inject(struct work_struct *work) +{ +struct _irqfd *irqfd = container_of(work, struct _irqfd, work); +struct kvm *kvm = irqfd->kvm; + I think you need to ->read() from the irqfd, otherwise the count will never clear. Yeah, and this is a disadvantage to using eventfd vs a custom anon-fd implementation. However, the count is really only there for deciding whether to sleep a traditional eventfd recipient, which doesn't really apply in this application. I suppose we could try to invoke the read method (or add a new method to eventfd to allow it to be cleared independent of the f_ops->read() (ala eventfd_signal() vs f_ops->write()). I'm not convinced we really need to worry about it, though. IMO we can just let the count accumulate. But if you insist this loose end should be addressed, perhaps Davide has some thoughts on how to best do this? The counter is 64bit, so at 1M IRQ/s it will take about 585K years to saturate. But from a symmetry POV, it may be better to clear it. Maybe with a kernel-side eventfd_read()? - Davide -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kvm-autotest: The automation plans?
Michael Goldish 写道: - jason wang jasow...@redhat.com wrote: sudhir kumar 写道: Hi Uri/Lucas, Do you have any plans for enhancing kvm-autotest? I was looking mainly on the following 2 aspects: (1). we have standalone migration only. Are there any plans of enhancing kvm-autotest so that we can trigger migration while a workload is running? Something like this: Start a workload (maybe n instances of it). Let the test execute for some time. Trigger migration. Log into the target. Check if the migration is successful. Check if the test results are consistent. We have some patches for ping-pong migration and workload adding. The migration is based on a public bridge and workload adding is based on running a benchmark in the background of the guest. (2). How can we run N parallel instances of a test? Will the current configuration be easily able to support it? Please provide your thoughts on the above features. The parallelized instances could be easily achieved through job.parallel() of the autotest framework, and that is what we have used in our tests. We have made some helper routines such as get_free_port reentrant through file locks. We'll probably have to use file locks anyway when we work with TAP, but in VM.create(), not in get_free_port(), because we also want to prevent parallel qemu instances from choosing the same TAP device. I'm not sure how qemu handles this internally, and I'd rather be on the safe side. Do you release the file lock inside get_free_port or only after running qemu? We record the port usage and release the file lock inside get_free_port(). I agree with you that it's better to get/release the file lock in VM.create() because it is easier and it also eliminates the effort of doing locking in every helper function. For the TAP device, maybe we could give each TAP device used by qemu-kvm a randomly generated ifname to prevent qemu-kvm from choosing the same TAP devices. This method works well in our tests. 
We've implemented the following test cases: timedrift (already sent here), savevm/loadvm, suspend/resume, jumboframe, migration between two machines, and others. We will send them here for review in the following weeks. There are some other things that could be improved: 1) The current kvm_test.cfg.sample/kvm_test.cfg is not visible to the autotest server UI. This would make it hard to configure the tests on the server side. During our tests, we have merged it into the control file and made it configurable by editing the control file through the autotest server-side web UI. Would it not suffice to just modify the configuration, instead of completely defining it, inside the control file? This is possible using parse_string(). For example: cfg = kvm_config.config(kvm_tests.cfg) cfg.parse_string(only weekly) cfg.parse_string(only Fedora RHEL Windows) cfg.parse_string( variants: - 1: only ide - 2: Fedora: no rtl8139 ) list = cfg.get_list() (get_list() returns the test dictionaries.) The advantage here is that we can have a standard kvm_tests.cfg that we all agree on, and only rather small environment-specific modifications are made in the control file. Thanks, this way makes things easier. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
PCI pass-through of multi-function device
Does KVM allow passing through a full multi-function PCI device to a guest, and make that device appear as a whole multi-function device rather than as multiple PCI single-function devices (e.g. Xen only does the latter where all PCI devices appear with function ID being 0 in the guest)? Thanks, Anna -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
KVM 10/Gb Ethernet PCIe passthrough with Linux/iSCSI and large block sizes
Greetings all, The first test results for Linux/iSCSI Initiators and targets for large block sizes using 10 Gb/sec Ethernet + PCIe device-passthrough into Linux/KVM guests have been posted at: http://linux-iscsi.org/index.php/KVM-LIO-Target So far, the results have been quite impressive using the Neterion X3100 series hardware with recent KVM-85 stable code (with Marcelo's patches, see the above link) on v2.6.29.2 KVM guests, and using v2.6.30-rc3 KVM Hosts. Using iSCSI RFC-defined MC/S to scale a *single* KVM-accessible Linux/iSCSI Logical Unit to 10 Gb/sec line-rate speeds has been successful using Core-iSCSI WRITE/READ (bi-directional) traffic using the Linux-Test-Project disktest pthreaded benchmark with O_DIRECT enabled. Using Core-iSCSI MC/S w/ iSCSI READ (uni-directional) the average is about 6-7 Gb/sec, and with MC/S iSCSI WRITE (uni-directional) the average is about 5 Gb/sec to the RAMDISK_DR and FILEIO storage objects for these same streaming tests. Please see the link for more information on the tests and hardware/software setup. The tests have been run with both upstream Open-iSCSI and Core-iSCSI Initiators against Target_Core_Mod/LIO-Target v3.0 in KVM guests. It is important to note that these tests have been run with tcp_sendpage() disabled (tcp_sendpage() is enabled by default in LIO-Target and Open-iSCSI) in 10 Gb/sec KVM guests; it has been disabled in order to get up and running with the 10 Gb/sec hardware. 1 Gb/sec e1000e ports are stable with sendpage() in LIO-Target KVM guests, and these will be enabled on 10 Gb/sec hardware in subsequent tests. Also note that Open-iSCSI WRITEs using tcp_sendpage() have been omitted for this first run of tests. It is also important to note that both iSCSI MC/S and dm-multipath are methods to allow a single Linux/SCSI Logical Unit to scale across multiple TCP connections using the iSCSI Protocol. 
Both of these methods (iSCSI RFC fabric-level multiplexing and OS-level SCSI Multipath) provide a means of scaling across multiple X3110 Vpaths (MSI-X TX/RX pairs), and MC/S is a method with low overhead. Some of the future setups for KVM + 10 Gb/sec will be using dm-multipath block devices, 10 Gb/sec Ethernet PCIe multi-function mode into a KVM guest, as well as PCIe SR-IOV on recent IOMMU-capable hardware platforms. Many thanks to the Neterion folks and Sheng Yang for answering my questions! --nab -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHv6 2/4] virtio: find_vqs/del_vqs virtio operations
And here's the fixup patch I applied: diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -318,11 +318,11 @@ static void lg_del_vqs(struct virtio_dev struct virtqueue *vq, *n; list_for_each_entry_safe(vq, n, vdev-vqs, list) - kvm_del_vq(vq); + lg_del_vq(vq); } static int lg_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[] + struct virtqueue *vqs[], vq_callback_t *callbacks[], const char *names[]) { @@ -331,7 +331,7 @@ static int lg_find_vqs(struct virtio_dev /* We must have this many virtqueues. */ if (nvqs ldev-desc-num_vq) - return ERR_PTR(-ENOENT); + return -ENOENT; for (i = 0; i nvqs; ++i) { vqs[i] = lg_find_vq(vdev, i, callbacks[i], names[i]); @@ -341,7 +341,7 @@ static int lg_find_vqs(struct virtio_dev return 0; error: - vp_del_vqs(vdev); + lg_del_vqs(vdev); return PTR_ERR(vqs[i]); } diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -260,7 +260,7 @@ static int p9_virtio_probe(struct virtio return 0; out_free_vq: - vdev-config-del_vq(chan-vq); + vdev-config-del_vqs(vdev); fail: mutex_lock(virtio_9p_lock); chan_index--; @@ -331,7 +331,7 @@ static void p9_virtio_remove(struct virt BUG_ON(chan-inuse); if (chan-initialized) { - vdev-config-del_vq(chan-vq); + vdev-config-del_vqs(vdev); chan-initialized = false; } } -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html