[Bug 11963] S3: second resume fails unless BIOS Intel TXT Feature disabled - Thinkpad W500
https://bugzilla.kernel.org/show_bug.cgi?id=11963 Zhang Rui rui.zh...@intel.com changed: What|Removed |Added Blocks||56331 -- Configure bugmail: https://bugzilla.kernel.org/userprefs.cgi?tab=email --- You are receiving this mail because: --- You are watching the assignee of the bug. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v8 4/7] KVM: Add reset/restore rtc_status support
On Mon, Apr 08, 2013 at 10:17:46PM +0800, Yang Zhang wrote: From: Yang Zhang yang.z.zh...@intel.com Signed-off-by: Yang Zhang yang.z.zh...@intel.com --- arch/x86/kvm/lapic.c |9 +++ arch/x86/kvm/lapic.h |2 + virt/kvm/ioapic.c| 60 ++ virt/kvm/ioapic.h|1 + 4 files changed, 72 insertions(+), 0 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 0b73402..6796218 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void *bitmap) return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); } +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector) +{ + struct kvm_lapic *apic = vcpu-arch.apic; + + return apic_test_vector(vector, apic-regs + APIC_ISR) || + apic_test_vector(vector, apic-regs + APIC_IRR); +} + static inline void apic_set_vector(int vec, void *bitmap) { set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); @@ -1618,6 +1626,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, apic-highest_isr_cache = -1; kvm_x86_ops-hwapic_isr_update(vcpu-kvm, apic_find_highest_isr(apic)); kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_rtc_eoi_tracking_restore_one(vcpu); } void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 3e5a431..16304b1 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -166,4 +166,6 @@ static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) return vcpu-arch.apic-pending_events; } +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); + #endif diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 27ae8dd..4699180 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -90,6 +90,64 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, return result; } +static void rtc_irq_reset(struct kvm_ioapic *ioapic) rtc_irq_eoi_tracking_reset() +{ + ioapic-rtc_status.pending_eoi = 0; + bitmap_zero(ioapic-rtc_status.dest_map, KVM_MAX_VCPUS); +} + +static void 
__rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu, + int vector) +{ + bool new_val, old_val; + struct kvm_ioapic *ioapic = vcpu-kvm-arch.vioapic; + union kvm_ioapic_redirect_entry *e; + + e = ioapic-redirtbl[RTC_GSI]; + if (!kvm_apic_match_dest(vcpu, NULL, 0, e-fields.dest_id, + e-fields.dest_mode)) + return; + + new_val = kvm_apic_pending_eoi(vcpu, vector); + old_val = test_bit(vcpu-vcpu_id, ioapic-rtc_status.dest_map); + + if (new_val == old_val) + return; + + if (new_val) { + __set_bit(vcpu-vcpu_id, ioapic-rtc_status.dest_map); + ioapic-rtc_status.pending_eoi++; + } else { + __clear_bit(vcpu-vcpu_id, ioapic-rtc_status.dest_map); + ioapic-rtc_status.pending_eoi--; + } WARN_ON(ioapic-rtc_status.pending_eoi 0); +} + +void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) +{ + struct kvm_ioapic *ioapic = vcpu-kvm-arch.vioapic; + int vector; + + vector = ioapic-redirtbl[RTC_GSI].fields.vector; Do not access ioapic outside of the lock. Also since you access ioapic-redirtbl[RTC_GSI] in __rtc_irq_eoi_tracking_restore_one() anyway what's the point passing vector to it? 
+ spin_lock(ioapic-lock); + __rtc_irq_eoi_tracking_restore_one(vcpu, vector); + spin_unlock(ioapic-lock); +} + +static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic) +{ + struct kvm_vcpu *vcpu; + int i, vector; + + if (RTC_GSI = IOAPIC_NUM_PINS) + return; + + rtc_irq_reset(ioapic); + vector = ioapic-redirtbl[RTC_GSI].fields.vector; + kvm_for_each_vcpu(i, vcpu, ioapic-kvm) + __rtc_irq_eoi_tracking_restore_one(vcpu, vector); +} + static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) { union kvm_ioapic_redirect_entry *pent; @@ -428,6 +486,7 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic) ioapic-ioregsel = 0; ioapic-irr = 0; ioapic-id = 0; + rtc_irq_reset(ioapic); update_handled_vectors(ioapic); } @@ -494,6 +553,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); update_handled_vectors(ioapic); kvm_ioapic_make_eoibitmap_request(kvm); + kvm_rtc_eoi_tracking_restore_all(ioapic); spin_unlock(ioapic-lock); return 0; } diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 761e5b5..313fc4e 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -79,6 +79,7 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
RE: [PATCH v8 4/7] KVM: Add reset/restore rtc_status support
Gleb Natapov wrote on 2013-04-09: On Mon, Apr 08, 2013 at 10:17:46PM +0800, Yang Zhang wrote: From: Yang Zhang yang.z.zh...@intel.com Signed-off-by: Yang Zhang yang.z.zh...@intel.com --- arch/x86/kvm/lapic.c |9 +++ arch/x86/kvm/lapic.h |2 + virt/kvm/ioapic.c| 60 ++ virt/kvm/ioapic.h |1 + 4 files changed, 72 insertions(+), 0 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 0b73402..6796218 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void *bitmap) return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); } +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector) +{ +struct kvm_lapic *apic = vcpu-arch.apic; + +return apic_test_vector(vector, apic-regs + APIC_ISR) || +apic_test_vector(vector, apic-regs + APIC_IRR); +} + static inline void apic_set_vector(int vec, void *bitmap) { set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); @@ -1618,6 +1626,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, apic-highest_isr_cache = -1; kvm_x86_ops-hwapic_isr_update(vcpu-kvm, apic_find_highest_isr(apic)); kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_rtc_eoi_tracking_restore_one(vcpu); } void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 3e5a431..16304b1 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -166,4 +166,6 @@ static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) return vcpu-arch.apic-pending_events; } +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); + #endif diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 27ae8dd..4699180 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -90,6 +90,64 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, return result; } +static void rtc_irq_reset(struct kvm_ioapic *ioapic) rtc_irq_eoi_tracking_reset() Sure. 
+{ +ioapic-rtc_status.pending_eoi = 0; +bitmap_zero(ioapic-rtc_status.dest_map, KVM_MAX_VCPUS); +} + +static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu, +int vector) +{ +bool new_val, old_val; +struct kvm_ioapic *ioapic = vcpu-kvm-arch.vioapic; +union kvm_ioapic_redirect_entry *e; + +e = ioapic-redirtbl[RTC_GSI]; +if (!kvm_apic_match_dest(vcpu, NULL, 0, e-fields.dest_id, +e-fields.dest_mode)) +return; + +new_val = kvm_apic_pending_eoi(vcpu, vector); +old_val = test_bit(vcpu-vcpu_id, ioapic-rtc_status.dest_map); + +if (new_val == old_val) +return; + +if (new_val) { +__set_bit(vcpu-vcpu_id, ioapic-rtc_status.dest_map); +ioapic-rtc_status.pending_eoi++; +} else { +__clear_bit(vcpu-vcpu_id, ioapic-rtc_status.dest_map); +ioapic-rtc_status.pending_eoi--; +} WARN_ON(ioapic-rtc_status.pending_eoi 0); Sure. +} + +void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) +{ +struct kvm_ioapic *ioapic = vcpu-kvm-arch.vioapic; +int vector; + +vector = ioapic-redirtbl[RTC_GSI].fields.vector; Do not access ioapic outside of the lock. Also since you access ioapic-redirtbl[RTC_GSI] in __rtc_irq_eoi_tracking_restore_one() anyway what's the point passing vector to it? Right. 
+spin_lock(ioapic-lock); +__rtc_irq_eoi_tracking_restore_one(vcpu, vector); +spin_unlock(ioapic-lock); +} + +static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic) +{ +struct kvm_vcpu *vcpu; +int i, vector; + +if (RTC_GSI = IOAPIC_NUM_PINS) +return; + +rtc_irq_reset(ioapic); +vector = ioapic-redirtbl[RTC_GSI].fields.vector; +kvm_for_each_vcpu(i, vcpu, ioapic-kvm) +__rtc_irq_eoi_tracking_restore_one(vcpu, vector); +} + static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) { union kvm_ioapic_redirect_entry *pent; @@ -428,6 +486,7 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic) ioapic-ioregsel = 0; ioapic-irr = 0;ioapic-id = 0; + rtc_irq_reset(ioapic); update_handled_vectors(ioapic); } @@ -494,6 +553,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); update_handled_vectors(ioapic); kvm_ioapic_make_eoibitmap_request(kvm); + kvm_rtc_eoi_tracking_restore_all(ioapic); spin_unlock(ioapic-lock); return 0; } diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 761e5b5..313fc4e 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -79,6 +79,7 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) return
Re: [PATCH v8 7/7] KVM: Use eoi to track RTC interrupt delivery status
On Mon, Apr 08, 2013 at 10:17:49PM +0800, Yang Zhang wrote: From: Yang Zhang yang.z.zh...@intel.com Current interrupt coalescing logic which is only used by RTC has conflict with Posted Interrupt. This patch introduces a new mechanism to use eoi to track interrupt: When delivering an interrupt to vcpu, the pending_eoi set to number of vcpu that received the interrupt. And decrease it when each vcpu writing eoi. No subsequent RTC interrupt can deliver to vcpu until all vcpus write eoi. Signed-off-by: Yang Zhang yang.z.zh...@intel.com --- virt/kvm/ioapic.c | 41 - 1 files changed, 40 insertions(+), 1 deletions(-) diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 8d1f662..197ef97 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -149,6 +149,29 @@ static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic) __rtc_irq_eoi_tracking_restore_one(vcpu, vector); } +static void rtc_irq_ack_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu, + int irq) rtc_irq_eoi() drop ack. We sometimes call EOI ack, but putting ack and eoi in the name is too much. 
+{ + if (irq != RTC_GSI) + return; + Lets move the check to the caller: if (i == RTC_GSI) rtc_irq_eoi() + if (test_and_clear_bit(vcpu-vcpu_id, ioapic-rtc_status.dest_map)) + --ioapic-rtc_status.pending_eoi; + + WARN_ON(ioapic-rtc_status.pending_eoi 0); +} + +static bool rtc_irq_check(struct kvm_ioapic *ioapic, int irq, bool line_status) rtc_irq_check_coalesced() +{ + if (irq != RTC_GSI || !line_status) + return false; + + if (ioapic-rtc_status.pending_eoi 0) + return true; /* coalesced */ + + return false; +} + static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx, bool line_status) { @@ -262,6 +285,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status) { union kvm_ioapic_redirect_entry *entry = ioapic-redirtbl[irq]; struct kvm_lapic_irq irqe; + int ret; ioapic_debug(dest=%x dest_mode=%x delivery_mode=%x vector=%x trig_mode=%x\n, @@ -277,7 +301,15 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status) irqe.level = 1; irqe.shorthand = 0; - return kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe, NULL); + if (irq == RTC_GSI line_status) { + BUG_ON(ioapic-rtc_status.pending_eoi != 0); + ret = kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe, + ioapic-rtc_status.dest_map); + ioapic-rtc_status.pending_eoi = ret; + } else + ret = kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe, NULL); + + return ret; } int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, @@ -301,6 +333,11 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, ret = 1; } else { int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); + + if (rtc_irq_check(ioapic, irq, line_status)) { + ret = 0; /* coalesced */ + goto out; + } ioapic-irr |= mask; if ((edge old_irr != ioapic-irr) || (!edge !entry.fields.remote_irr)) @@ -308,6 +345,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, else ret = 0; /* report coalesced interrupt */ } +out: 
trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); spin_unlock(ioapic-lock); @@ -335,6 +373,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, if (ent-fields.vector != vector) continue; + rtc_irq_ack_eoi(ioapic, vcpu, i); /* * We are dropping lock while calling ack notifiers because ack * notifier callbacks for assigned devices call into IOAPIC -- 1.7.1 -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v8 0/7] Use eoi to track RTC interrupt delivery status
On Mon, Apr 08, 2013 at 10:17:42PM +0800, Yang Zhang wrote: From: Yang Zhang yang.z.zh...@intel.com Current interrupt coalescing logic which is only used by RTC has conflict with Posted Interrupt. This patch introduces a new mechanism to use eoi to track interrupt: When delivering an interrupt to vcpu, the pending_eoi set to number of vcpu that received the interrupt. And decrease it when each vcpu writing eoi. No subsequent RTC interrupt can deliver to vcpu until all vcpus write eoi. Only minor comment from me to this one. Paolo, Marcelo any comments? Changes from v7 to v8 * Revamping restore code. * Add BUG_ON to check pending_eoi. * Rebase on top of KVM. Changes from v6 to v7 * Only track the RTC interrupt when userspace uses *_LINE_* ioctl. * Call rtc_irq_restore() after lapic is restored. * Rebase on top of KVM. Changes from v5 to v6 * Move set dest_map logic into __apic_accept_irq(). * Use RTC_GSI to distinguish different platform, and drop all CONFIG_X86. * Rebase on top of KVM. Changes from v4 to v5 * Calculate destination vcpu on interrupt injection not hook into ioapic modification. * Rebase on top of KVM. Yang Zhang (7): KVM: Add vcpu info to ioapic_update_eoi() KVM: Introduce struct rtc_status KVM: Return destination vcpu on interrupt injection KVM: Add reset/restore rtc_status support KVM: Force vmexit with virtual interrupt delivery KVM: Let ioapic know the irq line status KVM: Use eoi to track RTC interrupt delivery status arch/x86/kvm/i8254.c |4 +- arch/x86/kvm/lapic.c | 36 + arch/x86/kvm/lapic.h |7 ++- arch/x86/kvm/x86.c |6 ++- include/linux/kvm_host.h | 11 +++-- virt/kvm/assigned-dev.c | 13 +++-- virt/kvm/eventfd.c | 15 +++-- virt/kvm/ioapic.c| 133 -- virt/kvm/ioapic.h| 20 ++- virt/kvm/irq_comm.c | 31 ++- virt/kvm/kvm_main.c |3 +- 11 files changed, 214 insertions(+), 65 deletions(-) -- Gleb. 
-- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH-v2 1/2] virtio-scsi: create VirtIOSCSICommon
Il 08/04/2013 23:59, Anthony Liguori ha scritto: This patch refactors existing virtio-scsi code into VirtIOSCSICommon in order to allow virtio_scsi_init_common() to be used by both internal virtio_scsi_init() and external vhost-scsi-pci code. Changes in Patch-v2: - Move -get_features() assignment to virtio_scsi_init() instead of virtio_scsi_init_common() Any reason we're not doing this as a QOM base class? Similiar to how the in-kernel PIT/PIC work using a common base class... Because when the patch was written virtio-scsi was not a QOM class. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v8 2/7] KVM: VMX: Register a new IPI for posted interrupt
On Mon, Apr 08, 2013 at 10:23:17PM +0800, Yang Zhang wrote: From: Yang Zhang yang.z.zh...@intel.com Posted Interrupt feature requires a special IPI to deliver posted interrupt to guest. And it should has a high priority so the interrupt will not be blocked by others. Normally, the posted interrupt will be consumed by vcpu if target vcpu is running and transparent to OS. But in some cases, the interrupt will arrive when target vcpu is scheduled out. And host will see it. So we need to register a dump handler to handle it. Ingo can I add your ACK to this one? In the past you agreed to the approach. Signed-off-by: Yang Zhang yang.z.zh...@intel.com --- arch/x86/include/asm/entry_arch.h |4 arch/x86/include/asm/hardirq.h |3 +++ arch/x86/include/asm/hw_irq.h |1 + arch/x86/include/asm/irq_vectors.h |5 + arch/x86/kernel/entry_64.S |5 + arch/x86/kernel/irq.c | 22 ++ arch/x86/kernel/irqinit.c |4 7 files changed, 44 insertions(+), 0 deletions(-) diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 40afa00..9bd4eca 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -19,6 +19,10 @@ BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) +#ifdef CONFIG_HAVE_KVM +BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR) +#endif + /* * every pentium local APIC has two 'local interrupts', with a * soft-definable vector attached to both interrupts, one of diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 81f04ce..ab0ae1a 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -12,6 +12,9 @@ typedef struct { unsigned int irq_spurious_count; unsigned int icr_read_retry_count; #endif +#ifdef CONFIG_HAVE_KVM + unsigned int kvm_posted_intr_ipis; +#endif unsigned int x86_platform_ipis; /* arch dependent */ unsigned int apic_perf_irqs; unsigned int apic_irq_work_irqs; diff --git 
a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 10a78c3..1da97ef 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -28,6 +28,7 @@ /* Interrupt handlers registered during init_IRQ */ extern void apic_timer_interrupt(void); extern void x86_platform_ipi(void); +extern void kvm_posted_intr_ipi(void); extern void error_interrupt(void); extern void irq_work_interrupt(void); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index aac5fa6..5702d7e 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -102,6 +102,11 @@ */ #define X86_PLATFORM_IPI_VECTOR 0xf7 +/* Vector for KVM to deliver posted interrupt IPI */ +#ifdef CONFIG_HAVE_KVM +#define POSTED_INTR_VECTOR 0xf2 +#endif + /* * IRQ work vector: */ diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c1d01e6..7272089 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1166,6 +1166,11 @@ apicinterrupt LOCAL_TIMER_VECTOR \ apicinterrupt X86_PLATFORM_IPI_VECTOR \ x86_platform_ipi smp_x86_platform_ipi +#ifdef CONFIG_HAVE_KVM +apicinterrupt POSTED_INTR_VECTOR \ + kvm_posted_intr_ipi smp_kvm_posted_intr_ipi +#endif + apicinterrupt THRESHOLD_APIC_VECTOR \ threshold_interrupt smp_threshold_interrupt apicinterrupt THERMAL_APIC_VECTOR \ diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index e4595f1..6ae6ea1 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -228,6 +228,28 @@ void smp_x86_platform_ipi(struct pt_regs *regs) set_irq_regs(old_regs); } +#ifdef CONFIG_HAVE_KVM +/* + * Handler for POSTED_INTERRUPT_VECTOR. 
+ */ +void smp_kvm_posted_intr_ipi(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + ack_APIC_irq(); + + irq_enter(); + + exit_idle(); + + inc_irq_stat(kvm_posted_intr_ipis); + + irq_exit(); + + set_irq_regs(old_regs); +} +#endif + EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 7dc4e45..a2a1fbc 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -172,6 +172,10 @@ static void __init apic_intr_init(void) /* IPI for X86 platform specific use */ alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi); +#ifdef CONFIG_HAVE_KVM + /* IPI for KVM to deliver posted interrupt */ + alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi); +#endif /* IPI vectors for APIC spurious and error interrupts */ alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
Re: [PATCH v8 4/7] KVM: Call common update function when ioapic entry changed.
On Mon, Apr 08, 2013 at 10:23:19PM +0800, Yang Zhang wrote: From: Yang Zhang yang.z.zh...@intel.com Both TMR and EOI exit bitmap need to be updated when ioapic changed or vcpu's id/ldr/dfr changed. So use common function instead eoi exit bitmap specific function. Signed-off-by: Yang Zhang yang.z.zh...@intel.com --- arch/ia64/kvm/lapic.h|6 -- arch/x86/kvm/lapic.c |4 ++-- arch/x86/kvm/lapic.h |1 + arch/x86/kvm/vmx.c |3 +++ arch/x86/kvm/x86.c | 11 +++ include/linux/kvm_host.h |4 ++-- virt/kvm/ioapic.c| 22 +- virt/kvm/ioapic.h|6 ++ virt/kvm/irq_comm.c |4 ++-- virt/kvm/kvm_main.c |4 ++-- 10 files changed, 34 insertions(+), 31 deletions(-) diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h index c3e2935..c5f92a9 100644 --- a/arch/ia64/kvm/lapic.h +++ b/arch/ia64/kvm/lapic.h @@ -27,10 +27,4 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); #define kvm_apic_present(x) (true) #define kvm_lapic_enabled(x) (true) -static inline bool kvm_apic_vid_enabled(void) -{ - /* IA64 has no apicv supporting, do nothing here */ - return false; -} - #endif diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 6796218..6c83969 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -136,7 +136,7 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) apic_set_reg(apic, APIC_SPIV, val); } -static inline int apic_enabled(struct kvm_lapic *apic) +int apic_enabled(struct kvm_lapic *apic) Move the function into lapic.h and leave it inline. 
{ return kvm_apic_sw_enabled(apic) kvm_apic_hw_enabled(apic); } @@ -217,7 +217,7 @@ out: if (old) kfree_rcu(old, rcu); - kvm_ioapic_make_eoibitmap_request(kvm); + kvm_vcpu_request_scan_ioapic(kvm); } static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 16304b1..a2e2c6a 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -167,5 +167,6 @@ static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) } bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); +int apic_enabled(struct kvm_lapic *apic); #endif diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 05da991..5637a8a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6415,6 +6415,9 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { + if (!vmx_vm_has_apicv(vcpu-kvm)) + return; + vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]); vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5b146d2..53dc96f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5649,13 +5649,16 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) #endif } -static void update_eoi_exitmap(struct kvm_vcpu *vcpu) +static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { u64 eoi_exit_bitmap[4]; + if (!apic_enabled(vcpu-arch.apic)) + return; + memset(eoi_exit_bitmap, 0, 32); - kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap); + kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap); kvm_x86_ops-load_eoi_exitmap(vcpu, eoi_exit_bitmap); } @@ -5712,8 +5715,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_handle_pmu_event(vcpu); if (kvm_check_request(KVM_REQ_PMI, vcpu)) kvm_deliver_pmi(vcpu); - if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu)) - update_eoi_exitmap(vcpu); + if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, 
vcpu)) + vcpu_scan_ioapic(vcpu); } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7bcdb6b..6f49d9d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -126,7 +126,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_MASTERCLOCK_UPDATE 19 #define KVM_REQ_MCLOCK_INPROGRESS 20 #define KVM_REQ_EPR_EXIT 21 -#define KVM_REQ_EOIBITMAP 22 +#define KVM_REQ_SCAN_IOAPIC 22 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 @@ -572,7 +572,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); void kvm_flush_remote_tlbs(struct kvm *kvm); void kvm_reload_remote_mmus(struct kvm *kvm); void kvm_make_mclock_inprogress_request(struct kvm *kvm); -void kvm_make_update_eoibitmap_request(struct kvm *kvm); +void
Re: [PATCH v8 7/7] KVM: VMX: Use posted interrupt to deliver virtual interrupt
On Mon, Apr 08, 2013 at 10:23:22PM +0800, Yang Zhang wrote: From: Yang Zhang yang.z.zh...@intel.com If posted interrupt is avaliable, then uses it to inject virtual interrupt to guest. Signed-off-by: Yang Zhang yang.z.zh...@intel.com --- arch/x86/kvm/lapic.c | 29 ++--- arch/x86/kvm/vmx.c |2 +- arch/x86/kvm/x86.c |1 + 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 8948979..46a4cca 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -353,6 +353,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) if (!apic-irr_pending) return -1; + kvm_x86_ops-sync_pir_to_irr(apic-vcpu); result = apic_search_irr(apic); ASSERT(result == -1 || result = 16); @@ -683,18 +684,24 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, if (dest_map) __set_bit(vcpu-vcpu_id, dest_map); - result = !apic_test_and_set_irr(vector, apic); - trace_kvm_apic_accept_irq(vcpu-vcpu_id, delivery_mode, - trig_mode, vector, !result); - if (!result) { - if (trig_mode) - apic_debug(level trig mode repeatedly for - vector %d, vector); - break; - } + if (kvm_x86_ops-deliver_posted_interrupt) { + result = 1; + kvm_x86_ops-deliver_posted_interrupt(vcpu, vector); + } else { + result = !apic_test_and_set_irr(vector, apic); + + trace_kvm_apic_accept_irq(vcpu-vcpu_id, delivery_mode, + trig_mode, vector, !result); Missed that in previous review. Do no drop tracing for PI case. 
+ if (!result) { + if (trig_mode) + apic_debug(level trig mode repeatedly + for vector %d, vector); + break; + } - kvm_make_request(KVM_REQ_EVENT, vcpu); - kvm_vcpu_kick(vcpu); + kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_vcpu_kick(vcpu); + } break; case APIC_DM_REMRD: diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3de2d7f..cd1c6ff 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -84,7 +84,7 @@ module_param(vmm_exclusive, bool, S_IRUGO); static bool __read_mostly fasteoi = 1; module_param(fasteoi, bool, S_IRUGO); -static bool __read_mostly enable_apicv; +static bool __read_mostly enable_apicv = 1; module_param(enable_apicv, bool, S_IRUGO); /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 72be079..486f627 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2685,6 +2685,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { + kvm_x86_ops-sync_pir_to_irr(vcpu); memcpy(s-regs, vcpu-arch.apic-regs, sizeof *s); return 0; -- 1.7.1 -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 5/7 v3] KVM: PPC: exit to user space on ehpriv instruction
On 04/08/2013 06:32 PM, Bharat Bhushan wrote: From: Bharat Bhushan bharat.bhus...@freescale.com ehpriv instruction is used for setting software breakpoints by user space. This patch adds support to exit to user space with run-debug have relevant information. Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com --- arch/powerpc/kvm/e500_emulate.c | 10 ++ 1 files changed, 10 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index e78f353..cefdd38 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -26,6 +26,7 @@ #define XOP_TLBRE 946 #define XOP_TLBWE 978 #define XOP_TLBILX 18 +#define XOP_EHPRIV 270 #ifdef CONFIG_KVM_E500MC static int dbell2prio(ulong param) @@ -130,6 +131,15 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, emulated = kvmppc_e500_emul_tlbivax(vcpu, ea); break; + case XOP_EHPRIV: + run-exit_reason = KVM_EXIT_DEBUG; IIRC, the ehpriv instruction should generate a Hypervisor Privilege Exception to trap into the Hypervisor proactive. And we can use this ability to design something conveniently. And so, that is not only for the debug mechanism like you did. So here if 'run-exit_reason' is fixed to KVM_EXIT_DEBUG, how to distinguish other scenarios? So as I understand, we should use 'ehpriv oc' exactly then resolve 'oc' further to go different cases, right? Tiejun + run-debug.arch.address = vcpu-arch.pc; + run-debug.arch.status = 0; + kvmppc_account_exit(vcpu, DEBUG_EXITS); + emulated = EMULATE_EXIT_USER; + *advance = 0; + break; + default: emulated = EMULATE_FAIL; } -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 0/3] tcm_vhost hotplug
Asias He (3): tcm_vhost: Introduce tcm_vhost_check_feature() tcm_vhost: Add helper to check if endpoint is setup tcm_vhost: Add hotplug/hotunplug support drivers/vhost/tcm_vhost.c | 236 +- drivers/vhost/tcm_vhost.h | 10 ++ 2 files changed, 242 insertions(+), 4 deletions(-) -- 1.8.1.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 1/3] tcm_vhost: Introduce tcm_vhost_check_feature()
This helper is useful to check if a feature is supported. Signed-off-by: Asias He as...@redhat.com Reviewed-by: Stefan Hajnoczi stefa...@redhat.com --- drivers/vhost/tcm_vhost.c | 12 1 file changed, 12 insertions(+) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index c127731..f0189bc 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -99,6 +99,18 @@ static int iov_num_pages(struct iovec *iov) ((unsigned long)iov-iov_base PAGE_MASK)) PAGE_SHIFT; } +static bool tcm_vhost_check_feature(struct vhost_scsi *vs, int feature) +{ + bool ret = false; + + mutex_lock(vs-dev.mutex); + if (vhost_has_feature(vs-dev, feature)) + ret = true; + mutex_unlock(vs-dev.mutex); + + return ret; +} + static int tcm_vhost_check_true(struct se_portal_group *se_tpg) { return 1; -- 1.8.1.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 2/3] tcm_vhost: Add helper to check if endpoint is setup
Signed-off-by: Asias He as...@redhat.com --- drivers/vhost/tcm_vhost.c | 18 ++ 1 file changed, 18 insertions(+) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index f0189bc..7069881 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -111,6 +111,24 @@ static bool tcm_vhost_check_feature(struct vhost_scsi *vs, int feature) return ret; } +static bool tcm_vhost_check_endpoint(struct vhost_virtqueue *vq) +{ + bool ret = false; + + /* + * We can handle the vq only after the endpoint is setup by calling the + * VHOST_SCSI_SET_ENDPOINT ioctl. + * + * TODO: Check that we are running from vhost_worker which acts + * as read-side critical section for vhost kind of RCU. + * See the comments in struct vhost_virtqueue in drivers/vhost/vhost.h + */ + if (rcu_dereference_check(vq-private_data, 1)) + ret = true; + + return ret; +} + static int tcm_vhost_check_true(struct se_portal_group *se_tpg) { return 1; -- 1.8.1.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 3/3] tcm_vhost: Add hotplug/hotunplug support
In commit 365a7150094 ([SCSI] virtio-scsi: hotplug support for virtio-scsi), hotplug support is added to virtio-scsi. This patch adds hotplug and hotunplug support to tcm_vhost. You can create or delete a LUN in targetcli to hotplug or hotunplug a LUN in guest. Changes in v5: - Switch to int from u64 to vs_events_nr - Set s-vs_events_dropped flag in tcm_vhost_allocate_evt - Do not nest dev mutex within vq mutex - Use vs_events_lock to protect vs_events_dropped and vs_events_nr - Rebase to target/master Changes in v4: - Drop tcm_vhost_check_endpoint in tcm_vhost_send_evt - Add tcm_vhost_check_endpoint in vhost_scsi_evt_handle_kick Changes in v3: - Separate the bug fix to another thread Changes in v2: - Remove code duplication in tcm_vhost_{hotplug,hotunplug} - Fix racing of vs_events_nr - Add flush fix patch to this series Signed-off-by: Asias He as...@redhat.com Reviewed-by: Stefan Hajnoczi stefa...@redhat.com --- drivers/vhost/tcm_vhost.c | 206 +- drivers/vhost/tcm_vhost.h | 10 +++ 2 files changed, 212 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index 7069881..3351ed3 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -66,11 +66,13 @@ enum { * TODO: debug and remove the workaround. 
*/ enum { - VHOST_SCSI_FEATURES = VHOST_FEATURES (~VIRTIO_RING_F_EVENT_IDX) + VHOST_SCSI_FEATURES = (VHOST_FEATURES (~VIRTIO_RING_F_EVENT_IDX)) | + (1ULL VIRTIO_SCSI_F_HOTPLUG) }; #define VHOST_SCSI_MAX_TARGET 256 #define VHOST_SCSI_MAX_VQ 128 +#define VHOST_SCSI_MAX_EVENT 128 struct vhost_scsi { /* Protected by vhost_scsi-dev.mutex */ @@ -82,6 +84,13 @@ struct vhost_scsi { struct vhost_work vs_completion_work; /* cmd completion work item */ struct llist_head vs_completion_list; /* cmd completion queue */ + + struct vhost_work vs_event_work; /* evt injection work item */ + struct llist_head vs_event_list; /* evt injection queue */ + + struct mutex vs_events_lock; /* protect vs_events_dropped,events_nr */ + bool vs_events_dropped; /* any missed events */ + int vs_events_nr; /* num of pending events */ }; /* Local pointer to allocated TCM configfs fabric module */ @@ -129,6 +138,17 @@ static bool tcm_vhost_check_endpoint(struct vhost_virtqueue *vq) return ret; } +static bool tcm_vhost_check_events_dropped(struct vhost_scsi *vs) +{ + bool ret; + + mutex_lock(vs-vs_events_lock); + ret = vs-vs_events_dropped; + mutex_unlock(vs-vs_events_lock); + + return ret; +} + static int tcm_vhost_check_true(struct se_portal_group *se_tpg) { return 1; @@ -379,6 +399,37 @@ static int tcm_vhost_queue_tm_rsp(struct se_cmd *se_cmd) return 0; } +static void tcm_vhost_free_evt(struct vhost_scsi *vs, struct tcm_vhost_evt *evt) +{ + mutex_lock(vs-vs_events_lock); + vs-vs_events_nr--; + kfree(evt); + mutex_unlock(vs-vs_events_lock); +} + +static struct tcm_vhost_evt *tcm_vhost_allocate_evt(struct vhost_scsi *vs, + u32 event, u32 reason) +{ + struct tcm_vhost_evt *evt; + + mutex_lock(vs-vs_events_lock); + if (vs-vs_events_nr VHOST_SCSI_MAX_EVENT) { + vs-vs_events_dropped = true; + mutex_unlock(vs-vs_events_lock); + return NULL; + } + + evt = kzalloc(sizeof(*evt), GFP_KERNEL); + if (evt) { + evt-event.event = event; + evt-event.reason = reason; + vs-vs_events_nr++; + } + 
mutex_unlock(vs-vs_events_lock); + + return evt; +} + static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd) { struct se_cmd *se_cmd = tv_cmd-tvc_se_cmd; @@ -397,6 +448,74 @@ static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd) kfree(tv_cmd); } +static void tcm_vhost_do_evt_work(struct vhost_scsi *vs, + struct virtio_scsi_event *event) +{ + struct vhost_virtqueue *vq = vs-vqs[VHOST_SCSI_VQ_EVT]; + struct virtio_scsi_event __user *eventp; + unsigned out, in; + int head, ret; + + if (!tcm_vhost_check_endpoint(vq)) + return; + + mutex_lock(vs-vs_events_lock); + mutex_lock(vq-mutex); +again: + vhost_disable_notify(vs-dev, vq); + head = vhost_get_vq_desc(vs-dev, vq, vq-iov, + ARRAY_SIZE(vq-iov), out, in, + NULL, NULL); + if (head 0) { + vs-vs_events_dropped = true; + goto out; + } + if (head == vq-num) { + if (vhost_enable_notify(vs-dev, vq)) + goto again; + vs-vs_events_dropped = true; + goto out; + } + + if ((vq-iov[out].iov_len != sizeof(struct virtio_scsi_event))) { + vq_err(vq, Expecting virtio_scsi_event, got %zu
RE: [PATCH v8 7/7] KVM: VMX: Use posted interrupt to deliver virtual interrupt
Gleb Natapov wrote on 2013-04-09: On Mon, Apr 08, 2013 at 10:23:22PM +0800, Yang Zhang wrote: From: Yang Zhang yang.z.zh...@intel.com If posted interrupt is avaliable, then uses it to inject virtual interrupt to guest. Signed-off-by: Yang Zhang yang.z.zh...@intel.com --- arch/x86/kvm/lapic.c | 29 ++--- arch/x86/kvm/vmx.c |2 +- arch/x86/kvm/x86.c |1 + 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 8948979..46a4cca 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -353,6 +353,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) if (!apic-irr_pending) return -1; +kvm_x86_ops-sync_pir_to_irr(apic-vcpu); result = apic_search_irr(apic); ASSERT(result == -1 || result = 16); @@ -683,18 +684,24 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, if (dest_map) __set_bit(vcpu-vcpu_id, dest_map); -result = !apic_test_and_set_irr(vector, apic); -trace_kvm_apic_accept_irq(vcpu-vcpu_id, delivery_mode, - trig_mode, vector, !result); -if (!result) { -if (trig_mode) -apic_debug(level trig mode repeatedly for -vector %d, vector); -break; -} +if (kvm_x86_ops-deliver_posted_interrupt) { +result = 1; +kvm_x86_ops-deliver_posted_interrupt(vcpu, vector); +} else { +result = !apic_test_and_set_irr(vector, apic); + +trace_kvm_apic_accept_irq(vcpu-vcpu_id, delivery_mode, +trig_mode, vector, !result); Missed that in previous review. Do no drop tracing for PI case. Hmm. I remember I have added the tracing for PI case. Don't know why it is not existing in this patch. Anyway, I will add it again. 
+if (!result) { +if (trig_mode) +apic_debug(level trig mode repeatedly +for vector %d, vector); +break; +} -kvm_make_request(KVM_REQ_EVENT, vcpu); -kvm_vcpu_kick(vcpu); +kvm_make_request(KVM_REQ_EVENT, vcpu); +kvm_vcpu_kick(vcpu); +} break; case APIC_DM_REMRD: diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3de2d7f..cd1c6ff 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -84,7 +84,7 @@ module_param(vmm_exclusive, bool, S_IRUGO); static bool __read_mostly fasteoi = 1; module_param(fasteoi, bool, S_IRUGO); -static bool __read_mostly enable_apicv; +static bool __read_mostly enable_apicv = 1; module_param(enable_apicv, bool, S_IRUGO); /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 72be079..486f627 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2685,6 +2685,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { + kvm_x86_ops-sync_pir_to_irr(vcpu); memcpy(s-regs, vcpu-arch.apic-regs, sizeof *s); return 0; -- 1.7.1 -- Gleb. Best regards, Yang -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] tcm_vhost: Fix tv_cmd leak in vhost_scsi_handle_vq
If we fail to submit the allocated tv_vmd to tcm_vhost_submission_work, we will leak the tv_vmd. Free tv_vmd on fail path. Signed-off-by: Asias He as...@redhat.com --- drivers/vhost/tcm_vhost.c | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index 3351ed3..1f9116c 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -860,7 +860,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, vq_err(vq, Expecting virtio_scsi_cmd_resp, got %zu bytes, out: %d, in: %d\n, vq-iov[out].iov_len, out, in); - break; + goto err; } tv_cmd-tvc_resp = vq-iov[out].iov_base; @@ -882,7 +882,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n, scsi_command_size(tv_cmd-tvc_cdb), TCM_VHOST_MAX_CDB_SIZE); - break; /* TODO */ + goto err; } tv_cmd-tvc_lun = ((v_req.lun[2] 8) | v_req.lun[3]) 0x3FFF; @@ -895,7 +895,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, data_direction == DMA_TO_DEVICE); if (unlikely(ret)) { vq_err(vq, Failed to map iov to sgl\n); - break; /* TODO */ + goto err; } } @@ -916,6 +916,11 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, } mutex_unlock(vq-mutex); + return; + +err: + vhost_scsi_free_cmd(tv_cmd); + mutex_unlock(vq-mutex); } static void vhost_scsi_ctl_handle_kick(struct vhost_work *work) -- 1.8.1.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] tcm_vhost: Fix tv_cmd leak in vhost_scsi_handle_vq
On Tue, Apr 09, 2013 at 05:16:33PM +0800, Asias He wrote: If we fail to submit the allocated tv_vmd to tcm_vhost_submission_work, we will leak the tv_vmd. Free tv_vmd on fail path. Signed-off-by: Asias He as...@redhat.com Another one for 3.9 I think. Acked-by: Michael S. Tsirkin m...@redhat.com --- drivers/vhost/tcm_vhost.c | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index 3351ed3..1f9116c 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -860,7 +860,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, vq_err(vq, Expecting virtio_scsi_cmd_resp, got %zu bytes, out: %d, in: %d\n, vq-iov[out].iov_len, out, in); - break; + goto err; } tv_cmd-tvc_resp = vq-iov[out].iov_base; @@ -882,7 +882,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n, scsi_command_size(tv_cmd-tvc_cdb), TCM_VHOST_MAX_CDB_SIZE); - break; /* TODO */ + goto err; } tv_cmd-tvc_lun = ((v_req.lun[2] 8) | v_req.lun[3]) 0x3FFF; @@ -895,7 +895,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, data_direction == DMA_TO_DEVICE); if (unlikely(ret)) { vq_err(vq, Failed to map iov to sgl\n); - break; /* TODO */ + goto err; } } @@ -916,6 +916,11 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, } mutex_unlock(vq-mutex); + return; + +err: + vhost_scsi_free_cmd(tv_cmd); + mutex_unlock(vq-mutex); } static void vhost_scsi_ctl_handle_kick(struct vhost_work *work) -- 1.8.1.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] tcm_vhost: Wait for pending requests in vhost_scsi_flush()
This patch makes vhost_scsi_flush() wait for all the pending requests issued before the flush operation to be finished. Changes in v3: - Rebase - Drop 'tcm_vhost: Wait for pending requests in vhost_scsi_clear_endpoint()' in this series, we already did that in 'tcm_vhost: Use vq-private_data to indicate if the endpoint is setup' Changes in v2: - Increase/Decrease inflight requests in vhost_scsi_{allocate,free}_cmd and tcm_vhost_{allocate,free}_evt Signed-off-by: Asias He as...@redhat.com --- drivers/vhost/tcm_vhost.c | 88 --- drivers/vhost/tcm_vhost.h | 4 +++ 2 files changed, 87 insertions(+), 5 deletions(-) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index 1f9116c..719ce13 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -91,6 +91,15 @@ struct vhost_scsi { struct mutex vs_events_lock; /* protect vs_events_dropped,events_nr */ bool vs_events_dropped; /* any missed events */ int vs_events_nr; /* num of pending events */ + + /* +* vs_inflight[0]/[1] are used to track requests issued +* before/during the flush operation +*/ + u64 vs_inflight[2]; + wait_queue_head_t vs_flush_wait; /* wait queue for flush operation */ + spinlock_t vs_flush_lock; /* lock to protect vs_during_flush */ + int vs_during_flush; /* flag to indicate if we are in flush operation */ }; /* Local pointer to allocated TCM configfs fabric module */ @@ -108,6 +117,46 @@ static int iov_num_pages(struct iovec *iov) ((unsigned long)iov-iov_base PAGE_MASK)) PAGE_SHIFT; } +static int tcm_vhost_inc_inflight(struct vhost_scsi *vs) +{ + int during_flush; + + spin_lock(vs-vs_flush_lock); + during_flush = vs-vs_during_flush; + vs-vs_inflight[during_flush]++; + spin_unlock(vs-vs_flush_lock); + + return during_flush; +} + +static void tcm_vhost_dec_inflight(struct vhost_scsi *vs, int during_flush) +{ + u64 inflight; + + spin_lock(vs-vs_flush_lock); + inflight = vs-vs_inflight[during_flush]--; + /* +* Wakeup the waiter when all the requests issued before the flush +* 
operation are finished and we are during the flush operation. +*/ + if (!inflight !during_flush vs-vs_during_flush) + wake_up(vs-vs_flush_wait); + spin_unlock(vs-vs_flush_lock); +} + +static bool tcm_vhost_done_inflight(struct vhost_scsi *vs) +{ + bool ret = false; + + /* The requests issued before the flush operation are finished ? */ + spin_lock(vs-vs_flush_lock); + if (!vs-vs_inflight[0]) + ret = true; + spin_unlock(vs-vs_flush_lock); + + return ret; +} + static bool tcm_vhost_check_feature(struct vhost_scsi *vs, int feature) { bool ret = false; @@ -402,6 +451,7 @@ static int tcm_vhost_queue_tm_rsp(struct se_cmd *se_cmd) static void tcm_vhost_free_evt(struct vhost_scsi *vs, struct tcm_vhost_evt *evt) { mutex_lock(vs-vs_events_lock); + tcm_vhost_dec_inflight(vs, evt-during_flush); vs-vs_events_nr--; kfree(evt); mutex_unlock(vs-vs_events_lock); @@ -423,6 +473,7 @@ static struct tcm_vhost_evt *tcm_vhost_allocate_evt(struct vhost_scsi *vs, if (evt) { evt-event.event = event; evt-event.reason = reason; + evt-during_flush = tcm_vhost_inc_inflight(vs); vs-vs_events_nr++; } mutex_unlock(vs-vs_events_lock); @@ -433,6 +484,7 @@ static struct tcm_vhost_evt *tcm_vhost_allocate_evt(struct vhost_scsi *vs, static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd) { struct se_cmd *se_cmd = tv_cmd-tvc_se_cmd; + struct vhost_scsi *vs = tv_cmd-tvc_vhost; /* TODO locking against target/backend threads? 
*/ transport_generic_free_cmd(se_cmd, 1); @@ -445,13 +497,16 @@ static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd) kfree(tv_cmd-tvc_sgl); } + tcm_vhost_dec_inflight(vs, tv_cmd-during_flush); + kfree(tv_cmd); } static void tcm_vhost_do_evt_work(struct vhost_scsi *vs, - struct virtio_scsi_event *event) + struct tcm_vhost_evt *evt) { struct vhost_virtqueue *vq = vs-vqs[VHOST_SCSI_VQ_EVT]; + struct virtio_scsi_event *event = evt-event; struct virtio_scsi_event __user *eventp; unsigned out, in; int head, ret; @@ -511,7 +566,7 @@ static void tcm_vhost_evt_work(struct vhost_work *work) while (llnode) { evt = llist_entry(llnode, struct tcm_vhost_evt, list); llnode = llist_next(llnode); - tcm_vhost_do_evt_work(vs, evt-event); + tcm_vhost_do_evt_work(vs, evt); tcm_vhost_free_evt(vs, evt); } } @@ -529,8 +584,8 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
Re: [PATCH 05/11] KVM: nVMX: Refactor handle_vmwrite
On Sun, Mar 10, 2013 at 06:05:55PM +0200, Abel Gordon wrote: Refactor existent code so we re-use vmcs12_write_any to copy fields from the shadow vmcs specified by the link pointer (used by the processor, implementation-specific) to the VMCS12 software format used by L0 to hold the fields in L1 memory address space. Signed-off-by: Abel Gordon ab...@il.ibm.com --- arch/x86/kvm/vmx.c | 52 +++ 1 file changed, 28 insertions(+), 24 deletions(-) --- .before/arch/x86/kvm/vmx.c2013-03-10 18:00:54.0 +0200 +++ .after/arch/x86/kvm/vmx.c 2013-03-10 18:00:54.0 +0200 @@ -5741,6 +5741,33 @@ static inline bool vmcs12_read_any(struc } } + +static inline bool vmcs12_write_any(struct kvm_vcpu *vcpu, + unsigned long field, u64 field_value){ + short offset = vmcs_field_to_offset(field); + char *p = ((char *) get_vmcs12(vcpu)) + offset; + if (offset 0) + return 0; The function returns bool, so use true/false please. + + switch (vmcs_field_type(field)) { + case VMCS_FIELD_TYPE_U16: + *(u16 *)p = field_value; + return 1; + case VMCS_FIELD_TYPE_U32: + *(u32 *)p = field_value; + return 1; + case VMCS_FIELD_TYPE_U64: + *(u64 *)p = field_value; + return 1; + case VMCS_FIELD_TYPE_NATURAL_WIDTH: + *(natural_width *)p = field_value; + return 1; + default: + return 0; /* can never happen. */ + } + +} + /* * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was * used before) all generate the same failure when it is missing. @@ -5806,8 +5833,6 @@ static int handle_vmwrite(struct kvm_vcp gva_t gva; unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - char *p; - short offset; /* The value to write might be 32 or 64 bits, depending on L1's long * mode, and eventually we need to write that into a field of several * possible lengths. 
The code below first zero-extends the value to 64 @@ -5846,28 +5871,7 @@ static int handle_vmwrite(struct kvm_vcp return 1; } - offset = vmcs_field_to_offset(field); - if (offset 0) { - nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); - skip_emulated_instruction(vcpu); - return 1; - } - p = ((char *) get_vmcs12(vcpu)) + offset; - - switch (vmcs_field_type(field)) { - case VMCS_FIELD_TYPE_U16: - *(u16 *)p = field_value; - break; - case VMCS_FIELD_TYPE_U32: - *(u32 *)p = field_value; - break; - case VMCS_FIELD_TYPE_U64: - *(u64 *)p = field_value; - break; - case VMCS_FIELD_TYPE_NATURAL_WIDTH: - *(natural_width *)p = field_value; - break; - default: + if (!vmcs12_write_any(vcpu, field, field_value)) { nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); skip_emulated_instruction(vcpu); return 1; -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 09/11] KVM: nVMX: Copy VMCS12 to processor-specific shadow vmcs
On Sun, Mar 10, 2013 at 06:07:56PM +0200, Abel Gordon wrote: Introduce a function used to copy fields from the software controlled VMCS12 to the processor-specific shadow vmcs Signed-off-by: Abel Gordon ab...@il.ibm.com --- arch/x86/kvm/vmx.c | 45 +++ 1 file changed, 45 insertions(+) --- .before/arch/x86/kvm/vmx.c2013-03-10 18:00:55.0 +0200 +++ .after/arch/x86/kvm/vmx.c 2013-03-10 18:00:55.0 +0200 @@ -672,6 +672,7 @@ static void vmx_get_segment(struct kvm_v struct kvm_segment *var, int seg); static bool guest_state_valid(struct kvm_vcpu *vcpu); static u32 vmx_segment_access_rights(struct kvm_segment *var); +static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); static DEFINE_PER_CPU(struct vmcs *, vmxarea); @@ -5813,6 +5814,50 @@ static void copy_shadow_to_vmcs12(struct vmcs_load(vmx-loaded_vmcs-vmcs); } +static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) +{ + int num_lists = 2; int num_lists = ARRAY_SIZE(fields) + unsigned long *fields[] = { + (unsigned long *)shadow_read_write_fields, + (unsigned long *)shadow_read_only_fields + }; + int max_fields[] = { + max_shadow_read_write_fields, + max_shadow_read_only_fields + }; + int i, q; + unsigned long field; + u64 field_value = 0; + struct vmcs *shadow_vmcs = vmx-nested.current_shadow_vmcs; + + vmcs_load(shadow_vmcs); + + for (q = 0; q num_lists; q++) { + for (i = 0; i max_fields[q]; i++) { + field = fields[q][i]; + vmcs12_read_any(vmx-vcpu, field, field_value); + + switch (vmcs_field_type(field)) { + case VMCS_FIELD_TYPE_U16: + vmcs_write16(field, (u16)field_value); + break; + case VMCS_FIELD_TYPE_U32: + vmcs_write32(field, (u32)field_value); + break; + case VMCS_FIELD_TYPE_U64: + vmcs_write64(field, (u64)field_value); + break; + case VMCS_FIELD_TYPE_NATURAL_WIDTH: + vmcs_writel(field, (long)field_value); + break; + } + } + } + + vmcs_clear(shadow_vmcs); + vmcs_load(vmx-loaded_vmcs-vmcs); +} + /* * VMX instructions which assume a current vmcs12 (i.e., 
that VMPTRLD was * used before) all generate the same failure when it is missing. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 10/11] KVM: nVMX: Synchronize VMCS12 content with the shadow vmcs
On Mon, Mar 11, 2013 at 09:54:47AM +0200, Abel Gordon wrote: Nadav Har'El n...@math.technion.ac.il wrote on 11/03/2013 12:43:35 AM: On Sun, Mar 10, 2013, Abel Gordon wrote about [PATCH 10/11] KVM: nVMX: Synchronize VMCS12 content with the shadow vmcs: nested_vmx_vmexit(vcpu); + if (enable_shadow_vmcs) + copy_vmcs12_to_shadow(to_vmx(vcpu)); I was curious why your patch adds this call to copy_vmcs12_to_shadow after every nested_vmx_vmexit (3 times), instead of making this call inside nested_vmx_vmexit(), say right after prepare_vmcs12(). Until I saw: Because nested code sometimes modifies vmcs fileds after nested_vmx_vmexit (see below). I was afraid nested logic may be changed in the future and some field may become out-of-sync. If we do have to call copy_vmcs12_to_shadow explicitly, then, it will be more difficult to miss some field. I think the patch already miss some fields. What if nested_vmx_run() fails and calls nested_vmx_entry_failure(). nested_vmx_entry_failure() sets vmcs12-vm_exit_reason and vmcs12-exit_qualification, but where do we copy them back to shadow before going back to L1? May be we need to introduce vmcs12 accessors to track what is changes and if something need to be copied to shadow before going back to L1. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM call agenda for 2013-04-09
Juan Quintela quint...@redhat.com wrote: Hi Today there is a call, there was agenda but I didn't noticed that it was sent as private mail to me: From Abel Gordon: I would like to present a technical report we just published that describes the design and evaluation of the work we did to improve virtual net/block I/O scalability and performance based on vhost and hosting multiple KVM guests. We started some discussion in the qemu-mailing list about this work and dataplane: http://lists.gnu.org/archive/html/qemu-devel/2013-02/msg02702.html http://lists.gnu.org/archive/html/qemu-devel/2013-03/msg00028.html The report can be downloaded from the following link: http://goo.gl/RKppy (full link: http://domino.research.ibm.com/library/cyberdig.nsf/1e4115aea78b6e7c85256b360066f0d4/479e3578ed05bfac85257b4200427735!OpenDocumentHighlight=0,h-0319 ) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] KVM call agenda for 2013-04-09
Meeting notes on Abel's presentation: Aim: improve vhost scalability Shared vhost thread == Problem: Linux scheduler does not see state of virtqueues, cannot make good scheduling decisions Solution: Shared thread serves multiple VMs and therefore influences I/O scheduling instead of kernel thread per vhost device Exitless communication = * Polling on host to notice guest vring updates without guest pio instruction * Use CPU affinity to bind vcpus to separate cores and let polling run on dedicated cores * Existless Interrupt (ELI) or future hardware APIC virtualization feature to inject virtual interrupts without vmexit and EOI See paper for performance results (impressive numbers): http://domino.research.ibm.com/library/cyberdig.nsf/papers/479E3578ED05BFAC85257B4200427735/$File/h-0319.pdf Abel will publish rebased code on GitHub but does not have time to upstream them. The next step: QEMU/KVM community can digest the paper + patches and decide on ideas to upstream. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] tcm_vhost: Fix tv_cmd leak in vhost_scsi_handle_vq
On Tue, 2013-04-09 at 17:16 +0800, Asias He wrote: If we fail to submit the allocated tv_vmd to tcm_vhost_submission_work, we will leak the tv_vmd. Free tv_vmd on fail path. Signed-off-by: Asias He as...@redhat.com --- drivers/vhost/tcm_vhost.c | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index 3351ed3..1f9116c 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -860,7 +860,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, vq_err(vq, Expecting virtio_scsi_cmd_resp, got %zu bytes, out: %d, in: %d\n, vq-iov[out].iov_len, out, in); - break; + goto err; } tv_cmd-tvc_resp = vq-iov[out].iov_base; @@ -882,7 +882,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n, scsi_command_size(tv_cmd-tvc_cdb), TCM_VHOST_MAX_CDB_SIZE); - break; /* TODO */ + goto err; } tv_cmd-tvc_lun = ((v_req.lun[2] 8) | v_req.lun[3]) 0x3FFF; @@ -895,7 +895,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, data_direction == DMA_TO_DEVICE); if (unlikely(ret)) { vq_err(vq, Failed to map iov to sgl\n); - break; /* TODO */ + goto err; } } Mmmm, I think these cases also require a VIRTIO_SCSI_S_BAD_TARGET + __copy_to_user + vhost_add_used_and_signal similar to how !tv_tpg is handled.. Otherwise virtio-scsi will end up in scsi timeout - abort, no..? Ditto for the vhost_scsi_allocate_cmd failure case.. vhost-net uses vhost_discard_vq_desc for some failure cases, is that needed here for the failure cases before __copy_from_user is called..? 
@@ -916,6 +916,11 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, } mutex_unlock(vq-mutex); + return; + +err: + vhost_scsi_free_cmd(tv_cmd); + mutex_unlock(vq-mutex); } static void vhost_scsi_ctl_handle_kick(struct vhost_work *work) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Virtualbox svga card in KVM
On Apr 6, 2013, at 2:52 AM, Sriram Murthy wrote: For starters, virtual box has better SVGA WDDM drivers that allows for a much richer display when the VM display is local. Does it support S3 and S4 with Windows 8? Yan. I am yet to completely understand both the KVM and the virtualbox SVGA card (actually, the virtualbox SVGA card is based off of the KVM VGA card), so I may not be the authority here. -Sriram - Original Message - From: Stefan Hajnoczi stefa...@gmail.com To: Sriram Murthy srira...@yahoo.com Cc: kvm@vger.kernel.org; qemu list qemu-de...@nongnu.org Sent: Friday, April 5, 2013 12:06 AM Subject: Re: Virtualbox svga card in KVM On Thu, Mar 21, 2013 at 10:53:21AM -0400, Alon Levy wrote: I am planning on bringing in the virtualbox svga card into kvm as a new svga card type (vbox probably?) so that we can load the VirtualBox SVGA card drivers in the guest. I'm curious if the vbox SVGA card has features that existing QEMU graphics cards do not provide? Stefan -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v8 0/7] Use eoi to track RTC interrupt delivery status
Il 09/04/2013 09:08, Gleb Natapov ha scritto: Current interrupt coalescing logci which only used by RTC has conflict with Posted Interrupt. This patch introduces a new mechinism to use eoi to track interrupt: When delivering an interrupt to vcpu, the pending_eoi set to number of vcpu that received the interrupt. And decrease it when each vcpu writing eoi. No subsequent RTC interrupt can deliver to vcpu until all vcpus write eoi. Only minor comment from me to this one. Paolo, Marcelo any comments? No, thanks. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v8 6/7] KVM: VMX: Add the algorithm of deliver posted interrupt
Il 08/04/2013 16:23, Yang Zhang ha scritto: + * interrupt from PIR in next vmentry. + */ +static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int r; + + if (pi_test_and_set_pir(vector, vmx-pi_desc)) + return; + + r = pi_test_and_set_on(vmx-pi_desc); + kvm_make_request(KVM_REQ_EVENT, vcpu); + if (!r (vcpu-mode == IN_GUEST_MODE)) + apic-send_IPI_mask(get_cpu_mask(vcpu-cpu), + POSTED_INTR_VECTOR); + else + kvm_vcpu_kick(vcpu); + + return; +} No need for this return. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] Virtualbox svga card in KVM
Hi, Am 08.04.2013 18:05, schrieb Sriram Murthy: The Virtualbox SVGA card was derived out of the KVM VGA card, so there are quite a few similarities (I am deliberately being vague here as I am still in the process of discovering the features of both these cards completely). Having said that, the APIs and the data structures themselves have been modified to add new features (like displaying a custom bmp as the VGA bootup logo) and it has a custom vga bios as well. Also, it is better that it be its own separate device model, so that maintenance of the vbox code becomes easier later. Further, I am thinking on the lines of retaining the VIrtualbox SVGA card code as is, and write a small KVM abstraction layer, so that it will be easy to port the bug fixes into the vbox SVGA card later on. Any comments/suggestions welcome here. Personally, I think that the connection between VirtualBox and QEMU is very unidirectional if there is any... So code-wise our focus should rather be to avoid code copies/divergence within our tree and to share code with existing in-tree devices, especially if you are not paid to continuously take care of this device once accepted into QEMU - that's how I interpret PMM's question below. There is nothing generally wrong with using KVM for guest driver development or to make existing stripped-down guest images work at all by adding such a special device. However, proposing to adopt a random vendor's paravirtual graphics card just because it has a few more resolutions and drivers on a particular platform does not strike me as a big advantage over SPICE, VMware VGA or past virtio-vga/-fb standardization attempts. 
Regards, Andreas -Sriram - Original Message - From: Peter Maydell peter.mayd...@linaro.org To: Sriram Murthy srira...@yahoo.com Cc: Stefan Hajnoczi stefa...@gmail.com; qemu list qemu-de...@nongnu.org; kvm@vger.kernel.org kvm@vger.kernel.org Sent: Monday, April 8, 2013 8:11 AM Subject: Re: [Qemu-devel] Virtualbox svga card in KVM On 6 April 2013 00:52, Sriram Murthy srira...@yahoo.com wrote: (actually, the virtualbox SVGA card is based off of the KVM VGA card) Is it possible to implement it as an extension to the VGA card device, or has it diverged incompatibly such that it has to be its own separate device model? thanks -- PMM -- SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
kvmtool : [PATCH] PowerPC : Fix compilation for ppc64
'lkvm' compilation on ppc64 fails with the following error : ...[snip].. LINK guest/init LINK lkvm /usr/bin/ld: powerpc:common architecture of input file `guest/guest_init.o' is incompatible with powerpc:common64 output collect2: ld returned 1 exit status make: *** [lkvm] Error 1 This patch corrects the error above, and enables 'lkvm' to compile on ppc64 architecture. Signed-off-by: Prerna Saxena pre...@linux.vnet.ibm.com --- tools/kvm/Makefile |4 1 file changed, 4 insertions(+) diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile index 0c59faa..269e29e 100644 --- a/tools/kvm/Makefile +++ b/tools/kvm/Makefile @@ -335,7 +335,11 @@ $(PROGRAM_ALIAS): $(PROGRAM) $(GUEST_INIT): guest/init.c $(E) LINK $@ $(Q) $(CC) -static guest/init.c -o $@ +ifeq ($(ARCH), powerpc) + $(Q) $(LD) -r -b binary --oformat elf64-powerpc -o guest/guest_init.o $(GUEST_INIT) +else $(Q) $(LD) -r -b binary -o guest/guest_init.o $(GUEST_INIT) +endif $(DEPS): -- 1.7.10.4 Regards, -- Prerna Saxena Linux Technology Centre, IBM Systems and Technology Lab, Bangalore, India -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: RFC: vfio API changes needed for powerpc (v3)
So now the sequence would be something like: 1)VFIO_GROUP_SET_CONTAINER // add groups to the container 2)VFIO_SET_IOMMU(VFIO_FSL_PAMU)// set iommu model 3)count = VFIO_IOMMU_GET_MSI_BANK_COUNT// returns max # of MSI banks 4)VFIO_IOMMU_SET_ATTR(ATTR_GEOMETRY) // set overall aperture 5)VFIO_IOMMU_SET_ATTR(ATTR_WINDOWS) // set # of windows, including MSI banks 6) For (int I = 0; I count; i++) VFIO_IOMMU_PAMU_MAP_MSI_BANK() // map the MSI banks, do not enable aperture here. 7) Memory Listener will call- VFIO_IOMMU_MAP_DMA// map the guest's memory --- kernel enables aperture here on first VFIO_IOMMU_MAP_DMA 8)VFIO_DEVICE_SET_IRQS --- VFIO in kernel makes pci_enable_msix()/pci_enable_msi_block() calls, this sets actual MSI addr/data in physical device. --- As the address set by above APIs is not what we want so - is using MSIX, VFIO will update address in the MSI-X table - if using MSI, update MSI address in PCI configuration space. Thanks -Bharat -Original Message- From: Yoder Stuart-B08248 Sent: Friday, April 05, 2013 3:40 AM To: Alex Williamson Cc: Wood Scott-B07421; ag...@suse.de; Bhushan Bharat-R65777; Sethi Varun-B16395; kvm@vger.kernel.org; qemu-de...@nongnu.org; io...@lists.linux-foundation.org Subject: RFC: vfio API changes needed for powerpc (v3) -v3 updates -made vfio_pamu_attr a union, added flags -s/VFIO_PAMU_/VFIO_IOMMU_PAMU_/ for the ioctls to make it more clear which fd is being operated on -added flags to vfio_pamu_msi_bank_map/umap -VFIO_PAMU_GET_MSI_BANK_COUNT now just returns a __u32 not a struct -fixed some typos The Freescale PAMU is an aperture-based IOMMU with the following characteristics. Each device has an entry in a table in memory describing the iova-phys mapping. 
The mapping has: -an overall aperture that is power of 2 sized, and has a start iova that is naturally aligned -has 1 or more windows within the aperture -number of windows must be power of 2, max is 256 -size of each window is determined by aperture size / # of windows -iova of each window is determined by aperture start iova / # of windows -the mapped region in each window can be different than the window size...mapping must power of 2 -physical address of the mapping must be naturally aligned with the mapping size These ioctls operate on the VFIO file descriptor (/dev/vfio/vfio). /* * VFIO_IOMMU_PAMU_GET_ATTR * * Gets the iommu attributes for the current vfio container. This * ioctl is applicable to an iommu type of VFIO_PAMU only. * Caller sets argsz and attribute. The ioctl fills in * the provided struct vfio_pamu_attr based on the attribute * value that was set. * Return: 0 on success, -errno on failure */ struct vfio_pamu_attr { __u32 argsz; __u32 flags;/* no flags currently */ __u32 attribute; union { /* VFIO_ATTR_GEOMETRY */ struct { __u64 aperture_start; /* first addr that can be mapped */ __u64 aperture_end; /* last addr that can be mapped */ } attr; /* VFIO_ATTR_WINDOWS */ __u32 windows; /* number of windows in the aperture */ /* initially this will be the max number * of windows that can be set */ /* VFIO_ATTR_PAMU_STASH */ struct { __u32 cpu; /* CPU number for stashing */ __u32 cache; /* cache ID for stashing */ } stash; } }; #define VFIO_IOMMU_PAMU_GET_ATTR _IO(VFIO_TYPE, VFIO_BASE + x, struct vfio_pamu_attr) /* * VFIO_IOMMU_PAMU_SET_ATTR * * Sets the iommu attributes for the current vfio container. This * ioctl is applicable to an iommu type of VFIO_PAMU only. * Caller sets struct vfio_pamu attr, including argsz and attribute and * setting any fields that are valid for the attribute. 
* Return: 0 on success, -errno on failure */ #define VFIO_IOMMU_PAMU_SET_ATTR _IO(VFIO_TYPE, VFIO_BASE + x, struct vfio_pamu_attr) /* * VFIO_IOMMU_PAMU_GET_MSI_BANK_COUNT * * Returns the number of MSI banks for this platform. This tells user space * how many aperture windows should be reserved for MSI banks when setting * the PAMU geometry and window count. * Return: __u32 bank count on success, -errno on failure */ #define VFIO_IOMMU_PAMU_GET_MSI_BANK_COUNT _IO(VFIO_TYPE, VFIO_BASE + x, __u32) /* * VFIO_IOMMU_PAMU_MAP_MSI_BANK *
PCI-passthrough on AMD - OpenVox A400P
Hi all! I've a cluster (active/passive) with two KVM VM with Asterisk and Pacemaker+Corosync. I wonder if anyone tried to use an OpenVox A400P card from a virtual machine. My idea is to have both nodes accessing this card. I don't know if it is possible, I wish that in this scenario when a node is accessing the card, the other doesn't use it. Of course this is a scenario for testing, but I wonder if I can do this kind of configuration on the cluster over virtual machines. Hardware: Motherboard: ASUS M2N32-SLI DELUXE ACPI BIOS Revision 0706 Processor: AMD Athlon(tm) 64 X2 Dual Core Processor 3800+ The hardware seems to support IOMMU, but it is not enabled: # dmesg | grep -i iommu [0.00] Please enable the IOMMU option in the BIOS setup [0.654618] PCI-DMA: using GART IOMMU. [0.654623] PCI-DMA: Reserving 64MB of IOMMU area in the AGP aperture Software versions: # uname -a Linux ss01 2.6.39-bpo.2-amd64 #1 SMP Thu Aug 4 11:42:06 UTC 2011 x86_64 GNU/Linux # kvm --version QEMU emulator version 1.1.2 (qemu-kvm-1.1.2+dfsg-5~bpo60+1, Debian), Copyright (c) 2003-2008 Fabrice Bellard Thanks in advance for your reply. Regards, Daniel -- Daniel Bareiro - System Administrator Fingerprint: BFB3 08D6 B4D1 31B2 72B9 29CE 6696 BF1B 14E6 1D37 Powered by Debian GNU/Linux Squeeze - Linux user #188.598 signature.asc Description: Digital signature
Re: PCI-passthrough on AMD - OpenVox A400P
On Tue, 2013-04-09 at 14:58 -0300, Daniel Bareiro wrote: Hi all! I've a cluster (active/passive) with two KVM VM with Asterisk and Pacemaker+Corosync. I wonder if anyone tried to use a OpenVox A400P card from a virtual machine. My idea is to have both nodes accessing this card. I don't know if it is possible, I wish that in this scenario when a node is accessing the card, the other don't use it. Of course this is a scenario for testing, but I wonder if I can do this kind of configuration on the cluster over virtual machines. Hardware: Motherboard: ASUS M2N32-SLI DELUXE ACPI BIOS Revision 0706 Processor: AMD Athlon(tm) 64 X2 Dual Core Processor 3800+ The hardware seems to support IOMMU, but it is not enabled: # dmesg | grep -i iommu [0.00] Please enable the IOMMU option in the BIOS setup [0.654618] PCI-DMA: using GART IOMMU. [0.654623] PCI-DMA: Reserving 64MB of IOMMU area in the AGP aperture Software versions: # uname -a Linux ss01 2.6.39-bpo.2-amd64 #1 SMP Thu Aug 4 11:42:06 UTC 2011 x86_64 GNU/Linux # kvm --version QEMU emulator version 1.1.2 (qemu-kvm-1.1.2+dfsg-5~bpo60+1, Debian), Copyright (c) 2003-2008 Fabrice Bellard Thanks in advance for your reply. AFAIK, Athlon64 does not support AMD-Vi, which is the IOMMU support you'd need. PCI passthrough also only works for VMs running on the same system where the card is installed, and doesn't offer multiplexing of a single device. Thanks, Alex -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[GIT PULL] vfio fix for 3.9-rc7
Hi Linus, Here's one small fix for vfio that I'd like to get in for 3.9; tightening the range checking around a vfio ioctl. Thanks! Alex The following changes since commit 25e9789ddd9d14a8971f4a421d04f282719ab733: vfio: include linux/slab.h for kmalloc (2013-03-15 12:58:20 -0600) are available in the git repository at: git://github.com/awilliam/linux-vfio.git tags/vfio-v3.9-rc7 for you to fetch changes up to 904c680c7bf016a8619a045850937427f8d7368c: vfio-pci: Fix possible integer overflow (2013-03-26 11:33:16 -0600) vfio overflow fix for v3.9-rc7 Alex Williamson (1): vfio-pci: Fix possible integer overflow drivers/vfio/pci/vfio_pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [GIT PULL] vfio fix for 3.9-rc7
On Tue, Apr 9, 2013 at 9:05 PM, Alex Williamson alex.william...@redhat.com wrote: Hi Linus, Here's one small fix for vfio that I'd like to get in for 3.9; tightening the range checking around a vfio ioctl. Thanks! Alex The following changes since commit 25e9789ddd9d14a8971f4a421d04f282719ab733: vfio: include linux/slab.h for kmalloc (2013-03-15 12:58:20 -0600) are available in the git repository at: git://github.com/awilliam/linux-vfio.git tags/vfio-v3.9-rc7 for you to fetch changes up to 904c680c7bf016a8619a045850937427f8d7368c: vfio-pci: Fix possible integer overflow (2013-03-26 11:33:16 -0600) Is this commit queued for -stable? I don't see a stable tag in the commit message. Thanks, //richard -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [GIT PULL] vfio fix for 3.9-rc7
On Tue, 2013-04-09 at 21:40 +0200, richard -rw- weinberger wrote: On Tue, Apr 9, 2013 at 9:05 PM, Alex Williamson alex.william...@redhat.com wrote: Hi Linus, Here's one small fix for vfio that I'd like to get in for 3.9; tightening the range checking around a vfio ioctl. Thanks! Alex The following changes since commit 25e9789ddd9d14a8971f4a421d04f282719ab733: vfio: include linux/slab.h for kmalloc (2013-03-15 12:58:20 -0600) are available in the git repository at: git://github.com/awilliam/linux-vfio.git tags/vfio-v3.9-rc7 for you to fetch changes up to 904c680c7bf016a8619a045850937427f8d7368c: vfio-pci: Fix possible integer overflow (2013-03-26 11:33:16 -0600) Is this commit queued for -stable? I don't see a stable tag in the commit message. No, I forgot to add it. I'll take the long way around and send it to stable once it's applied. Thanks, Alex -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] KVM: PPC: emulate dcbst
From: Stuart Yoder stuart.yo...@freescale.com Signed-off-by: Stuart Yoder stuart.yo...@freescale.com --- arch/powerpc/kvm/emulate.c |2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 7a73b6f..631a265 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -38,6 +38,7 @@ #define OP_31_XOP_TRAP 4 #define OP_31_XOP_LWZX 23 +#define OP_31_XOP_DCBST 54 #define OP_31_XOP_TRAP_64 68 #define OP_31_XOP_DCBF 86 #define OP_31_XOP_LBZX 87 @@ -370,6 +371,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs); break; + case OP_31_XOP_DCBST: case OP_31_XOP_DCBF: case OP_31_XOP_DCBI: /* Do nothing. The guest is performing dcbi because -- 1.7.9.7 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/7 v3] Rename EMULATE_DO_PAPR to EMULATE_EXIT_USER
On Mon, Apr 08, 2013 at 04:02:13PM +0530, Bharat Bhushan wrote: Instruction emulation return EMULATE_DO_PAPR when it requires exit to userspace on book3s. Similar return is required for booke. EMULATE_DO_PAPR reads out to be confusing so it is renamed to EMULATE_EXIT_USER. This and the following patch look like an unnecessary and confusing change to me. If you want an EMULATE_EXIT_USER, why not just add it and use it in your new code, and leave the EMULATE_DO_PAPR code alone? Paul. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH] Emulate MOVBE
Hi guys, so I was trying to repro tglx's bug in smpboot.c and for some reason, the most reliable way to trigger it was to boot an 32-bit atom smp guest in kvm (don't ask :)). The problem, however, was that atom wants MOVBE and qemu doesn't emulate it yet (except Richard's patches which I used in order to be able to actually even boot a guest). However, without hw acceleration, qemu is pretty slow, and waiting for an smp guest to boot in sw-only emulation is not fun. So, just for funsies, I decided to give the MOVBE emulation a try. Patch is below, 8 core smp atom guest boots fine and in 6-ish seconds with it. :-) I know, I know, it still needs cleaning up and proper rFLAGS handling but that is for later. For now, I'd very much like to hear whether this approach even makes sense and if so, what should be improved. Thanks, and thanks to Andre and Jörg for their help. Not-yet-signed-off-by: Borislav Petkov b...@suse.de -- diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 15f960c06ff7..ae01c765cd77 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -281,6 +281,7 @@ struct x86_emulate_ctxt { /* decode cache */ u8 twobyte; + u8 thirdbyte; u8 b; u8 intercept; u8 lock_prefix; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a335cc6cde72..0ccff339359d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -152,6 +152,7 @@ #define Avx ((u64)1 43) /* Advanced Vector Extensions */ #define Fastop ((u64)1 44) /* Use opcode::u.fastop */ #define NoWrite ((u64)1 45) /* No writeback */ +#define ThreeByte ((u64)1 46) /* Three byte opcodes 0F 38 and 0F 3A */ #define X2(x...) x, x #define X3(x...) 
X2(x), x @@ -3107,6 +3108,34 @@ static int em_mov(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_movbe(struct x86_emulate_ctxt *ctxt) +{ + char *valptr = ctxt-src.valptr; + + switch (ctxt-op_bytes) { + case 2: + *(u16 *)valptr = swab16(*(u16 *)valptr); + break; + case 4: + *(u32 *)valptr = swab32(*(u32 *)valptr); + + /* +* clear upper dword for 32-bit operand size in 64-bit mode. +*/ + if (ctxt-mode == X86EMUL_MODE_PROT64) + *((u32 *)valptr + 1) = 0x0; + break; + case 8: + *(u64 *)valptr = swab64(*(u64 *)valptr); + break; + default: + return X86EMUL_PROPAGATE_FAULT; + } + + memcpy(ctxt-dst.valptr, ctxt-src.valptr, ctxt-op_bytes); + return X86EMUL_CONTINUE; +} + static int em_cr_write(struct x86_emulate_ctxt *ctxt) { if (ctxt-ops-set_cr(ctxt, ctxt-modrm_reg, ctxt-src.val)) @@ -3974,7 +4003,8 @@ static const struct opcode twobyte_table[256] = { I(ImplicitOps | VendorSpecific, em_sysenter), I(ImplicitOps | Priv | VendorSpecific, em_sysexit), N, N, - N, N, N, N, N, N, N, N, + I(ModRM | Mov | ThreeByte | VendorSpecific, em_movbe), + N, N, N, N, N, N, N, /* 0x40 - 0x4F */ X16(D(DstReg | SrcMem | ModRM | Mov)), /* 0x50 - 0x5F */ @@ -4323,6 +4353,15 @@ done_prefixes: } ctxt-d = opcode.flags; + if (ctxt-d ThreeByte) { + ctxt-thirdbyte = insn_fetch(u8, ctxt); + + if (ctxt-thirdbyte == 0xf0) + ctxt-d |= DstReg | SrcMem; + else + ctxt-d |= DstMem | SrcReg; + } + if (ctxt-d ModRM) ctxt-modrm = insn_fetch(u8, ctxt); -- Regards/Gruss, Boris. Sent from a fat crate under my desk. Formatting is fine. -- -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH] Emulate MOVBE
On Wed, Apr 10, 2013 at 01:46:02AM +0200, Borislav Petkov wrote: +static int em_movbe(struct x86_emulate_ctxt *ctxt) +{ + char *valptr = ctxt-src.valptr; + + switch (ctxt-op_bytes) { + case 2: + *(u16 *)valptr = swab16(*(u16 *)valptr); + break; + case 4: + *(u32 *)valptr = swab32(*(u32 *)valptr); Note to self: this destroys the src operand but it shouldn't. Fix it tomorrow. -- Regards/Gruss, Boris. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH] Emulate MOVBE
On 04/09/2013 05:03 PM, Borislav Petkov wrote: Note to self: this destroys the src operand but it shouldn't. Fix it tomorrow. I thought movbe was already in qemu just not on by default...? -hpa -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH v8 6/7] KVM: VMX: Add the algorithm of deliver posted interrupt
Paolo Bonzini wrote on 2013-04-10: Il 08/04/2013 16:23, Yang Zhang ha scritto: + * interrupt from PIR in next vmentry. + */ +static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) +{ +struct vcpu_vmx *vmx = to_vmx(vcpu); +int r; + +if (pi_test_and_set_pir(vector, vmx-pi_desc)) +return; + +r = pi_test_and_set_on(vmx-pi_desc); +kvm_make_request(KVM_REQ_EVENT, vcpu); +if (!r (vcpu-mode == IN_GUEST_MODE)) +apic-send_IPI_mask(get_cpu_mask(vcpu-cpu), +POSTED_INTR_VECTOR); +else +kvm_vcpu_kick(vcpu); + +return; +} No need for this return. Right. Will remove it in next version. Best regards, Yang -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH] KVM: PPC: emulate dcbst
-Original Message- From: Yoder Stuart-B08248 Sent: Tuesday, April 09, 2013 3:36 PM To: ag...@suse.de Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; Yoder Stuart-B08248 Subject: [PATCH] KVM: PPC: emulate dcbst From: Stuart Yoder stuart.yo...@freescale.com Signed-off-by: Stuart Yoder stuart.yo...@freescale.com --- One thing I should have mentioned...without this patch on the host 64-bit guest kernels are currently broken when using -smp. Not sure how far back things are broken, the oldest kernel I had a chance to try was like 3.8-rc3. Stuart -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/3] tcm_vhost fix cmd leak and bad target
Asias He (3): tcm_vhost: Fix tv_cmd leak in vhost_scsi_handle_vq tcm_vhost: Add vhost_scsi_send_bad_target() helper tcm_vhost: Send bad target to guest when cmd fails drivers/vhost/tcm_vhost.c | 44 1 file changed, 28 insertions(+), 16 deletions(-) -- 1.8.1.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/3] tcm_vhost: Fix tv_cmd leak in vhost_scsi_handle_vq
If we fail to submit the allocated tv_cmd to tcm_vhost_submission_work, we will leak the tv_cmd. Free tv_cmd on fail path. Signed-off-by: Asias He as...@redhat.com --- drivers/vhost/tcm_vhost.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index 10f2d30..e8d1a1f 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -715,7 +715,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n, scsi_command_size(tv_cmd-tvc_cdb), TCM_VHOST_MAX_CDB_SIZE); - break; /* TODO */ + goto err; } tv_cmd-tvc_lun = ((v_req.lun[2] 8) | v_req.lun[3]) 0x3FFF; @@ -728,7 +728,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, data_direction == DMA_TO_DEVICE); if (unlikely(ret)) { vq_err(vq, Failed to map iov to sgl\n); - break; /* TODO */ + goto err; } } @@ -749,6 +749,11 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, } mutex_unlock(vq-mutex); + return; + +err: + vhost_scsi_free_cmd(tv_cmd); + mutex_unlock(vq-mutex); } static void vhost_scsi_ctl_handle_kick(struct vhost_work *work) -- 1.8.1.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/3] tcm_vhost: Add vhost_scsi_send_bad_target() helper
Share the send bad target code with other use cases. Signed-off-by: Asias He as...@redhat.com --- drivers/vhost/tcm_vhost.c | 31 ++- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index e8d1a1f..1c719ed 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -569,6 +569,23 @@ static void tcm_vhost_submission_work(struct work_struct *work) } } +static void vhost_scsi_send_bad_target(struct vhost_scsi *vs, + struct vhost_virtqueue *vq, int head, unsigned out) +{ + struct virtio_scsi_cmd_resp __user *resp; + struct virtio_scsi_cmd_resp rsp; + int ret; + + memset(rsp, 0, sizeof(rsp)); + rsp.response = VIRTIO_SCSI_S_BAD_TARGET; + resp = vq-iov[out].iov_base; + ret = __copy_to_user(resp, rsp, sizeof(rsp)); + if (!ret) + vhost_add_used_and_signal(vs-dev, vq, head, 0); + else + pr_err(Faulted on virtio_scsi_cmd_resp\n); +} + static void vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) { @@ -664,19 +681,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, /* Target does not exist, fail the request */ if (unlikely(!tv_tpg)) { - struct virtio_scsi_cmd_resp __user *resp; - struct virtio_scsi_cmd_resp rsp; - - memset(rsp, 0, sizeof(rsp)); - rsp.response = VIRTIO_SCSI_S_BAD_TARGET; - resp = vq-iov[out].iov_base; - ret = __copy_to_user(resp, rsp, sizeof(rsp)); - if (!ret) - vhost_add_used_and_signal(vs-dev, - vq, head, 0); - else - pr_err(Faulted on virtio_scsi_cmd_resp\n); - + vhost_scsi_send_bad_target(vs, vq, out, head); continue; } -- 1.8.1.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/3] tcm_vhost: Send bad target to guest when cmd fails
Send bad target to guest in case: 1) we can not allocate the cmd 2) fail to submit the cmd Signed-off-by: Asias He as...@redhat.com --- drivers/vhost/tcm_vhost.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index 1c719ed..4dc6f2d 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -694,7 +694,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, if (IS_ERR(tv_cmd)) { vq_err(vq, vhost_scsi_allocate_cmd failed %ld\n, PTR_ERR(tv_cmd)); - break; + goto err_cmd; } pr_debug(Allocated tv_cmd: %p exp_data_len: %d, data_direction : %d\n, tv_cmd, exp_data_len, data_direction); @@ -720,7 +720,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n, scsi_command_size(tv_cmd-tvc_cdb), TCM_VHOST_MAX_CDB_SIZE); - goto err; + goto err_free; } tv_cmd-tvc_lun = ((v_req.lun[2] 8) | v_req.lun[3]) 0x3FFF; @@ -733,7 +733,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, data_direction == DMA_TO_DEVICE); if (unlikely(ret)) { vq_err(vq, Failed to map iov to sgl\n); - goto err; + goto err_free; } } @@ -756,8 +756,10 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, mutex_unlock(vq-mutex); return; -err: +err_free: vhost_scsi_free_cmd(tv_cmd); +err_cmd: + vhost_scsi_send_bad_target(vs, vq, out, head); mutex_unlock(vq-mutex); } -- 1.8.1.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] tcm_vhost: Fix tv_cmd leak in vhost_scsi_handle_vq
On Tue, Apr 09, 2013 at 08:46:42AM -0700, Nicholas A. Bellinger wrote: On Tue, 2013-04-09 at 17:16 +0800, Asias He wrote: If we fail to submit the allocated tv_vmd to tcm_vhost_submission_work, we will leak the tv_vmd. Free tv_vmd on fail path. Signed-off-by: Asias He as...@redhat.com --- drivers/vhost/tcm_vhost.c | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index 3351ed3..1f9116c 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -860,7 +860,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, vq_err(vq, Expecting virtio_scsi_cmd_resp, got %zu bytes, out: %d, in: %d\n, vq-iov[out].iov_len, out, in); - break; + goto err; } tv_cmd-tvc_resp = vq-iov[out].iov_base; @@ -882,7 +882,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n, scsi_command_size(tv_cmd-tvc_cdb), TCM_VHOST_MAX_CDB_SIZE); - break; /* TODO */ + goto err; } tv_cmd-tvc_lun = ((v_req.lun[2] 8) | v_req.lun[3]) 0x3FFF; @@ -895,7 +895,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, data_direction == DMA_TO_DEVICE); if (unlikely(ret)) { vq_err(vq, Failed to map iov to sgl\n); - break; /* TODO */ + goto err; } } Mmmm, I think these cases also require a VIRTIO_SCSI_S_BAD_TARGET + __copy_to_user + vhost_add_used_and_signal similar to how !tv_tpg is handled.. Otherwise virtio-scsi will end up in scsi timeout - abort, no..? Ditto for the vhost_scsi_allocate_cmd failure case.. Sent out new patches. vhost-net uses vhost_discard_vq_desc for some failure cases, is that needed here for the failure cases before __copy_from_user is called..? I don't think it is useful. vhost_discard_vq_desc reverse the effect of vhost_get_vq_desc. If we put it back in the queue and next time we will still fail. 
@@ -916,6 +916,11 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, } mutex_unlock(vq-mutex); + return; + +err: + vhost_scsi_free_cmd(tv_cmd); + mutex_unlock(vq-mutex); } static void vhost_scsi_ctl_handle_kick(struct vhost_work *work) -- Asias -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 5/7 v3] KVM: PPC: exit to user space on ehpriv instruction
On 04/08/2013 06:32 PM, Bharat Bhushan wrote: From: Bharat Bhushan bharat.bhus...@freescale.com ehpriv instruction is used for setting software breakpoints by user space. This patch adds support to exit to user space with run-debug have relevant information. Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com --- arch/powerpc/kvm/e500_emulate.c | 10 ++ 1 files changed, 10 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index e78f353..cefdd38 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -26,6 +26,7 @@ #define XOP_TLBRE 946 #define XOP_TLBWE 978 #define XOP_TLBILX 18 +#define XOP_EHPRIV 270 #ifdef CONFIG_KVM_E500MC static int dbell2prio(ulong param) @@ -130,6 +131,15 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, emulated = kvmppc_e500_emul_tlbivax(vcpu, ea); break; + case XOP_EHPRIV: + run-exit_reason = KVM_EXIT_DEBUG; IIRC, the ehpriv instruction should generate a Hypervisor Privilege Exception to trap into the Hypervisor proactive. And we can use this ability to design something conveniently. And so, that is not only for the debug mechanism like you did. So here if 'run-exit_reason' is fixed to KVM_EXIT_DEBUG, how to distinguish other scenarios? So as I understand, we should use 'ehpriv oc' exactly then resolve 'oc' further to go different cases, right? Tiejun + run-debug.arch.address = vcpu-arch.pc; + run-debug.arch.status = 0; + kvmppc_account_exit(vcpu, DEBUG_EXITS); + emulated = EMULATE_EXIT_USER; + *advance = 0; + break; + default: emulated = EMULATE_FAIL; } -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] KVM: PPC: emulate dcbst
From: Stuart Yoder stuart.yo...@freescale.com Signed-off-by: Stuart Yoder stuart.yo...@freescale.com --- arch/powerpc/kvm/emulate.c |2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 7a73b6f..631a265 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -38,6 +38,7 @@ #define OP_31_XOP_TRAP 4 #define OP_31_XOP_LWZX 23 +#define OP_31_XOP_DCBST 54 #define OP_31_XOP_TRAP_64 68 #define OP_31_XOP_DCBF 86 #define OP_31_XOP_LBZX 87 @@ -370,6 +371,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs); break; + case OP_31_XOP_DCBST: case OP_31_XOP_DCBF: case OP_31_XOP_DCBI: /* Do nothing. The guest is performing dcbi because -- 1.7.9.7 -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html