On Fri, Nov 23, 2012 at 11:46:30AM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2012-11-22:
> > On Wed, Nov 21, 2012 at 04:09:36PM +0800, Yang Zhang wrote:
> >> Virtual interrupt delivery avoids KVM to inject vAPIC interrupts
> >> manually, which is fully taken care of by the hardware. This needs
> >> some special awareness into existing interrupt injection path:
> >>
> >> - for pending interrupt, instead of direct injection, we may need
> >> update architecture specific indicators before resuming to guest.
> >> - A pending interrupt, which is masked by ISR, should be also
> >> considered in above update action, since hardware will decide
> >> when to inject it at right time. Current has_interrupt and
> >> get_interrupt only returns a valid vector from injection p.o.v.
> >> Signed-off-by: Yang Zhang <[email protected]>
> >> Signed-off-by: Kevin Tian <[email protected]>
> >> ---
> >> arch/x86/include/asm/kvm_host.h | 4 + arch/x86/include/asm/vmx.h
> >> | 11 ++++ arch/x86/kvm/irq.c | 44 ++++++++++++++
> >> arch/x86/kvm/lapic.c | 44 +++++++++++++-
> >> arch/x86/kvm/lapic.h | 13 ++++ arch/x86/kvm/svm.c
> >> | 6 ++ arch/x86/kvm/vmx.c | 125
> >> ++++++++++++++++++++++++++++++++++++++- arch/x86/kvm/x86.c
> >> | 16 +++++- virt/kvm/ioapic.c | 1 + 9 files
> >> changed, 260 insertions(+), 4 deletions(-)
> >> diff --git a/arch/x86/include/asm/kvm_host.h
> >> b/arch/x86/include/asm/kvm_host.h index b2e11f4..8e07a86 100644 ---
> >> a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h
> >> @@ -682,6 +682,10 @@ struct kvm_x86_ops {
> >> void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
> >> void (*enable_irq_window)(struct kvm_vcpu *vcpu);
> >> void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
> >> + int (*has_virtual_interrupt_delivery)(struct kvm_vcpu *vcpu);
> >> + void (*update_irq)(struct kvm_vcpu *vcpu);
> >> + void (*set_eoi_exitmap)(struct kvm_vcpu *vcpu, int vector,
> >> + int need_eoi, int global);
> >> int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
> >> int (*get_tdp_level)(void);
> >> u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
> >> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> >> index 21101b6..1003341 100644
> >> --- a/arch/x86/include/asm/vmx.h
> >> +++ b/arch/x86/include/asm/vmx.h
> >> @@ -62,6 +62,7 @@
> >> #define EXIT_REASON_MCE_DURING_VMENTRY 41 #define
> >> EXIT_REASON_TPR_BELOW_THRESHOLD 43 #define EXIT_REASON_APIC_ACCESS
> >> 44 +#define EXIT_REASON_EOI_INDUCED 45 #define
> >> EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_MISCONFIG
> >> 49 #define EXIT_REASON_WBINVD 54 @@ -143,6 +144,7 @@
> >> #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 #define
> >> SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 #define
> >> SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 +#define
> >> SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 #define
> >> SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define
> >> SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
> >> @@ -180,6 +182,7 @@ enum vmcs_field {
> >> GUEST_GS_SELECTOR = 0x0000080a, GUEST_LDTR_SELECTOR
> >> = 0x0000080c, GUEST_TR_SELECTOR = 0x0000080e,
> >> + GUEST_INTR_STATUS = 0x00000810, HOST_ES_SELECTOR
> >> = 0x00000c00, HOST_CS_SELECTOR = 0x00000c02,
> >> HOST_SS_SELECTOR = 0x00000c04, @@ -207,6 +210,14 @@
> >> enum vmcs_field { APIC_ACCESS_ADDR_HIGH = 0x00002015,
> >> EPT_POINTER
> >> = 0x0000201a, EPT_POINTER_HIGH =
> >> 0x0000201b,
> >> + EOI_EXIT_BITMAP0 = 0x0000201c,
> >> + EOI_EXIT_BITMAP0_HIGH = 0x0000201d,
> >> + EOI_EXIT_BITMAP1 = 0x0000201e,
> >> + EOI_EXIT_BITMAP1_HIGH = 0x0000201f,
> >> + EOI_EXIT_BITMAP2 = 0x00002020,
> >> + EOI_EXIT_BITMAP2_HIGH = 0x00002021,
> >> + EOI_EXIT_BITMAP3 = 0x00002022,
> >> + EOI_EXIT_BITMAP3_HIGH = 0x00002023,
> >> GUEST_PHYSICAL_ADDRESS = 0x00002400,
> >> GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
> >> VMCS_LINK_POINTER = 0x00002800,
> >> diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> >> index 7e06ba1..c7356a3 100644
> >> --- a/arch/x86/kvm/irq.c
> >> +++ b/arch/x86/kvm/irq.c
> >> @@ -60,6 +60,29 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
> >> EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
> >>
> >> /*
> >> + * check if there is pending interrupt without
> >> + * intack. This _apicv version is used when hardware
> >> + * supports APIC virtualization with virtual interrupt
> >> + * delivery support. In such case, KVM is not required
> >> + * to poll pending APIC interrupt, and thus this
> >> + * interface is used to poll pending interrupts from
> >> + * non-APIC source.
> >> + */
> >> +int kvm_cpu_has_extint(struct kvm_vcpu *v)
> >> +{
> >> + struct kvm_pic *s;
> >> +
> >> + if (!irqchip_in_kernel(v->kvm))
> >> + return v->arch.interrupt.pending;
> >> +
> > This does not belong here. If !irqchip_in_kernel() the function will not
> > be called. Hmm, actually with !irqchip_in_kernel() the kernel will oops in
> > kvm_apic_vid_enabled() since it dereferences vcpu->arch.apic without
> > checking if it is NULL.
>
> Right. Will remove it in next version and add the check in
> kvm_apic_vid_enabled.
>
> >
> >> + if (kvm_apic_accept_pic_intr(v)) {
> >> + s = pic_irqchip(v->kvm); /* PIC */
> >> + return s->output;
> >> + } else
> >> + return 0;
> > This is code duplication from kvm_cpu_has_interrupt(). Write common
> > function and call it from kvm_cpu_has_interrupt(), but even that is
> > not needed, see below.
>
> Why it is not needed?
Because if you change kvm_cpu_has_interrupt() like I described below, the
code path that uses this function will not be needed.
>
> >> +}
> >> +
> >> +/*
> >> * Read pending interrupt vector and intack.
> >> */
> >> int kvm_cpu_get_interrupt(struct kvm_vcpu *v) @@ -82,6 +105,27 @@ int
> >> kvm_cpu_get_interrupt(struct kvm_vcpu *v) }
> >> EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
> >> +/*
> >> + * Read pending interrupt vector and intack.
> >> + * Similar to kvm_cpu_has_interrupt_apicv, to get
> >> + * interrupts from non-APIC sources.
> >> + */
> >> +int kvm_cpu_get_extint(struct kvm_vcpu *v)
> >> +{
> >> + struct kvm_pic *s;
> >> + int vector = -1;
> >> +
> >> + if (!irqchip_in_kernel(v->kvm))
> >> + return v->arch.interrupt.nr;
> > Same as above.
> >
> >> +
> >> + if (kvm_apic_accept_pic_intr(v)) {
> >> + s = pic_irqchip(v->kvm);
> >> + s->output = 0; /* PIC */
> >> + vector = kvm_pic_read_irq(v->kvm);
> > Ditto about code duplication.
> >
> >> + }
> >> + return vector;
> >> +}
> >> +
> >> void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
> >> {
> >> kvm_inject_apic_timer_irqs(vcpu);
> >> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> >> index a63ffdc..af48361 100644
> >> --- a/arch/x86/kvm/lapic.c
> >> +++ b/arch/x86/kvm/lapic.c
> >> @@ -643,6 +643,12 @@ out:
> >> return ret;
> >> }
> >> +void kvm_set_eoi_exitmap(struct kvm_vcpu *vcpu, int vector,
> >> + int need_eoi, int global)
> >> +{
> >> + kvm_x86_ops->set_eoi_exitmap(vcpu, vector, need_eoi, global);
> >> +}
> >> +
> >> /*
> >> * Add a pending IRQ into lapic.
> >> * Return 1 if successfully added and 0 if discarded.
> >> @@ -664,8 +670,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic,
> >> int
> > delivery_mode,
> >> if (trig_mode) {
> >> apic_debug("level trig mode for vector %d", vector);
> >> apic_set_vector(vector, apic->regs + APIC_TMR);
> >> - } else
> >> + kvm_set_eoi_exitmap(vcpu, vector, 1, 0);
> >> + } else {
> >> apic_clear_vector(vector, apic->regs + APIC_TMR);
> >> + kvm_set_eoi_exitmap(vcpu, vector, 0, 0);
> > Why not use APIC_TMR directly instead of kvm_set_eoi_exitmap() logic?
>
> Good idea. It seems more reasonable.
>
> >> + }
> >>
> >> result = !apic_test_and_set_irr(vector, apic);
> >> trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, @@
> >> -769,6
> >> +778,26 @@ static int apic_set_eoi(struct kvm_lapic *apic) return
> >> vector; }
> >> +/*
> >> + * this interface assumes a trap-like exit, which has already finished
> >> + * desired side effect including vISR and vPPR update.
> >> + */
> >> +void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
> >> +{
> >> + struct kvm_lapic *apic = vcpu->arch.apic;
> >> + int trigger_mode;
> >> +
> >> + if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR))
> >> + trigger_mode = IOAPIC_LEVEL_TRIG;
> >> + else
> >> + trigger_mode = IOAPIC_EDGE_TRIG;
> >> +
> >> + if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI))
> >> + kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
> >> + kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
> > More code duplication. Why not call apic_set_eoi() and skip isr/ppr
> > logic there if vid is enabled, or put the logic in common function and
> > call from both places.
>
> Ok, will change it in next patch.
>
> >> +}
> >> +EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
> >> +
> >> static void apic_send_ipi(struct kvm_lapic *apic) { u32 icr_low =
> >> kvm_apic_get_reg(apic, APIC_ICR); @@ -1510,6 +1539,8 @@ int
> >> kvm_create_lapic(struct kvm_vcpu *vcpu) kvm_lapic_reset(vcpu);
> >> kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
> >> + if (kvm_x86_ops->has_virtual_interrupt_delivery(vcpu))
> >> + apic->vid_enabled = true;
> > > What do you need vid_enabled for? This is global, not per-apic, state.
> When injecting an interrupt into the guest, we need this to check whether vid is enabled.
> If not, use the old way to handle the interrupt.
> I think putting it in apic is reasonable. Though all vcpus use the same configuration,
> the APICv feature is per vcpu too.
>
How is APICv per vcpu? It is global. Just call
has_virtual_interrupt_delivery(vcpu)
instead of the vid_enabled thing.
> >
> >> return 0; nomem_free_apic: kfree(apic); @@ -1533,6 +1564,17 @@ int
> >> kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) return highest_irr; }
> >> +int kvm_apic_get_highest_irr(struct kvm_vcpu *vcpu)
> >> +{
> >> + struct kvm_lapic *apic = vcpu->arch.apic;
> >> +
> >> + if (!apic || !apic_enabled(apic))
> >> + return -1;
> >> +
> >> + return apic_find_highest_irr(apic);
> >> +}
> >> +EXPORT_SYMBOL_GPL(kvm_apic_get_highest_irr);
> >> +
> >> int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
> >> {
> >> u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
> >> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> >> index c42f111..2503a64 100644
> >> --- a/arch/x86/kvm/lapic.h
> >> +++ b/arch/x86/kvm/lapic.h
> >> @@ -20,6 +20,7 @@ struct kvm_lapic {
> >> u32 divide_count; struct kvm_vcpu *vcpu; bool irr_pending; +
> >> bool
> >> vid_enabled; /* Number of bits set in ISR. */ s16 isr_count;
> >> /* The
> >> highest vector set in ISR; if -1 - invalid, must scan ISR. */ @@ -39,6
> >> +40,9 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu); int
> >> kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); int
> >> kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); int
> >> kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
> >> +int kvm_cpu_has_extint(struct kvm_vcpu *v);
> >> +int kvm_cpu_get_extint(struct kvm_vcpu *v);
> >> +int kvm_apic_get_highest_irr(struct kvm_vcpu *vcpu);
> >> void kvm_lapic_reset(struct kvm_vcpu *vcpu); u64
> >> kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); void
> >> kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); @@ -50,6
> >> +54,8 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu); int
> >> kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); int
> >> kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); int
> >> kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
> >> +void kvm_set_eoi_exitmap(struct kvm_vcpu *vcpu, int vector,
> >> + int need_eoi, int global);
> >> int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
> >>
> >> bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
> >> @@ -65,6 +71,7 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu
> > *vcpu);
> >> void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
> >>
> >> int kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset);
> >> +void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector);
> >>
> >> void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
> >> void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
> >> @@ -81,6 +88,12 @@ static inline bool
> > kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
> >> return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
> >> }
> >> +static inline bool kvm_apic_vid_enabled(struct kvm_vcpu *vcpu)
> >> +{
> >> + struct kvm_lapic *apic = vcpu->arch.apic;
> >> + return apic->vid_enabled;
> >> +}
> > vcpu->arch.apic can be NULL from where this is called.
> >
> >> +
> >> int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
> >> void kvm_lapic_init(void);
> >> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> >> index d017df3..b290aba 100644
> >> --- a/arch/x86/kvm/svm.c
> >> +++ b/arch/x86/kvm/svm.c
> >> @@ -3564,6 +3564,11 @@ static void update_cr8_intercept(struct kvm_vcpu
> > *vcpu, int tpr, int irr)
> >> set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
> >> }
> >> +static int svm_has_virtual_interrupt_delivery(struct kvm_vcpu *vcpu)
> >> +{
> >> + return 0;
> >> +}
> >> +
> >> static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm
> >> *svm = to_svm(vcpu); @@ -4283,6 +4288,7 @@ static struct kvm_x86_ops
> >> svm_x86_ops = { .enable_nmi_window = enable_nmi_window,
> >> .enable_irq_window = enable_irq_window, .update_cr8_intercept =
> >> update_cr8_intercept,
> >> + .has_virtual_interrupt_delivery = svm_has_virtual_interrupt_delivery,
> >>
> >> .set_tss_addr = svm_set_tss_addr,
> >> .get_tdp_level = get_npt_level,
> >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> >> index e9287aa..c0d74ce 100644
> >> --- a/arch/x86/kvm/vmx.c
> >> +++ b/arch/x86/kvm/vmx.c
> >> @@ -86,6 +86,9 @@ module_param(fasteoi, bool, S_IRUGO);
> >> static bool __read_mostly enable_apicv_reg = 0;
> >> module_param(enable_apicv_reg, bool, S_IRUGO);
> >> +static bool __read_mostly enable_apicv_vid = 0;
> >> +module_param(enable_apicv_vid, bool, S_IRUGO);
> >> +
> >> /*
> >> * If nested=1, nested virtualization is supported, i.e., guests may use
> >> * VMX and be a hypervisor for its own guests. If nested=0, guests may not
> >> @@ -432,6 +435,10 @@ struct vcpu_vmx {
> >>
> >> bool rdtscp_enabled;
> >> + u8 eoi_exitmap_changed;
> >> + u64 eoi_exit_bitmap[4];
> >> + u64 eoi_exit_bitmap_global[4];
> >> +
> >> /* Support for a guest hypervisor (nested VMX) */
> >> struct nested_vmx nested;
> >> };
> >> @@ -770,6 +777,12 @@ static inline bool
> > cpu_has_vmx_apic_register_virt(void)
> >> SECONDARY_EXEC_APIC_REGISTER_VIRT;
> >> }
> >> +static inline bool cpu_has_vmx_virtual_intr_delivery(void)
> >> +{
> >> + return vmcs_config.cpu_based_2nd_exec_ctrl &
> >> + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
> >> +}
> >> +
> >> static inline bool cpu_has_vmx_flexpriority(void)
> >> {
> >> return cpu_has_vmx_tpr_shadow() &&
> >> @@ -2480,7 +2493,8 @@ static __init int setup_vmcs_config(struct
> > vmcs_config *vmcs_conf)
> >> SECONDARY_EXEC_PAUSE_LOOP_EXITING |
> >> SECONDARY_EXEC_RDTSCP |
> >> SECONDARY_EXEC_ENABLE_INVPCID |
> >> - SECONDARY_EXEC_APIC_REGISTER_VIRT;
> >> + SECONDARY_EXEC_APIC_REGISTER_VIRT |
> >> + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
> >> if (adjust_vmx_controls(min2, opt2,
> >> MSR_IA32_VMX_PROCBASED_CTLS2,
> >> &_cpu_based_2nd_exec_control) < 0)
> >> @@ -2494,7 +2508,8 @@ static __init int setup_vmcs_config(struct
> >> vmcs_config *vmcs_conf)
> >>
> >> if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
> >> _cpu_based_2nd_exec_control &= ~(
> >> - SECONDARY_EXEC_APIC_REGISTER_VIRT);
> >> + SECONDARY_EXEC_APIC_REGISTER_VIRT |
> >> + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
> >>
> >> if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
> >> /*
> >> CR3 accesses and invlpg don't need to cause VM Exits when EPT @@
> >> -2696,6 +2711,9 @@ static __init int hardware_setup(void) if
> >> (!cpu_has_vmx_apic_register_virt()) enable_apicv_reg = 0;
> >> + if (!cpu_has_vmx_virtual_intr_delivery())
> >> + enable_apicv_vid = 0;
> >> +
> >> if (nested)
> >> nested_vmx_setup_ctls_msrs();
> >> @@ -3811,6 +3829,8 @@ static u32 vmx_secondary_exec_control(struct
> > vcpu_vmx *vmx)
> >> exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
> >> if (!enable_apicv_reg)
> >> exec_control &= ~SECONDARY_EXEC_APIC_REGISTER_VIRT;
> >> + if (!enable_apicv_vid)
> >> + exec_control &= ~SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
> >> return exec_control;
> >> }
> >> @@ -3855,6 +3875,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
> >> vmx_secondary_exec_control(vmx));
> >> }
> >> + if (enable_apicv_vid) {
> >> + vmcs_write64(EOI_EXIT_BITMAP0, 0);
> >> + vmcs_write64(EOI_EXIT_BITMAP1, 0);
> >> + vmcs_write64(EOI_EXIT_BITMAP2, 0);
> >> + vmcs_write64(EOI_EXIT_BITMAP3, 0);
> >> +
> >> + vmcs_write16(GUEST_INTR_STATUS, 0);
> >> + }
> >> +
> >> if (ple_gap) {
> >> vmcs_write32(PLE_GAP, ple_gap);
> >> vmcs_write32(PLE_WINDOW, ple_window);
> >> @@ -4770,6 +4799,16 @@ static int handle_apic_access(struct kvm_vcpu
> > *vcpu)
> >> return emulate_instruction(vcpu, 0) == EMULATE_DONE;
> >> }
> >> +static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
> >> +{
> >> + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> >> + int vector = exit_qualification & 0xff;
> >> +
> >> + /* EOI-induced VM exit is trap-like and thus no need to adjust IP */
> >> + kvm_apic_set_eoi_accelerated(vcpu, vector);
> >> + return 1;
> >> +}
> >> +
> >> static int handle_apic_write(struct kvm_vcpu *vcpu)
> >> {
> >> unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> >> @@ -5719,6 +5758,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct
> > kvm_vcpu *vcpu) = {
> >> [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
> >> [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
> >> [EXIT_REASON_APIC_WRITE] = handle_apic_write,
> >> + [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced,
> >> [EXIT_REASON_WBINVD] = handle_wbinvd,
> >> [EXIT_REASON_XSETBV] = handle_xsetbv,
> >> [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
> >> @@ -6049,6 +6089,11 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
> >>
> >> static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
> >> {
> >> + /* no need for tpr_threshold update if APIC virtual
> >> + * interrupt delivery is enabled */
> >> + if (!enable_apicv_vid)
> >> + return ;
> >
> > Just set kvm_x86_ops->update_cr8_intercept to NULL if !enable_apicv_vid
> > and the function will not be called.
>
> Sure.
>
> >> +
> >> if (irr == -1 || tpr < irr) {
> >> vmcs_write32(TPR_THRESHOLD, 0);
> >> return;
> >> @@ -6057,6 +6102,79 @@ static void update_cr8_intercept(struct kvm_vcpu
> > *vcpu, int tpr, int irr)
> >> vmcs_write32(TPR_THRESHOLD, irr);
> >> }
> >> +static int vmx_has_virtual_interrupt_delivery(struct kvm_vcpu *vcpu)
> >> +{
> >> + return irqchip_in_kernel(vcpu->kvm) && enable_apicv_vid;
> >> +}
> >> +
> >> +static void vmx_update_irq(struct kvm_vcpu *vcpu)
> >> +{
> >> + u16 status;
> >> + u8 old;
> >> + int vector;
> >> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> >> +
> >> + if (!enable_apicv_vid)
> >> + return ;
> > Ditto. Set kvm_x86_ops->update_irq to a function that does nothing if
> > !enable_apicv_vid. BTW you do not set this callback in SVM code and call
> > it unconditionally.
> >
> >> +
> >> + vector = kvm_apic_get_highest_irr(vcpu);
> >> + if (vector == -1)
> >> + return;
> >> +
> >> + status = vmcs_read16(GUEST_INTR_STATUS);
> >> + old = (u8)status & 0xff;
> >> + if ((u8)vector != old) {
> >> + status &= ~0xff;
> >> + status |= (u8)vector;
> >> + vmcs_write16(GUEST_INTR_STATUS, status);
> >> + }
> > Please write RVI accessor functions.
> Sure.
>
> >> +
> >> + if (vmx->eoi_exitmap_changed) {
> >> +#define UPDATE_EOI_EXITMAP(v, e) { \
> >> + if ((v)->eoi_exitmap_changed & (1 << (e))) \
> >> + vmcs_write64(EOI_EXIT_BITMAP##e, \
> >> + (v)->eoi_exit_bitmap[e] | (v)->eoi_exit_bitmap_global[e]); }
> > Inline function would do. But why calculate this on each entry? We want
> > EOI exits only for level IOAPIC interrupts and edge IOAPIC interrupt
> > with registered notifiers. This configuration rarely changes.
>
> eoi_exitmap_changed is used to track whether the trigger mode has changed. As you
> said, it changes rarely, so this code will seldom be executed.
>
But the code still checks whether the bitmap was changed during each interrupt
injection. Recalculate the bitmap when a notifier is added/removed or the ioapic
configuration changes. Use a request bit to reload the new bitmap.
> >
> >
> >> +
> >> + UPDATE_EOI_EXITMAP(vmx, 0);
> >> + UPDATE_EOI_EXITMAP(vmx, 1);
> >> + UPDATE_EOI_EXITMAP(vmx, 2);
> >> + UPDATE_EOI_EXITMAP(vmx, 3);
> >> + vmx->eoi_exitmap_changed = 0;
> >> + }
> >> +}
> >> +
> >> +static void vmx_set_eoi_exitmap(struct kvm_vcpu *vcpu,
> >> + int vector,
> >> + int need_eoi, int global)
> >> +{
> >> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> >> + int index, offset, changed;
> >> + unsigned long *eoi_exitmap;
> >> +
> >> + if (!enable_apicv_vid)
> >> + return ;
> >> +
> >> + if (WARN_ONCE((vector < 0) || (vector > 255),
> >> + "KVM VMX: vector (%d) out of range\n", vector))
> >> + return;
> >> +
> >> + index = vector >> 6;
> >> + offset = vector & 63;
> >> + if (global)
> >> + eoi_exitmap =
> >> + (unsigned long *)&vmx->eoi_exit_bitmap_global[index];
> >> + else
> >> + eoi_exitmap = (unsigned long *)&vmx->eoi_exit_bitmap[index];
> >> +
> >> + if (need_eoi)
> >> + changed = !test_and_set_bit(offset, eoi_exitmap);
> >> + else
> >> + changed = test_and_clear_bit(offset, eoi_exitmap);
> >> +
> >> + if (changed)
> >> + vmx->eoi_exitmap_changed |= 1 << index;
> >> +}
> >> +
> >> static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) { u32
> >> exit_intr_info; @@ -7320,6 +7438,9 @@ static struct kvm_x86_ops
> >> vmx_x86_ops = { .enable_nmi_window = enable_nmi_window,
> >> .enable_irq_window = enable_irq_window, .update_cr8_intercept =
> >> update_cr8_intercept,
> >> + .has_virtual_interrupt_delivery = vmx_has_virtual_interrupt_delivery,
> >> + .update_irq = vmx_update_irq,
> > You need to initialize this one in svm.c too.
> >
> >> + .set_eoi_exitmap = vmx_set_eoi_exitmap,
> >>
> >> .set_tss_addr = vmx_set_tss_addr,
> >> .get_tdp_level = get_ept_level,
> >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >> index 4f76417..8b8de3b 100644
> >> --- a/arch/x86/kvm/x86.c
> >> +++ b/arch/x86/kvm/x86.c
> >> @@ -5190,6 +5190,13 @@ static void inject_pending_event(struct kvm_vcpu
> > *vcpu)
> >> vcpu->arch.nmi_injected = true;
> >> kvm_x86_ops->set_nmi(vcpu);
> >> }
> >> + } else if (kvm_apic_vid_enabled(vcpu)) {
> >> + if (kvm_cpu_has_extint(vcpu) &&
> >> + kvm_x86_ops->interrupt_allowed(vcpu)) {
> >> + kvm_queue_interrupt(vcpu,
> >> + kvm_cpu_get_extint(vcpu), false);
> >> + kvm_x86_ops->set_irq(vcpu);
> >> + }
> > Drop all this and modify kvm_cpu_has_interrupt()/kvm_cpu_get_interrupt()
> > to consider apic interrupts only if vid is enabled then the if below
> > will just work.
> Ok.
>
> >
> >> } else if (kvm_cpu_has_interrupt(vcpu)) {
> >> if (kvm_x86_ops->interrupt_allowed(vcpu)) {
> >> kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
> >> @@ -5289,12 +5296,19 @@ static int vcpu_enter_guest(struct kvm_vcpu
> > *vcpu)
> >> }
> >>
> >> if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> >> + /* update architecture specific hints for APIC
> >> + * virtual interrupt delivery */
> >> + kvm_x86_ops->update_irq(vcpu);
> >> +
> >> inject_pending_event(vcpu);
> >>
> >> /* enable NMI/IRQ window open exits if needed */
> >> if (vcpu->arch.nmi_pending)
> >> kvm_x86_ops->enable_nmi_window(vcpu);
> >> - else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
> >> + else if (kvm_apic_vid_enabled(vcpu)) {
> >> + if (kvm_cpu_has_extint(vcpu))
> >> + kvm_x86_ops->enable_irq_window(vcpu);
> > Same as above. With a proper kvm_cpu_has_interrupt() implementation this
> > if is not needed.
> >
> >> + } else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
> >> kvm_x86_ops->enable_irq_window(vcpu);
> >>
> >> if (kvm_lapic_enabled(vcpu)) {
> >> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> >> index 166c450..898aa62 100644
> >> --- a/virt/kvm/ioapic.c
> >> +++ b/virt/kvm/ioapic.c
> >> @@ -186,6 +186,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic,
> >> int
> > irq)
> >> /* need to read apic_id from apic regiest since
> >> * it can be
> >> rewritten */ irqe.dest_id = ioapic->kvm->bsp_vcpu_id;
> >> + kvm_set_eoi_exitmap(ioapic->kvm->vcpus[0], irqe.vector, 1, 1);
> >> }
> >> #endif return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
> >> --
> >> 1.7.1
> >
> > --
> > Gleb.
>
>
> Best regards,
> Yang
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html