Gleb Natapov wrote on 2013-02-04:
> On Mon, Feb 04, 2013 at 05:05:14PM +0800, Yang Zhang wrote:
>> From: Yang Zhang <[email protected]>
>>
>> Posted Interrupt allows APIC interrupts to be injected into the guest directly
>> without any vmexit.
>>
>> - When delivering an interrupt to the guest, if the target vcpu is running,
>> update the Posted-interrupt requests bitmap and send a notification event
>> to the vcpu. Then the vcpu will handle this interrupt automatically,
>> without any software involvement.
>> - If the target vcpu is not running or there is already a notification event
>> pending in the vcpu, do nothing. The interrupt will be handled by
>> the next vm entry.
>> Signed-off-by: Yang Zhang <[email protected]>
>> ---
>> arch/x86/include/asm/entry_arch.h | 1 +
>> arch/x86/include/asm/hw_irq.h | 1 +
>> arch/x86/include/asm/irq_vectors.h | 4 +
>> arch/x86/include/asm/kvm_host.h | 3 + arch/x86/include/asm/vmx.h
>> | 4 + arch/x86/kernel/entry_64.S | 5 +
>> arch/x86/kernel/irq.c | 19 ++++
>> arch/x86/kernel/irqinit.c | 4 + arch/x86/kvm/lapic.c
>> | 15 +++- arch/x86/kvm/lapic.h | 1 +
>> arch/x86/kvm/svm.c | 6 ++ arch/x86/kvm/vmx.c
>> | 164 +++++++++++++++++++++++++++++++-----
>> arch/x86/kvm/x86.c | 4 + include/linux/kvm_host.h
>> | 1 + 14 files changed, 208 insertions(+), 24 deletions(-)
>> diff --git a/arch/x86/include/asm/entry_arch.h
>> b/arch/x86/include/asm/entry_arch.h index 40afa00..7b0a29e 100644 ---
>> a/arch/x86/include/asm/entry_arch.h +++
>> b/arch/x86/include/asm/entry_arch.h @@ -18,6 +18,7 @@
>> BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
>> #endif
>>
>> BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
>> +BUILD_INTERRUPT(posted_intr_ipi, POSTED_INTR_VECTOR)
> Missing CONFIG_HAVE_KVM ifdef. Have you verified that this patch
> compiles with KVM support disabled? Also give it a name that will
> associate it with KVM.
Yes, but it seems it is selected by x86 by default, so it is always enabled when
building the kernel.
I will remove the select in Kconfig and try again.
>>
>> /*
>> * every pentium local APIC has two 'local interrupts', with a
>> diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
>> index eb92a6e..ee61af3 100644
>> --- a/arch/x86/include/asm/hw_irq.h
>> +++ b/arch/x86/include/asm/hw_irq.h
>> @@ -28,6 +28,7 @@
>> /* Interrupt handlers registered during init_IRQ */ extern void
>> apic_timer_interrupt(void); extern void x86_platform_ipi(void);
>> +extern void posted_intr_ipi(void); extern void error_interrupt(void);
>> extern void irq_work_interrupt(void);
>> diff --git a/arch/x86/include/asm/irq_vectors.h
>> b/arch/x86/include/asm/irq_vectors.h index 1508e51..6421a63 100644 ---
>> a/arch/x86/include/asm/irq_vectors.h +++
>> b/arch/x86/include/asm/irq_vectors.h @@ -102,6 +102,10 @@
>> */
>> #define X86_PLATFORM_IPI_VECTOR 0xf7
>> +#ifdef CONFIG_HAVE_KVM
>> +#define POSTED_INTR_VECTOR 0xf2
>> +#endif
>> +
>> /*
>> * IRQ work vector:
>> */
>> diff --git a/arch/x86/include/asm/kvm_host.h
>> b/arch/x86/include/asm/kvm_host.h index b8388e9..bab1c0a 100644 ---
>> a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -704,6 +704,9 @@ struct kvm_x86_ops {
>> void (*hwapic_isr_update)(struct kvm *kvm, int isr);
>> void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
>> void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
>> + bool (*send_notification_event)(struct kvm_vcpu *vcpu,
>> + int vector, int *result);
>> + bool (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
>> int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
>> int (*get_tdp_level)(void);
>> u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
>> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
>> index 694586c..f5ec72c 100644
>> --- a/arch/x86/include/asm/vmx.h
>> +++ b/arch/x86/include/asm/vmx.h
>> @@ -153,6 +153,7 @@
>> #define PIN_BASED_EXT_INTR_MASK 0x00000001
>> #define PIN_BASED_NMI_EXITING 0x00000008
>> #define PIN_BASED_VIRTUAL_NMIS 0x00000020
>> +#define PIN_BASED_POSTED_INTR 0x00000080
>>
>> #define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002 #define
>> VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 @@ -175,6 +176,7 @@
>> /* VMCS Encodings */ enum vmcs_field { VIRTUAL_PROCESSOR_ID
>> = 0x00000000, + POSTED_INTR_NV = 0x00000002,
>> GUEST_ES_SELECTOR = 0x00000800, GUEST_CS_SELECTOR
>> = 0x00000802, GUEST_SS_SELECTOR = 0x00000804,
>> @@ -209,6 +211,8 @@ enum vmcs_field { VIRTUAL_APIC_PAGE_ADDR_HIGH
>> = 0x00002013, APIC_ACCESS_ADDR = 0x00002014,
>> APIC_ACCESS_ADDR_HIGH = 0x00002015,
>> + POSTED_INTR_DESC_ADDR = 0x00002016,
>> + POSTED_INTR_DESC_ADDR_HIGH = 0x00002017,
>> EPT_POINTER = 0x0000201a,
>> EPT_POINTER_HIGH = 0x0000201b,
>> EOI_EXIT_BITMAP0 = 0x0000201c,
>> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
>> index 70641af..c6c47a3 100644
>> --- a/arch/x86/kernel/entry_64.S
>> +++ b/arch/x86/kernel/entry_64.S
>> @@ -1177,6 +1177,11 @@ apicinterrupt LOCAL_TIMER_VECTOR \
>> apicinterrupt X86_PLATFORM_IPI_VECTOR \
>> x86_platform_ipi smp_x86_platform_ipi
>> +#ifdef CONFIG_HAVE_KVM
>> +apicinterrupt POSTED_INTR_VECTOR \
>> + posted_intr_ipi smp_posted_intr_ipi
>> +#endif
>> +
>> apicinterrupt THRESHOLD_APIC_VECTOR \
>> threshold_interrupt smp_threshold_interrupt
>> apicinterrupt THERMAL_APIC_VECTOR \
>> diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
>> index e4595f1..3551cf2 100644
>> --- a/arch/x86/kernel/irq.c
>> +++ b/arch/x86/kernel/irq.c
>> @@ -228,6 +228,25 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
>> set_irq_regs(old_regs);
>> }
>> +/* + * Handler for POSTED_INTERRUPT_VECTOR. + */ #ifdef
>> CONFIG_HAVE_KVM +void smp_posted_intr_ipi(struct pt_regs *regs) +{
>> + struct pt_regs *old_regs = set_irq_regs(regs); + + ack_APIC_irq();
>> +
>> + irq_enter(); + + exit_idle(); + + irq_exit(); +
>> + set_irq_regs(old_regs); +} + +
> One blank line is enough.
>
>> EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
>>
>> #ifdef CONFIG_HOTPLUG_CPU
>> diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
>> index 6e03b0d..f90c5ae 100644
>> --- a/arch/x86/kernel/irqinit.c
>> +++ b/arch/x86/kernel/irqinit.c
>> @@ -205,6 +205,10 @@ static void __init apic_intr_init(void)
>>
>> /* IPI for X86 platform specific use */
>> alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi);
>> +#ifdef CONFIG_HAVE_KVM
>> + /* IPI for posted interrupt use */
>> + alloc_intr_gate(POSTED_INTR_VECTOR, posted_intr_ipi);
>> +#endif
>>
>> /* IPI vectors for APIC spurious and error interrupts */
>> alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>> index 02b51dd..df6b6a3 100644
>> --- a/arch/x86/kvm/lapic.c
>> +++ b/arch/x86/kvm/lapic.c
>> @@ -379,6 +379,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic
> *apic)
>> if (!apic->irr_pending)
>> return -1;
>> + kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
>> result = apic_search_irr(apic);
>> ASSERT(result == -1 || result >= 16);
>> @@ -685,6 +686,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> delivery_mode,
>> {
>> int result = 0;
>> struct kvm_vcpu *vcpu = apic->vcpu;
>> + bool send = false;
>>
>> switch (delivery_mode) {
>> case APIC_DM_LOWEST:
>> @@ -700,7 +702,12 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> delivery_mode,
>> } else
>> apic_clear_vector(vector, apic->regs + APIC_TMR);
>> - result = !apic_test_and_set_irr(vector, apic);
>> + if (kvm_x86_ops->vm_has_apicv(vcpu->kvm))
> Just call send_notification_event() and do the check inside. And call it
> deliver_posted_interrupt() or something. It does more than just sends
> notification event. Actually it may not send it at all.
The code logic is different with and without apicv. So even if we put the check inside
the callee, we still need to check it in the caller. I think the current solution is
clearer.
>> + send = kvm_x86_ops->send_notification_event(vcpu,
>> + vector, &result);
>> + else
>> + result = !apic_test_and_set_irr(vector, apic);
>> +
>> trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
>> trig_mode, vector, !result);
>> if (!result) {
>> @@ -710,8 +717,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> delivery_mode,
>> break;
>> }
>> - kvm_make_request(KVM_REQ_EVENT, vcpu);
>> - kvm_vcpu_kick(vcpu);
>> + if (!send) {
>> + kvm_make_request(KVM_REQ_EVENT, vcpu);
>> + kvm_vcpu_kick(vcpu);
>> + }
>> break;
>>
>> case APIC_DM_REMRD:
>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
>> index 1676d34..632111f 100644
>> --- a/arch/x86/kvm/lapic.h
>> +++ b/arch/x86/kvm/lapic.h
>> @@ -46,6 +46,7 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu);
>> void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
>> u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
>> void kvm_apic_set_version(struct kvm_vcpu *vcpu);
>> +void kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned int *pir);
>>
>> int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
>> int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index a7d60d7..37f961d 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -3591,6 +3591,11 @@ static void svm_hwapic_isr_update(struct kvm *kvm,
> int isr)
>> return;
>> }
>> +static bool svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
>> +{
>> + return false;
>> +}
>> +
>> static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm
>> *svm = to_svm(vcpu); @@ -4319,6 +4324,7 @@ static struct kvm_x86_ops
>> svm_x86_ops = { .vm_has_apicv = svm_vm_has_apicv,
>> .load_eoi_exitmap
>> = svm_load_eoi_exitmap, .hwapic_isr_update = svm_hwapic_isr_update,
>> + .sync_pir_to_irr = svm_sync_pir_to_irr,
>>
>> .set_tss_addr = svm_set_tss_addr,
>> .get_tdp_level = get_npt_level,
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index e826d29..d2b02f2 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -84,8 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO);
>> static bool __read_mostly fasteoi = 1;
>> module_param(fasteoi, bool, S_IRUGO);
>> -static bool __read_mostly enable_apicv_reg_vid = 1;
>> -module_param(enable_apicv_reg_vid, bool, S_IRUGO);
>> +static bool __read_mostly enable_apicv = 1;
>> +module_param(enable_apicv, bool, S_IRUGO);
>>
>> /*
>> * If nested=1, nested virtualization is supported, i.e., guests may use
>> @@ -370,6 +370,41 @@ struct nested_vmx {
>> struct page *apic_access_page;
>> };
>> +#define POSTED_INTR_ON 0
>> +/* Posted-Interrupt Descriptor */
>> +struct pi_desc {
>> + u32 pir[8]; /* Posted interrupt requested */
>> + union {
>> + struct {
>> + u8 on:1,
>> + rsvd:7;
>> + } control;
>> + u32 rsvd[8];
>> + } u;
>> +} __aligned(64);
>> +
>> +static bool pi_test_on(struct pi_desc *pi_desc)
>> +{
>> + return test_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->u.control);
>> +}
>> +
>> +static bool pi_test_and_set_on(struct pi_desc *pi_desc)
>> +{
>> + return test_and_set_bit(POSTED_INTR_ON,
>> + (unsigned long *)&pi_desc->u.control);
>> +}
>> +
>> +static bool pi_test_and_clear_on(struct pi_desc *pi_desc)
>> +{
>> + return test_and_clear_bit(POSTED_INTR_ON,
>> + (unsigned long *)&pi_desc->u.control);
>> +}
>> +
>> +static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
>> +{
>> + return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
>> +}
>> +
>> struct vcpu_vmx {
>> struct kvm_vcpu vcpu;
>> unsigned long host_rsp;
>> @@ -434,6 +469,9 @@ struct vcpu_vmx {
>>
>> bool rdtscp_enabled;
>> + /* Posted interrupt descriptor */
>> + struct pi_desc *pi;
>> +
> You haven't answered on my previous review why are you trying save 46
> bytes here.
Sorry, I don't get your point. It's just a pointer and it only takes 8 bytes.
>> /* Support for a guest hypervisor (nested VMX) */
>> struct nested_vmx nested;
>> };
>> @@ -788,6 +826,18 @@ static inline bool
> cpu_has_vmx_virtual_intr_delivery(void)
>> SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
>> }
>> +static inline bool cpu_has_vmx_posted_intr(void)
>> +{
>> + return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
>> +}
>> +
>> +static inline bool cpu_has_vmx_apicv(void)
>> +{
>> + return cpu_has_vmx_apic_register_virt() &&
>> + cpu_has_vmx_virtual_intr_delivery() &&
>> + cpu_has_vmx_posted_intr();
>> +}
>> +
>> static inline bool cpu_has_vmx_flexpriority(void)
>> {
>> return cpu_has_vmx_tpr_shadow() &&
>> @@ -2535,12 +2585,6 @@ static __init int setup_vmcs_config(struct
> vmcs_config *vmcs_conf)
>> u32 _vmexit_control = 0;
>> u32 _vmentry_control = 0;
>> - min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
>> - opt = PIN_BASED_VIRTUAL_NMIS;
>> - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
>> - &_pin_based_exec_control) < 0)
>> - return -EIO;
>> -
>> min = CPU_BASED_HLT_EXITING |
>> #ifdef CONFIG_X86_64
>> CPU_BASED_CR8_LOAD_EXITING |
>> @@ -2617,6 +2661,17 @@ static __init int setup_vmcs_config(struct
> vmcs_config *vmcs_conf)
>> &_vmexit_control) < 0)
>> return -EIO;
>> + min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
>> + opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR;
>> + if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
>> + &_pin_based_exec_control) < 0)
>> + return -EIO;
>> +
>> + if (!(_cpu_based_2nd_exec_control &
>> + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ||
>> + !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT))
>> + _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
>> +
>> min = 0; opt = VM_ENTRY_LOAD_IA32_PAT; if
>> (adjust_vmx_controls(min,
>> opt, MSR_IA32_VMX_ENTRY_CTLS, @@ -2795,11 +2850,10 @@ static __init
>> int hardware_setup(void) if (!cpu_has_vmx_ple()) ple_gap
>> = 0;
>> - if (!cpu_has_vmx_apic_register_virt() ||
>> - !cpu_has_vmx_virtual_intr_delivery())
>> - enable_apicv_reg_vid = 0;
>> + if (!cpu_has_vmx_apicv())
>> + enable_apicv = 0;
>>
>> - if (enable_apicv_reg_vid)
>> + if (enable_apicv)
>> kvm_x86_ops->update_cr8_intercept = NULL;
>> else
>> kvm_x86_ops->hwapic_irr_update = NULL;
>> @@ -3868,6 +3922,61 @@ static void
> vmx_disable_intercept_msr_write_x2apic(u32 msr)
>> msr, MSR_TYPE_W);
>> }
>> +static int vmx_vm_has_apicv(struct kvm *kvm)
>> +{
>> + return enable_apicv && irqchip_in_kernel(kvm);
>> +}
>> +
>> +static bool vmx_send_notification_event(struct kvm_vcpu *vcpu,
>> + int vector, int *result)
>> +{
>> + struct vcpu_vmx *vmx = to_vmx(vcpu);
>> +
>> + *result = !pi_test_and_set_pir(vector, vmx->pi);
> The problem here is that interrupt may still be pending in IRR so
> eventually it will be coalesced, but we report it as delivered here. I
> do not see solution for this yet.
Yes, that's true, and it may result in an interrupt being lost. But even on real
hardware, an interrupt can also be lost in some cases: for example, when the cpu doesn't
turn on irqs in time or there is a high-priority interrupt pending in IRR.
And since there is already an interrupt pending in IRR, the interrupt will still
be handled.
>> + if (!pi_test_and_set_on(vmx->pi) && (vcpu->mode == IN_GUEST_MODE)) {
>> + kvm_make_request(KVM_REQ_PENDING_PIR, vcpu);
> Why not set KVM_REQ_EVENT here? What this intermediate event is needed
> for?
See the answer below.
>> + apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
>> + POSTED_INTR_VECTOR);
>> + if (!pi_test_on(vmx->pi))
> Isn't it too optimistic of you to expect IPI to be delivered and
> processed by remote CPU by this point?
I have collected some data on my box, and it shows that about 5 percent of the posted
interrupts have already been handled by the time this check is made. How about adding an
unlikely() here?
It also means that in 5% of cases checking the request is unnecessary. And checking
KVM_REQ_EVENT is more costly, so I use a lighter-weight request to do it.
>> + clear_bit(KVM_REQ_PENDING_PIR, &vcpu->requests) ;
>> + return true;
>> + }
>> + return false;
>> +}
>> +
>> +static bool vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
>> +{
>> + struct vcpu_vmx *vmx = to_vmx(vcpu);
>> + struct kvm_lapic *apic = vcpu->arch.apic;
>> + unsigned int i, old, new, ret_val, irr_offset, pir_val;
>> + bool make_request = false;
>> +
>> + if (!vmx_vm_has_apicv(vcpu->kvm) || !pi_test_and_clear_on(vmx->pi))
>> + return false;
>> +
>> + for (i = 0; i <= 7; i++) {
>> + pir_val = xchg(&vmx->pi->pir[i], 0);
>> + if (pir_val) {
>> + irr_offset = APIC_IRR + i * 0x10;
>> + do {
>> + old = kvm_apic_get_reg(apic, irr_offset);
>> + new = old | pir_val;
>> + ret_val = cmpxchg((u32 *)(apic->regs +
>> + irr_offset), old, new);
>> + } while (unlikely(ret_val != old));
>> + make_request = true;
>> + }
>> + }
>> +
>> + return make_request;
>> +}
>> +
>> +static void free_pi(struct vcpu_vmx *vmx)
>> +{
>> + if (vmx_vm_has_apicv(vmx->vcpu.kvm))
>> + kfree(vmx->pi);
>> +}
>> +
>> /*
>> * Set up the vmcs's constant host-state fields, i.e., host-state fields
>> that
>> * will not change in the lifetime of the guest.
>> @@ -3928,6 +4037,15 @@ static void set_cr4_guest_host_mask(struct
> vcpu_vmx *vmx)
>> vmcs_writel(CR4_GUEST_HOST_MASK,
>> ~vmx->vcpu.arch.cr4_guest_owned_bits); }
>> +static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
>> +{
>> + u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
>> +
>> + if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
>> + pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
>> + return pin_based_exec_ctrl;
>> +}
>> +
>> static u32 vmx_exec_control(struct vcpu_vmx *vmx) { u32 exec_control
>> = vmcs_config.cpu_based_exec_ctrl; @@ -3945,11 +4063,6 @@ static u32
>> vmx_exec_control(struct vcpu_vmx *vmx) return exec_control; }
>> -static int vmx_vm_has_apicv(struct kvm *kvm)
>> -{
>> - return enable_apicv_reg_vid && irqchip_in_kernel(kvm);
>> -}
>> -
>> static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) { u32
>> exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; @@ -4005,8 +4118,7
>> @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>> vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
>>
>> /* Control */
>> - vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
>> - vmcs_config.pin_based_exec_ctrl);
>> + vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
>>
>> vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
> vmx_exec_control(vmx));
>>
>> @@ -4015,13 +4127,17 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>> vmx_secondary_exec_control(vmx));
>> }
>> - if (enable_apicv_reg_vid) {
>> + if (vmx_vm_has_apicv(vmx->vcpu.kvm)) {
>> vmcs_write64(EOI_EXIT_BITMAP0, 0);
>> vmcs_write64(EOI_EXIT_BITMAP1, 0);
>> vmcs_write64(EOI_EXIT_BITMAP2, 0);
>> vmcs_write64(EOI_EXIT_BITMAP3, 0);
>>
>> vmcs_write16(GUEST_INTR_STATUS, 0);
>> +
>> + vmx->pi = kzalloc(sizeof(struct pi_desc), GFP_KERNEL);
>> + vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
>> + vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((vmx->pi)));
>> }
>>
>> if (ple_gap) { @@ -4171,6 +4287,9 @@ static int vmx_vcpu_reset(struct
>> kvm_vcpu *vcpu) vmcs_write64(APIC_ACCESS_ADDR,
>>
>> page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
>> + if (vmx_vm_has_apicv(vcpu->kvm))
>> + memset(vmx->pi, 0, sizeof(struct pi_desc));
>> +
>> if (vmx->vpid != 0)
>> vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
>> @@ -6746,6 +6865,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
>>
>> free_vpid(vmx); free_nested(vmx); + free_pi(vmx);
>> free_loaded_vmcs(vmx->loaded_vmcs); kfree(vmx->guest_msrs);
>> kvm_vcpu_uninit(vcpu); @@ -7647,6 +7767,8 @@ static struct
>> kvm_x86_ops vmx_x86_ops = { .load_eoi_exitmap =
>> vmx_load_eoi_exitmap,
>> .hwapic_irr_update = vmx_hwapic_irr_update, .hwapic_isr_update =
>> vmx_hwapic_isr_update,
>> + .sync_pir_to_irr = vmx_sync_pir_to_irr,
>> + .send_notification_event = vmx_send_notification_event,
>>
>> .set_tss_addr = vmx_set_tss_addr, .get_tdp_level = get_ept_level,
>> @@
>> -7750,7 +7872,7 @@ static int __init vmx_init(void)
>> memcpy(vmx_msr_bitmap_longmode_x2apic,
>> vmx_msr_bitmap_longmode,
>> PAGE_SIZE);
>> - if (enable_apicv_reg_vid) {
>> + if (enable_apicv) {
>> for (msr = 0x800; msr <= 0x8ff; msr++)
>> vmx_disable_intercept_msr_read_x2apic(msr);
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 9f25d70..6e1e6e7 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -2681,6 +2681,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>> static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
>>
>> struct kvm_lapic_state *s) { + kvm_x86_ops->sync_pir_to_irr(vcpu);
>> memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
>>
>> return 0; @@ -5698,6 +5699,9 @@ static int vcpu_enter_guest(struct
>> kvm_vcpu *vcpu) kvm_deliver_pmi(vcpu); if
>> (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu))
>> update_eoi_exitmap(vcpu);
>> + if (kvm_check_request(KVM_REQ_PENDING_PIR, vcpu))
>> + if (kvm_x86_ops->sync_pir_to_irr(vcpu))
>> + kvm_make_request(KVM_REQ_EVENT, vcpu);
>> }
>>
>> if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index 0350e0d..a410819 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -124,6 +124,7 @@ static inline bool is_error_page(struct page *page)
>> #define KVM_REQ_MCLOCK_INPROGRESS 20
>> #define KVM_REQ_EPR_EXIT 21
>> #define KVM_REQ_EOIBITMAP 22
>> +#define KVM_REQ_PENDING_PIR 23
>>
>> #define KVM_USERSPACE_IRQ_SOURCE_ID 0
>> #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
>> --
>> 1.7.1
>
> --
> Gleb.
Best regards,
Yang
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html