On Mon, 2018-12-03 at 14:59 +0100, KarimAllah Ahmed wrote:
> The "APIC-access address" is simply a token that the hypervisor puts into
> the PFN of a 4K EPTE (or PTE if using shadow paging) that triggers APIC
> virtualization whenever a page walk terminates with that PFN. This address
> has to be a legal address (i.e. within the physical address range supported
> by the CPU), but it need not have WB memory behind it. In fact, it need not
> have anything at all behind it. When bit 31 ("activate secondary controls")
> of the primary processor-based VM-execution controls is set and bit 0
> ("virtualize APIC accesses") of the secondary processor-based VM-execution
> controls is set, the PFN recorded in the VMCS "APIC-access address" field
> will never be touched. (Instead, the access triggers APIC virtualization,
> which may access the PFN recorded in the "Virtual-APIC address" field of
> the VMCS.)
> 
> So stop mapping the "APIC-access address" page into the kernel and even
> drop the requirements to have a valid page backing it. Instead, just use
> some token that:
> 
> 1) Is not one of the valid guest pages.
> 2) Is within the physical address range supported by the CPU.
> 
> Suggested-by: Jim Mattson <jmatt...@google.com>
> Signed-off-by: KarimAllah Ahmed <karah...@amazon.de>
> ---
> 
> Thanks Jim for the commit message :)
> ---
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/mmu.c              | 10 ++++++
>  arch/x86/kvm/vmx.c              | 71 
> ++++++++++++++++++-----------------------
>  3 files changed, 42 insertions(+), 40 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index fbda5a9..7e50196 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1077,6 +1077,7 @@ struct kvm_x86_ops {
>       void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
>       void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
>       void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
> +     bool (*nested_apic_access_addr)(struct kvm_vcpu *vcpu, gpa_t gpa, hpa_t 
> *hpa);
>       void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
>       int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
>       int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 7c03c0f..ae46a8d 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -3962,9 +3962,19 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
>  static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
>                        gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable)
>  {
> +     hpa_t hpa;
>       struct kvm_memory_slot *slot;
>       bool async;
>  
> +     if (is_guest_mode(vcpu) &&
> +         kvm_x86_ops->nested_apic_access_addr &&
> +         kvm_x86_ops->nested_apic_access_addr(vcpu, gfn_to_gpa(gfn), &hpa)) {
> +             *pfn = hpa >> PAGE_SHIFT;
> +             if (writable)
> +                     *writable = true;
> +             return false;
> +     }

Thinking about this further, I still need to validate that the L12 EPT entry 
for this gfn actually contains the apic_access address, to ensure that I only 
fix up the fault when the L1 hypervisor has set up both the L12 VMCS 
APIC_ACCESS field and the L12 EPT to point at the same address.

Will fix and send v2.

> +
>       /*
>        * Don't expose private memslots to L2.
>        */
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 83a614f..340cf56 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -864,7 +864,6 @@ struct nested_vmx {
>        * Guest pages referred to in the vmcs02 with host-physical
>        * pointers, so we must keep them pinned while L2 runs.
>        */
> -     struct page *apic_access_page;
>       struct kvm_host_map virtual_apic_map;
>       struct kvm_host_map pi_desc_map;
>       struct kvm_host_map msr_bitmap_map;
> @@ -8512,10 +8511,6 @@ static void free_nested(struct kvm_vcpu *vcpu)
>       kfree(vmx->nested.cached_vmcs12);
>       kfree(vmx->nested.cached_shadow_vmcs12);
>       /* Unpin physical memory we referred to in the vmcs02 */
> -     if (vmx->nested.apic_access_page) {
> -             kvm_release_page_dirty(vmx->nested.apic_access_page);
> -             vmx->nested.apic_access_page = NULL;
> -     }
>       kvm_vcpu_unmap(&vmx->nested.virtual_apic_map);
>       kvm_vcpu_unmap(&vmx->nested.pi_desc_map);
>       vmx->nested.pi_desc = NULL;
> @@ -11901,41 +11896,27 @@ static void vmx_inject_page_fault_nested(struct 
> kvm_vcpu *vcpu,
>  static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
>                                                struct vmcs12 *vmcs12);
>  
> +static hpa_t vmx_apic_access_addr(void)
> +{
> +     /*
> +      * The physical address chosen here has to:
> +      * 1) Never be an address that could be assigned to a guest.
> +      * 2) Be within the maximum physical limits of the CPU.
> +      *
> +      * So our choice below is completely random, but at least it follows
> +      * these two rules.
> +      */
> +     return __pa_symbol(_text) & PAGE_MASK;
> +}
> +
>  static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
>  {
>       struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
>       struct vcpu_vmx *vmx = to_vmx(vcpu);
>       struct kvm_host_map *map;
> -     struct page *page;
> -     u64 hpa;
>  
> -     if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
> -             /*
> -              * Translate L1 physical address to host physical
> -              * address for vmcs02. Keep the page pinned, so this
> -              * physical address remains valid. We keep a reference
> -              * to it so we can release it later.
> -              */
> -             if (vmx->nested.apic_access_page) { /* shouldn't happen */
> -                     kvm_release_page_dirty(vmx->nested.apic_access_page);
> -                     vmx->nested.apic_access_page = NULL;
> -             }
> -             page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr);
> -             /*
> -              * If translation failed, no matter: This feature asks
> -              * to exit when accessing the given address, and if it
> -              * can never be accessed, this feature won't do
> -              * anything anyway.
> -              */
> -             if (!is_error_page(page)) {
> -                     vmx->nested.apic_access_page = page;
> -                     hpa = page_to_phys(vmx->nested.apic_access_page);
> -                     vmcs_write64(APIC_ACCESS_ADDR, hpa);
> -             } else {
> -                     vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
> -                                     
> SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
> -             }
> -     }
> +     if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
> +             vmcs_write64(APIC_ACCESS_ADDR, vmx_apic_access_addr());
>  
>       if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
>               map = &vmx->nested.virtual_apic_map;
> @@ -14196,12 +14177,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, 
> u32 exit_reason,
>       /* This is needed for same reason as it was needed in prepare_vmcs02 */
>       vmx->host_rsp = 0;
>  
> -     /* Unpin physical memory we referred to in vmcs02 */
> -     if (vmx->nested.apic_access_page) {
> -             kvm_release_page_dirty(vmx->nested.apic_access_page);
> -             vmx->nested.apic_access_page = NULL;
> -     }
> -
>       kvm_vcpu_unmap(&vmx->nested.virtual_apic_map);
>       kvm_vcpu_unmap(&vmx->nested.pi_desc_map);
>       vmx->nested.pi_desc = NULL;
> @@ -14949,6 +14924,21 @@ static int vmx_set_nested_state(struct kvm_vcpu 
> *vcpu,
>       return 0;
>  }
>  
> +static bool vmx_nested_apic_access_addr(struct kvm_vcpu *vcpu,
> +                                     gpa_t gpa, hpa_t *hpa)
> +{
> +     struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> +
> +     if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
> +         page_address_valid(vcpu, vmcs12->apic_access_addr) &&
> +         (gpa_to_gfn(gpa) == gpa_to_gfn(vmcs12->apic_access_addr))) {
> +             *hpa = vmx_apic_access_addr();
> +             return true;
> +     }
> +
> +     return false;
> +}
> +
>  static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
>       .cpu_has_kvm_support = cpu_has_kvm_support,
>       .disabled_by_bios = vmx_disabled_by_bios,
> @@ -15022,6 +15012,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init 
> = {
>       .update_cr8_intercept = update_cr8_intercept,
>       .set_virtual_apic_mode = vmx_set_virtual_apic_mode,
>       .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
> +     .nested_apic_access_addr = vmx_nested_apic_access_addr,
>       .get_enable_apicv = vmx_get_enable_apicv,
>       .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
>       .load_eoi_exitmap = vmx_load_eoi_exitmap,



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrer: Christian Schlaeger, Ralf Herbrich
Ust-ID: DE 289 237 879
Eingetragen am Amtsgericht Charlottenburg HRB 149173 B

Reply via email to