Marcelo Tosatti wrote:
Examine guest pagetable and bring the shadow back in sync. At the moment
sync_page is simplistic and only cares about shadow present entries
whose gfn remains unchanged.

It might be worthwhile to prepopulate the shadow in advance.

FIXME: the RW->RO transition needs a local TLB flush.


Yes!

 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
                                             gfn_t gfn,
                                             gva_t gaddr,
@@ -1536,6 +1581,7 @@ static int nonpaging_init_context(struct
        context->gva_to_gpa = nonpaging_gva_to_gpa;
        context->free = nonpaging_free;
        context->prefetch_page = nonpaging_prefetch_page;
+       context->sync_page = nonpaging_sync_page;
        context->root_level = 0;
        context->shadow_root_level = PT32E_ROOT_LEVEL;
        context->root_hpa = INVALID_PAGE;
@@ -1583,6 +1629,7 @@ static int paging64_init_context_common(
        context->page_fault = paging64_page_fault;
        context->gva_to_gpa = paging64_gva_to_gpa;
        context->prefetch_page = paging64_prefetch_page;
+       context->sync_page = paging64_sync_page;
        context->free = paging_free;
        context->root_level = level;
        context->shadow_root_level = level;
@@ -1604,6 +1651,7 @@ static int paging32_init_context(struct context->gva_to_gpa = paging32_gva_to_gpa;
        context->free = paging_free;
        context->prefetch_page = paging32_prefetch_page;
+       context->sync_page = paging32_sync_page;
        context->root_level = PT32_ROOT_LEVEL;
        context->shadow_root_level = PT32E_ROOT_LEVEL;
        context->root_hpa = INVALID_PAGE;
@@ -1623,6 +1671,7 @@ static int init_kvm_tdp_mmu(struct kvm_v
        context->page_fault = tdp_page_fault;
        context->free = nonpaging_free;
        context->prefetch_page = nonpaging_prefetch_page;
+       context->sync_page = nonpaging_sync_page;
        context->shadow_root_level = kvm_x86_ops->get_tdp_level();
        context->root_hpa = INVALID_PAGE;

Not sure this is right.

What if vcpu0 is in mode X, while vcpu1 is in mode Y. vcpu0 writes to some pagetable, causing both mode X and mode Y shadows to become unsynced, so on the next resync (either by vcpu0 or vcpu1) we need to sync both modes.

Same problem with kvm_mmu_pte_write(), which right now hacks around it.

Maybe we need a ->ops member.

+static int FNAME(sync_page)(struct kvm_vcpu *vcpu,
+                           struct kvm_mmu_page *sp)
+{
+       int i, nr_present = 0;
+       struct page *pt_page;
+       pt_element_t *pt;
+       void *gpte_kaddr;
+
+       pt_page = gfn_to_page_atomic(vcpu->kvm, sp->gfn);
+       if (is_error_page(pt_page)) {
+               kvm_release_page_clean(pt_page);
+               return -EFAULT;
+       }
+
+       gpte_kaddr = pt = kmap_atomic(pt_page, KM_USER0);
+
+       if (PTTYPE == 32)
+               pt += sp->role.quadrant << PT64_LEVEL_BITS;

Only works for level 1 pages (which is okay).

+
+       for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
+               if (is_shadow_present_pte(sp->spt[i])) {

Helper function needed for contents of inner loop.

+                       struct page *page;
+                       u64 spte;
+                       unsigned pte_access;
+
+                       if (!is_present_pte(*pt)) {
+                               rmap_remove(vcpu->kvm, &sp->spt[i]);
+                               sp->spt[i] = shadow_notrap_nonpresent_pte;
+                               pt++;
+                               continue;
+                       }

Are we missing a tlb flush?  Or will the caller take care of it?

+
+                       pte_access = sp->role.access & FNAME(gpte_access)(vcpu, 
*pt);
+                       /* user */
+                       if (pte_access & ACC_USER_MASK)
+                               spte |= shadow_user_mask;

There are some special cases involving cr0.wp=0 and the user mask. so spte.u is not correlated exactly with gpte.u.

+                       /* guest->shadow accessed sync */
+                       if (!(*pt & PT_ACCESSED_MASK))
+                               spte &= ~PT_ACCESSED_MASK;

spte shouldn't be accessible at all if gpte is not accessed, so we can set gpte.a on the next access (similar to spte not being writeable if gpte is not dirty).

+                       /* shadow->guest accessed sync */
+                       if (spte & PT_ACCESSED_MASK)
+                               set_bit(PT_ACCESSED_SHIFT, (unsigned long *)pt);

host accessed and guest accessed are very different. We shouldn't set host accessed unless we're sure the guest will access the page very soon.


+                       set_shadow_pte(&sp->spt[i], spte);

What if permissions are reduced?

You can use PT_* instead of shadow_* as this will never be called when ept is active.

I'm worried about the duplication with kvm_mmu_set_pte(). Perhaps that can be refactored instead to be the inner loop.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to