On Aug 19, 2014, at 2:47 AM, Paolo Bonzini <pbonz...@redhat.com> wrote:

> 
>> I think this patch is auditable, page-fault is always called by holding
>> srcu-lock so that a page fault can’t go across synchronize_srcu_expedited.
>> Only these cases can happen:
>> 
>> 1)  page fault occurs before synchronize_srcu_expedited.
>>    In this case, vcpu will generate mmio-exit for the memslot being 
>> registered
>>    by the ioctl. That’s ok since the ioctl have not finished.
>> 
>> 2) page fault occurs after synchronize_srcu_expedited and during
>>   increasing generation-number.
>>   In this case, userspace may get wrong mmio-exit (that happen if handing
>>   page-fault is slower that the ioctl), that’s ok too since userspace need do
>>  the check anyway like i said above.
>> 
>> 3) page fault occurs after generation-number update
>>   that’s definitely correct. :)
>> 
>>> Another alternative could be to use the low bit to mark an in-progress
>>> change, and skip the caching if the low bit is set.  Similar to a
>>> seqcount (except if read_seqcount_retry fails, we just punt and not
>>> retry anything), you could use it even though the memory barriers
>>> provided by write_seqcount_begin/end are not too useful in this case.
>> 
>> I do not know how the bit works, page fault will cache the memslot before
>> the bit set and cache the generation-number after the bit set.
>> 
>> Maybe i missed your idea, could you please detail it?
> 
> Something like this:
> 
> -     update_memslots(slots, new, kvm->memslots->generation);
> +     /* ensure generation number is always increased. */
> +     slots->generation = old_memslots->generation + 1;
> +     update_memslots(slots, new);
>       rcu_assign_pointer(kvm->memslots, slots);
>       synchronize_srcu_expedited(&kvm->srcu);
> +     slots->generation++;
> 
> Then case 1 and 2 will just have a cache miss.

So, case 2 is what you concerned? :) I still think it is ok but i do not have
strong opinion on that. How about simplify it like this:

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9314678..9fabf6a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -234,16 +234,22 @@ static unsigned int get_mmio_spte_generation(u64 spte)
        return gen;
 }
 
-static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
+static unsigned int __kvm_current_mmio_generation(struct kvm_memslots *slots)
 {
+
        /*
         * Init kvm generation close to MMIO_MAX_GEN to easily test the
         * code of handling generation number wrap-around.
         */
-       return (kvm_memslots(kvm)->generation +
+       return (slots->generation +
                      MMIO_MAX_GEN - 150) & MMIO_GEN_MASK;
 }
 
+static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
+{
+       return __kvm_current_mmio_generation(kvm_memslots(kvm));
+}
+
 static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
                           unsigned access)
 {
@@ -287,9 +293,15 @@ static bool set_mmio_spte(struct kvm *kvm, u64 *sptep, 
gfn_t gfn,
 
 static bool check_mmio_spte(struct kvm *kvm, u64 spte)
 {
+       struct kvm_memslots *slots = kvm_memslots(kvm);
        unsigned int kvm_gen, spte_gen;
 
-       kvm_gen = kvm_current_mmio_generation(kvm);
+       if (slots->updated)
+               return false;
+
+       smp_rmb();
+       
+       kvm_gen = __kvm_current_mmio_generation(slots);
        spte_gen = get_mmio_spte_generation(spte);
 
        trace_check_mmio_spte(spte, kvm_gen, spte_gen);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4b6c01b..1d4e78f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -96,7 +96,7 @@ static void hardware_disable_all(void);
 
 static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
 static void update_memslots(struct kvm_memslots *slots,
-                           struct kvm_memory_slot *new, u64 last_generation);
+                           struct kvm_memory_slot *new);
 
 static void kvm_release_pfn_dirty(pfn_t pfn);
 static void mark_page_dirty_in_slot(struct kvm *kvm,
@@ -685,8 +685,7 @@ static void sort_memslots(struct kvm_memslots *slots)
 }
 
 static void update_memslots(struct kvm_memslots *slots,
-                           struct kvm_memory_slot *new,
-                           u64 last_generation)
+                           struct kvm_memory_slot *new)
 {
        if (new) {
                int id = new->id;
@@ -697,8 +696,6 @@ static void update_memslots(struct kvm_memslots *slots,
                if (new->npages != npages)
                        sort_memslots(slots);
        }
-
-       slots->generation = last_generation + 1;
 }
 
 static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
@@ -720,10 +717,17 @@ static struct kvm_memslots *install_new_memslots(struct 
kvm *kvm,
 {
        struct kvm_memslots *old_memslots = kvm->memslots;
 
-       update_memslots(slots, new, kvm->memslots->generation);
+       /* ensure generation number is always increased. */
+       slots->updated = true;
+       slots->generation = old_memslots->generation;
+       update_memslots(slots, new);
        rcu_assign_pointer(kvm->memslots, slots);
        synchronize_srcu_expedited(&kvm->srcu);
 
+       slots->generation++;
+       smp_wmb();
+       slots->updated = false;
+
        kvm_arch_memslots_updated(kvm);
 
        return old_memslots;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to