In a typical guest boot workload only 2-3 memslots are used
extensively, and most lookups repeatedly hit the same memslot.

Adding an LRU cache of the last matched slot improves the average
lookup time from 46 to 28 cycles (~40%) for this workload.
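
A rough standalone sketch of the idea (simplified userspace C with
made-up types; the real change is to search_memslots() in kvm_host.h
as shown in the diff below, which uses the kernel's atomic_t helpers
instead of C11 stdatomic):

    #include <stdatomic.h>
    #include <stddef.h>

    typedef unsigned long long gfn_t;

    struct memslot {
            gfn_t base_gfn;
            unsigned long npages;
    };

    struct memslots {
            struct memslot slots[32];  /* stand-in for KVM_MEM_SLOTS_NUM */
            int used_slots;
            atomic_int lru_slot;       /* index of the last slot that matched */
    };

    static struct memslot *search_memslots(struct memslots *s, gfn_t gfn)
    {
            /* Fast path: try the slot that satisfied the previous lookup. */
            int cached = atomic_load(&s->lru_slot);
            struct memslot *slot = &s->slots[cached];

            if (gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages)
                    return slot;

            /* Slow path: linear scan, remembering the hit for next time. */
            for (int i = 0; i < s->used_slots; i++) {
                    slot = &s->slots[i];
                    if (gfn >= slot->base_gfn &&
                        gfn < slot->base_gfn + slot->npages) {
                            atomic_store(&s->lru_slot, i);
                            return slot;
                    }
            }

            return NULL;
    }

The cached index is read and updated without locking; a stale value
only costs falling back to the existing linear scan.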

Signed-off-by: Igor Mammedov <[email protected]>
---
 include/linux/kvm_host.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 231dd94..1a37144 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -353,6 +353,7 @@ struct kvm_memslots {
        struct kvm_memory_slot memslots[KVM_MEM_SLOTS_NUM];
        /* The mapping table from slot id to the index in memslots[]. */
        short id_to_index[KVM_MEM_SLOTS_NUM];
+       atomic_t lru_slot;
 };
 
 struct kvm {
@@ -790,12 +791,19 @@ static inline void kvm_guest_exit(void)
 static inline struct kvm_memory_slot *
 search_memslots(struct kvm_memslots *slots, gfn_t gfn)
 {
-       struct kvm_memory_slot *memslot;
+       int slot = atomic_read(&slots->lru_slot);
+       struct kvm_memory_slot *memslot = &slots->memslots[slot];
+
+       if (gfn >= memslot->base_gfn &&
+           gfn < memslot->base_gfn + memslot->npages)
+               return memslot;
 
        kvm_for_each_memslot(memslot, slots)
                if (gfn >= memslot->base_gfn &&
-                     gfn < memslot->base_gfn + memslot->npages)
+                     gfn < memslot->base_gfn + memslot->npages) {
+                       atomic_set(&slots->lru_slot, memslot - slots->memslots);
                        return memslot;
+               }
 
        return NULL;
 }
-- 
1.8.3.1
