While caching the last used vma already does a nice job of avoiding
having to iterate the rbtree in find_vma, we can improve. After
studying the hit rate across a variety of workloads and environments,
it turns out to be roughly 45-50% - constant for a standard
desktop system (gnome3 + evolution + firefox + a few xterms),
several Java-related workloads (including Hadoop/terasort),
and aim7 - which indicates it is better than the 35% value currently
documented in the code.

By also caching the largest vma, that is, the one that spans the
most addresses, there is a steady 10-15% hit rate gain, putting
it above the 60% region. This improvement comes at a very low
overhead for a miss. Furthermore, systems with !CONFIG_MMU keep
the current logic.

This patch introduces a second mmap_cache pointer, which is just
as racy as the first but, as we already know, that does not matter
in this context. For documentation purposes, I have also added
ACCESS_ONCE() around the mm->mmap_cache updates, keeping them
consistent with the reads.
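
In condensed form, the lookup order this implements in find_vma()
is the following (the rbtree walk itself is elided; see the full
diff below):

	/* 1) Try the last used vma. */
	vma = ACCESS_ONCE(mm->mmap_cache[VMA_LAST_USED]);
	if (vma && vma->vm_end > addr && vma->vm_start <= addr)
		goto ret;

	/* 2) Try the largest vma, remembering its span so the rbtree
	 *    result below only replaces it when it is bigger. */
	vma = ACCESS_ONCE(mm->mmap_cache[VMA_LARGEST]);
	if (vma) {
		if (vma->vm_end > addr && vma->vm_start <= addr)
			goto ret;
		currlen = vma->vm_end - vma->vm_start;
	}

	/* 3) Fall back to the rbtree walk, then refresh both slots. */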

Cc: Hugh Dickins <hu...@google.com>
Cc: Michel Lespinasse <wal...@google.com>
Cc: Ingo Molnar <mi...@kernel.org>
Cc: Mel Gorman <mgor...@suse.de>
Cc: Rik van Riel <r...@redhat.com>
Cc: Guan Xuetao <g...@mprc.pku.edu.cn>
Signed-off-by: Davidlohr Bueso <davidl...@hp.com>
---
Please note that the nommu and unicore32 arch changes are *untested*.

I also have a patch on top of this one that caches the most
used vma, which adds another 8-10% hit rate gain. However,
since it adds a counter to the vma structure and requires extra
logic in find_vma to keep track of it, I was hesitant about the
overhead. If folks are interested I can send that out as well.
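
For illustration only, that follow-up is along these lines; the
VMA_MOST_USED slot and the vm_lookups counter are hypothetical
names for this sketch, not taken from a posted patch:

	/* hypothetical third cache slot */
	enum {
		VMA_LAST_USED,
		VMA_LARGEST,
		VMA_MOST_USED,	/* most frequently returned vma */
		NR_VMA_CACHES
	};

	/* in find_vma(), once vma has been located; vm_lookups would
	 * be a new counter in vm_area_struct */
	struct vm_area_struct *top = mm->mmap_cache[VMA_MOST_USED];

	vma->vm_lookups++;
	if (!top || vma->vm_lookups > top->vm_lookups)
		ACCESS_ONCE(mm->mmap_cache[VMA_MOST_USED]) = vma;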


 Documentation/vm/locking                 |  4 +-
 arch/unicore32/include/asm/mmu_context.h |  2 +-
 include/linux/mm.h                       | 13 ++++++
 include/linux/mm_types.h                 | 15 ++++++-
 kernel/debug/debug_core.c                | 17 +++++++-
 kernel/fork.c                            |  2 +-
 mm/mmap.c                                | 68 ++++++++++++++++++++------------
 7 files changed, 87 insertions(+), 34 deletions(-)

diff --git a/Documentation/vm/locking b/Documentation/vm/locking
index f61228b..b4e8154 100644
--- a/Documentation/vm/locking
+++ b/Documentation/vm/locking
@@ -42,8 +42,8 @@ The rules are:
    for mm B.
 
 The caveats are:
-1. find_vma() makes use of, and updates, the mmap_cache pointer hint.
-The update of mmap_cache is racy (page stealer can race with other code
+1. find_vma() makes use of, and updates, the mmap_cache hint pointers.
+The updates of mmap_cache are racy (a page stealer can race with other code
 that invokes find_vma with mmap_sem held), but that is okay, since it 
 is a hint. This can be fixed, if desired, by having find_vma grab the
 page_table_lock.
diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h
index fb5e4c6..38cc7fc 100644
--- a/arch/unicore32/include/asm/mmu_context.h
+++ b/arch/unicore32/include/asm/mmu_context.h
@@ -73,7 +73,7 @@ do { \
                else \
                        mm->mmap = NULL; \
                rb_erase(&high_vma->vm_rb, &mm->mm_rb); \
-               mm->mmap_cache = NULL; \
+               vma_clear_caches(mm);                   \
                mm->map_count--; \
                remove_vma(high_vma); \
        } \
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8b6e55e..2c0f8ed 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1534,8 +1534,21 @@ static inline void mm_populate(unsigned long addr, unsigned long len)
        /* Ignore errors */
        (void) __mm_populate(addr, len, 1);
 }
+
+static inline void vma_clear_caches(struct mm_struct *mm)
+{
+       int i;
+
+       for (i = 0; i < NR_VMA_CACHES; i++)
+               mm->mmap_cache[i] = NULL;
+}
 #else
 static inline void mm_populate(unsigned long addr, unsigned long len) {}
+
+static inline void vma_clear_caches(struct mm_struct *mm)
+{
+       mm->mmap_cache = NULL;
+}
 #endif
 
 /* These take the mm semaphore themselves */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index d9851ee..7f92835 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -322,12 +322,23 @@ struct mm_rss_stat {
        atomic_long_t count[NR_MM_COUNTERS];
 };
 
+
+#ifdef CONFIG_MMU
+enum {
+       VMA_LAST_USED, /* last find_vma result */
+       VMA_LARGEST,   /* vma that contains the most addresses */
+       NR_VMA_CACHES
+};
+#endif
+
 struct kioctx_table;
 struct mm_struct {
        struct vm_area_struct * mmap;           /* list of VMAs */
        struct rb_root mm_rb;
-       struct vm_area_struct * mmap_cache;     /* last find_vma result */
-#ifdef CONFIG_MMU
+#ifndef CONFIG_MMU
+       struct vm_area_struct *mmap_cache;      /* last find_vma result */
+#else
+       struct vm_area_struct *mmap_cache[NR_VMA_CACHES];
        unsigned long (*get_unmapped_area) (struct file *filp,
                                unsigned long addr, unsigned long len,
                                unsigned long pgoff, unsigned long flags);
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 0506d44..d9d72e4 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -221,13 +221,26 @@ int __weak kgdb_skipexception(int exception, struct pt_regs *regs)
  */
 static void kgdb_flush_swbreak_addr(unsigned long addr)
 {
+       struct mm_struct *mm = current->mm;
        if (!CACHE_FLUSH_IS_SAFE)
                return;
 
-       if (current->mm && current->mm->mmap_cache) {
-               flush_cache_range(current->mm->mmap_cache,
+#ifdef CONFIG_MMU
+       if (mm) {
+               int i;
+
+               for (i = 0; i < NR_VMA_CACHES; i++)
+                       if (mm->mmap_cache[i])
+                               flush_cache_range(mm->mmap_cache[i],
+                                                 addr,
+                                                 addr + BREAK_INSTR_SIZE);
+       }
+#else
+       if (mm && mm->mmap_cache) {
+               flush_cache_range(mm->mmap_cache,
                                  addr, addr + BREAK_INSTR_SIZE);
        }
+#endif
        /* Force flush instruction cache if it was outside the mm */
        flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
 }
diff --git a/kernel/fork.c b/kernel/fork.c
index 086fe73..7b92666 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -363,8 +363,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 
        mm->locked_vm = 0;
        mm->mmap = NULL;
-       mm->mmap_cache = NULL;
        mm->map_count = 0;
+       vma_clear_caches(mm);
        cpumask_clear(mm_cpumask(mm));
        mm->mm_rb = RB_ROOT;
        rb_link = &mm->mm_rb.rb_node;
diff --git a/mm/mmap.c b/mm/mmap.c
index 9d54851..29c3fc0 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -676,14 +676,17 @@ static inline void
 __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
                struct vm_area_struct *prev)
 {
+       int i;
        struct vm_area_struct *next;
 
        vma_rb_erase(vma, &mm->mm_rb);
        prev->vm_next = next = vma->vm_next;
        if (next)
                next->vm_prev = prev;
-       if (mm->mmap_cache == vma)
-               mm->mmap_cache = prev;
+
+       for (i = 0; i < NR_VMA_CACHES; i++)
+               if (mm->mmap_cache[i] == vma)
+                       mm->mmap_cache[i] = prev;
 }
 
 /*
@@ -1972,34 +1975,47 @@ EXPORT_SYMBOL(get_unmapped_area);
 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
 {
+       unsigned long currlen = 0;
+       struct rb_node *rb_node;
        struct vm_area_struct *vma = NULL;
 
-       /* Check the cache first. */
-       /* (Cache hit rate is typically around 35%.) */
-       vma = ACCESS_ONCE(mm->mmap_cache);
-       if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
-               struct rb_node *rb_node;
+       /* Check the cache first */
+       vma = ACCESS_ONCE(mm->mmap_cache[VMA_LAST_USED]);
+       if (vma && vma->vm_end > addr && vma->vm_start <= addr)
+               goto ret;
 
-               rb_node = mm->mm_rb.rb_node;
-               vma = NULL;
+       vma = ACCESS_ONCE(mm->mmap_cache[VMA_LARGEST]);
+       if (vma) {
+               if (vma->vm_end > addr && vma->vm_start <= addr)
+                       goto ret;
+               currlen = vma->vm_end - vma->vm_start;
+       }
 
-               while (rb_node) {
-                       struct vm_area_struct *vma_tmp;
-
-                       vma_tmp = rb_entry(rb_node,
-                                          struct vm_area_struct, vm_rb);
-
-                       if (vma_tmp->vm_end > addr) {
-                               vma = vma_tmp;
-                               if (vma_tmp->vm_start <= addr)
-                                       break;
-                               rb_node = rb_node->rb_left;
-                       } else
-                               rb_node = rb_node->rb_right;
-               }
-               if (vma)
-                       mm->mmap_cache = vma;
+       /* Bad cache! Iterate the rbtree */
+       rb_node = mm->mm_rb.rb_node;
+       vma = NULL;
+
+       while (rb_node) {
+               struct vm_area_struct *vma_tmp;
+
+               vma_tmp = rb_entry(rb_node,
+                                  struct vm_area_struct, vm_rb);
+
+               if (vma_tmp->vm_end > addr) {
+                       vma = vma_tmp;
+                       if (vma_tmp->vm_start <= addr)
+                               break;
+                       rb_node = rb_node->rb_left;
+               } else
+                       rb_node = rb_node->rb_right;
+       }
+
+       if (vma) {
+               ACCESS_ONCE(mm->mmap_cache[VMA_LAST_USED]) = vma;
+               if (vma->vm_end - vma->vm_start > currlen)
+                       ACCESS_ONCE(mm->mmap_cache[VMA_LARGEST]) = vma;
        }
+ret:
        return vma;
 }
 
@@ -2371,7 +2387,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
        } else
                mm->highest_vm_end = prev ? prev->vm_end : 0;
        tail_vma->vm_next = NULL;
-       mm->mmap_cache = NULL;          /* Kill the cache. */
+       vma_clear_caches(mm);
 }
 
 /*
-- 
1.8.1.4


