The patch titled
     Subject: mm: prevent endless growth of anon_vma hierarchy
has been removed from the -mm tree.  Its filename was
     mm-prevent-endless-growth-of-anon_vma-hierarchy.patch

This patch was dropped because it was merged into mainline or a subsystem tree

------------------------------------------------------
From: Konstantin Khlebnikov <[email protected]>
Subject: mm: prevent endless growth of anon_vma hierarchy

Constantly forking task causes unlimited grow of anon_vma chain.  Each
next child allocates new level of anon_vmas and links vma to all previous
levels because pages might be inherited from any level.

This patch adds heuristic which decides to reuse existing anon_vma instead
of forking new one.  It adds counter anon_vma->degree which counts linked
vmas and directly descending anon_vmas and reuses anon_vma if counter is
lower than two.  As a result each anon_vma has either vma or at least two
descending anon_vmas.  In such trees half of nodes are leafs with alive
vmas, thus count of anon_vmas is no more than two times bigger than count
of vmas.  This heuristic reuses anon_vmas as few as possible because each
reuse adds false aliasing among vmas and rmap walker ought to scan more
ptes when it searches where page is might be mapped.

Link: http://lkml.kernel.org/r/[email protected]
Fixes: 5beb49305251 ("mm: change anon_vma linking to fix multi-process server 
scalability issue")
[[email protected]: fix typo, per Rik]
Signed-off-by: Konstantin Khlebnikov <[email protected]>
Reported-by: Daniel Forrest <[email protected]>
Tested-by: Michal Hocko <[email protected]>
Tested-by: Jerome Marchand <[email protected]>
Reviewed-by: Michal Hocko <[email protected]>
Reviewed-by: Rik van Riel <[email protected]>
Cc: <[email protected]>    [2.6.34+]
Signed-off-by: Andrew Morton <[email protected]>
---

 include/linux/rmap.h |   10 +++++++++
 mm/rmap.c            |   42 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 51 insertions(+), 1 deletion(-)

diff -puN include/linux/rmap.h~mm-prevent-endless-growth-of-anon_vma-hierarchy 
include/linux/rmap.h
--- a/include/linux/rmap.h~mm-prevent-endless-growth-of-anon_vma-hierarchy
+++ a/include/linux/rmap.h
@@ -37,6 +37,16 @@ struct anon_vma {
        atomic_t refcount;
 
        /*
+        * Count of child anon_vmas and VMAs which points to this anon_vma.
+        *
+        * This counter is used for making decision about reusing anon_vma
+        * instead of forking new one. See comments in function anon_vma_clone.
+        */
+       unsigned degree;
+
+       struct anon_vma *parent;        /* Parent of this anon_vma */
+
+       /*
         * NOTE: the LSB of the rb_root.rb_node is set by
         * mm_take_all_locks() _after_ taking the above lock. So the
         * rb_root must only be read/written after taking the above lock
diff -puN mm/rmap.c~mm-prevent-endless-growth-of-anon_vma-hierarchy mm/rmap.c
--- a/mm/rmap.c~mm-prevent-endless-growth-of-anon_vma-hierarchy
+++ a/mm/rmap.c
@@ -72,6 +72,8 @@ static inline struct anon_vma *anon_vma_
        anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
        if (anon_vma) {
                atomic_set(&anon_vma->refcount, 1);
+               anon_vma->degree = 1;   /* Reference for first vma */
+               anon_vma->parent = anon_vma;
                /*
                 * Initialise the anon_vma root to point to itself. If called
                 * from fork, the root will be reset to the parents anon_vma.
@@ -188,6 +190,8 @@ int anon_vma_prepare(struct vm_area_stru
                if (likely(!vma->anon_vma)) {
                        vma->anon_vma = anon_vma;
                        anon_vma_chain_link(vma, avc, anon_vma);
+                       /* vma reference or self-parent link for new root */
+                       anon_vma->degree++;
                        allocated = NULL;
                        avc = NULL;
                }
@@ -236,6 +240,14 @@ static inline void unlock_anon_vma_root(
 /*
  * Attach the anon_vmas from src to dst.
  * Returns 0 on success, -ENOMEM on failure.
+ *
+ * If dst->anon_vma is NULL this function tries to find and reuse existing
+ * anon_vma which has no vmas and only one child anon_vma. This prevents
+ * degradation of anon_vma hierarchy to endless linear chain in case of
+ * constantly forking task. On the other hand, an anon_vma with more than one
+ * child isn't reused even if there was no alive vma, thus rmap walker has a
+ * good chance of avoiding scanning the whole hierarchy when it searches where
+ * page is mapped.
  */
 int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 {
@@ -256,7 +268,21 @@ int anon_vma_clone(struct vm_area_struct
                anon_vma = pavc->anon_vma;
                root = lock_anon_vma_root(root, anon_vma);
                anon_vma_chain_link(dst, avc, anon_vma);
+
+               /*
+                * Reuse existing anon_vma if its degree lower than two,
+                * that means it has no vma and only one anon_vma child.
+                *
+                * Do not chose parent anon_vma, otherwise first child
+                * will always reuse it. Root anon_vma is never reused:
+                * it has self-parent reference and at least one child.
+                */
+               if (!dst->anon_vma && anon_vma != src->anon_vma &&
+                               anon_vma->degree < 2)
+                       dst->anon_vma = anon_vma;
        }
+       if (dst->anon_vma)
+               dst->anon_vma->degree++;
        unlock_anon_vma_root(root);
        return 0;
 
@@ -280,6 +306,9 @@ int anon_vma_fork(struct vm_area_struct
        if (!pvma->anon_vma)
                return 0;
 
+       /* Drop inherited anon_vma, we'll reuse existing or allocate new. */
+       vma->anon_vma = NULL;
+
        /*
         * First, attach the new VMA to the parent VMA's anon_vmas,
         * so rmap can find non-COWed pages in child processes.
@@ -288,6 +317,10 @@ int anon_vma_fork(struct vm_area_struct
        if (error)
                return error;
 
+       /* An existing anon_vma has been reused, all done then. */
+       if (vma->anon_vma)
+               return 0;
+
        /* Then add our own anon_vma. */
        anon_vma = anon_vma_alloc();
        if (!anon_vma)
@@ -301,6 +334,7 @@ int anon_vma_fork(struct vm_area_struct
         * lock any of the anon_vmas in this anon_vma tree.
         */
        anon_vma->root = pvma->anon_vma->root;
+       anon_vma->parent = pvma->anon_vma;
        /*
         * With refcounts, an anon_vma can stay around longer than the
         * process it belongs to. The root anon_vma needs to be pinned until
@@ -311,6 +345,7 @@ int anon_vma_fork(struct vm_area_struct
        vma->anon_vma = anon_vma;
        anon_vma_lock_write(anon_vma);
        anon_vma_chain_link(vma, avc, anon_vma);
+       anon_vma->parent->degree++;
        anon_vma_unlock_write(anon_vma);
 
        return 0;
@@ -341,12 +376,16 @@ void unlink_anon_vmas(struct vm_area_str
                 * Leave empty anon_vmas on the list - we'll need
                 * to free them outside the lock.
                 */
-               if (RB_EMPTY_ROOT(&anon_vma->rb_root))
+               if (RB_EMPTY_ROOT(&anon_vma->rb_root)) {
+                       anon_vma->parent->degree--;
                        continue;
+               }
 
                list_del(&avc->same_vma);
                anon_vma_chain_free(avc);
        }
+       if (vma->anon_vma)
+               vma->anon_vma->degree--;
        unlock_anon_vma_root(root);
 
        /*
@@ -357,6 +396,7 @@ void unlink_anon_vmas(struct vm_area_str
        list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
                struct anon_vma *anon_vma = avc->anon_vma;
 
+               BUG_ON(anon_vma->degree);
                put_anon_vma(anon_vma);
 
                list_del(&avc->same_vma);
_

Patches currently in -mm which might be from [email protected] are

mm-add-kpf_zero_page-flag-for-proc-kpageflags.patch

--
To unsubscribe from this list: send the line "unsubscribe stable" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to