Gitweb:     http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=82369553d6d3bc67c54129a02e0bc0b5b88f3045
Commit:     82369553d6d3bc67c54129a02e0bc0b5b88f3045
Parent:     3be91277e754c7db04eae145ba622b3a3e3ad96d
Author:     Hugh Dickins <[EMAIL PROTECTED]>
AuthorDate: Thu Feb 7 00:14:22 2008 -0800
Committer:  Linus Torvalds <[EMAIL PROTECTED]>
CommitDate: Thu Feb 7 08:42:20 2008 -0800

    memcgroup: fix hang with shmem/tmpfs
    
    The memcgroup regime relies upon a cgroup reclaiming pages from itself within
    add_to_page_cache: which may involve some waiting.  Whereas shmem and tmpfs
    rely upon using add_to_page_cache while holding a spinlock: when it cannot
    wait.  The consequence is that when a cgroup reaches its limit, shmem_getpage
    just hangs - unless there is outside memory pressure too, neither kswapd nor
    radix_tree_preload get it out of the retry loop.
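    
    A simplified illustration of the loop that hangs (condensed; the GFP_NOWAIT
    insertion under info->lock matches the context visible in the shmem.c hunks
    below, everything else is elided and not literal kernel code):
    
        repeat:
                ...
                spin_lock(&info->lock);
                /* cannot wait under the spinlock, so an over-limit cgroup
                 * never gets a chance to reclaim from itself here */
                error = add_to_page_cache(swappage, mapping, idx, GFP_NOWAIT);
                if (error) {
                        spin_unlock(&info->lock);
                        unlock_page(swappage);
                        page_cache_release(swappage);
                        goto repeat;    /* cgroup still at its limit: loops forever */
                }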
    
    In most cases we can mem_cgroup_cache_charge the page waitably first, to
    attach the page_cgroup in advance, so add_to_page_cache will do no more than
    increment a count; then mem_cgroup_uncharge_page after (in both success and
    failure cases) to balance the books again.
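    
    Condensed, the new ordering in the shmem_getpage hunk below looks roughly
    like this (error handling and block accounting trimmed; not literal kernel
    code):
    
        /* charge while we may still sleep: attaches the page_cgroup now */
        error = mem_cgroup_cache_charge(filepage, current->mm,
                                        gfp & ~__GFP_HIGHMEM);
        if (error)
                goto failed;
    
        spin_lock(&info->lock);
        /* cannot wait here; the page is already charged, so the charge
         * inside add_to_page_cache only bumps the page_cgroup refcount */
        error = add_to_page_cache_lru(filepage, mapping, idx, GFP_NOWAIT);
        spin_unlock(&info->lock);
    
        /* compensate afterwards, in both the success and failure cases */
        mem_cgroup_uncharge_page(filepage);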
    
    And where there used to be a congestion_wait for kswapd (recently made
    redundant by radix_tree_preload), use mem_cgroup_cache_charge with NULL page
    to go through a cycle of allocation and freeing, without accounting to any
    particular page, and without updating the statistics vector.  This brings the
    cgroup below its limit so the next try usually succeeds.
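    
    Concretely, the swap-in retry path in the diff below now does roughly the
    following when add_to_page_cache fails with -ENOMEM (condensed sketch):
    
        if (error == -ENOMEM) {
                /* charge no particular page: the cgroup allocates and then
                 * frees a dummy charge, reclaiming from itself if it is at
                 * its limit, so the retried add_to_page_cache can succeed */
                error = mem_cgroup_cache_charge(NULL, current->mm,
                                                gfp & ~__GFP_HIGHMEM);
                if (error)
                        goto failed;
        }
        goto repeat;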
    
    Signed-off-by: Hugh Dickins <[EMAIL PROTECTED]>
    Cc: Balbir Singh <[EMAIL PROTECTED]>
    Cc: KAMEZAWA Hiroyuki <[EMAIL PROTECTED]>
    Cc: Mel Gorman <[EMAIL PROTECTED]>
    Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
    Signed-off-by: Linus Torvalds <[EMAIL PROTECTED]>
---
 mm/memcontrol.c |   37 +++++++++++++++++++++----------------
 mm/shmem.c      |   28 +++++++++++++++++++++++++++-
 2 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index dbf5715..11b23f2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -329,23 +329,26 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
         * with it
         */
 retry:
-       lock_page_cgroup(page);
-       pc = page_get_page_cgroup(page);
-       /*
-        * The page_cgroup exists and the page has already been accounted
-        */
-       if (pc) {
-               if (unlikely(!atomic_inc_not_zero(&pc->ref_cnt))) {
-                       /* this page is under being uncharged ? */
-                       unlock_page_cgroup(page);
-                       cpu_relax();
-                       goto retry;
-               } else {
-                       unlock_page_cgroup(page);
-                       goto done;
+       if (page) {
+               lock_page_cgroup(page);
+               pc = page_get_page_cgroup(page);
+               /*
+                * The page_cgroup exists and
+                * the page has already been accounted.
+                */
+               if (pc) {
+                       if (unlikely(!atomic_inc_not_zero(&pc->ref_cnt))) {
+                               /* this page is under being uncharged ? */
+                               unlock_page_cgroup(page);
+                               cpu_relax();
+                               goto retry;
+                       } else {
+                               unlock_page_cgroup(page);
+                               goto done;
+                       }
                }
+               unlock_page_cgroup(page);
        }
-       unlock_page_cgroup(page);
 
        pc = kzalloc(sizeof(struct page_cgroup), gfp_mask);
        if (pc == NULL)
@@ -404,7 +407,7 @@ retry:
        if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
                pc->flags |= PAGE_CGROUP_FLAG_CACHE;
 
-       if (page_cgroup_assign_new_page_cgroup(page, pc)) {
+       if (!page || page_cgroup_assign_new_page_cgroup(page, pc)) {
                /*
                 * Another charge has been added to this page already.
                 * We take lock_page_cgroup(page) again and read
@@ -413,6 +416,8 @@ retry:
                res_counter_uncharge(&mem->res, PAGE_SIZE);
                css_put(&mem->css);
                kfree(pc);
+               if (!page)
+                       goto done;
                goto retry;
        }
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 0f246c4..85bed94 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -912,9 +912,13 @@ found:
        error = 1;
        if (!inode)
                goto out;
-       error = radix_tree_preload(GFP_KERNEL);
+       /* Precharge page while we can wait, compensate afterwards */
+       error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
        if (error)
                goto out;
+       error = radix_tree_preload(GFP_KERNEL);
+       if (error)
+               goto uncharge;
        error = 1;
 
        spin_lock(&info->lock);
@@ -947,6 +951,8 @@ found:
                shmem_swp_unmap(ptr);
        spin_unlock(&info->lock);
        radix_tree_preload_end();
+uncharge:
+       mem_cgroup_uncharge_page(page);
 out:
        unlock_page(page);
        page_cache_release(page);
@@ -1308,6 +1314,13 @@ repeat:
                        spin_unlock(&info->lock);
                        unlock_page(swappage);
                        page_cache_release(swappage);
+                       if (error == -ENOMEM) {
+                               /* allow reclaim from this memory cgroup */
+                               error = mem_cgroup_cache_charge(NULL,
+                                       current->mm, gfp & ~__GFP_HIGHMEM);
+                               if (error)
+                                       goto failed;
+                       }
                        goto repeat;
                }
        } else if (sgp == SGP_READ && !filepage) {
@@ -1353,6 +1366,17 @@ repeat:
                                goto failed;
                        }
 
+                       /* Precharge page while we can wait, compensate after */
+                       error = mem_cgroup_cache_charge(filepage, current->mm,
+                                                       gfp & ~__GFP_HIGHMEM);
+                       if (error) {
+                               page_cache_release(filepage);
+                               shmem_unacct_blocks(info->flags, 1);
+                               shmem_free_blocks(inode, 1);
+                               filepage = NULL;
+                               goto failed;
+                       }
+
                        spin_lock(&info->lock);
                        entry = shmem_swp_alloc(info, idx, sgp);
                        if (IS_ERR(entry))
@@ -1364,6 +1388,7 @@ repeat:
                        if (error || swap.val || 0 != add_to_page_cache_lru(
                                        filepage, mapping, idx, GFP_NOWAIT)) {
                                spin_unlock(&info->lock);
+                               mem_cgroup_uncharge_page(filepage);
                                page_cache_release(filepage);
                                shmem_unacct_blocks(info->flags, 1);
                                shmem_free_blocks(inode, 1);
@@ -1372,6 +1397,7 @@ repeat:
                                        goto failed;
                                goto repeat;
                        }
+                       mem_cgroup_uncharge_page(filepage);
                        info->flags |= SHMEM_PAGEIN;
                }
 