migration failing due to memcg limits

Wanpeng Li Thu, 12 Jul 2012 01:54:36 -0700

On Wed, Jul 11, 2012 at 07:02:13PM +0200, Johannes Weiner wrote:
>Compaction (and page migration in general) can currently be hindered
>through pages being owned by memory cgroups that are at their limits
>and unreclaimable.
>
>The reason is that the replacement page is being charged against the
>limit while the page being replaced is also still charged.  But this
>seems unnecessary, given that only one of the two pages will still be
>in use after migration finishes.
>
>This patch changes the memcg migration sequence so that the
>replacement page is not charged.  Whatever page is still in use after
>successful or failed migration gets to keep the charge of the page
>that was going to be replaced.
>
>The replacement page will still show up temporarily in the rss/cache
>statistics, this can be fixed in a later patch as it's less urgent.
>


So I want to know after this patch be merged if mem_cgroup_wait_acct_move
still make sense, if the answer is no, I will send a patch to remove it.

>Reported-by: David Rientjes <rient...@google.com>
>Signed-off-by: Johannes Weiner <han...@cmpxchg.org>
>Acked-by: KAMEZAWA Hiroyuki <kamezawa.hir...@jp.fujitsu.com>
>Acked-by: Michal Hocko <mho...@suse.cz>
>---
> include/linux/memcontrol.h |   11 +++----
> mm/memcontrol.c            |   67 +++++++++++++++++++++++--------------------
> mm/migrate.c               |   27 ++++--------------
> 3 files changed, 47 insertions(+), 58 deletions(-)
>
>diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>index 5a3ee64..8d9489f 100644
>--- a/include/linux/memcontrol.h
>+++ b/include/linux/memcontrol.h
>@@ -98,9 +98,9 @@ int mm_match_cgroup(const struct mm_struct *mm, const struct 
>mem_cgroup *cgroup)
> 
> extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
> 
>-extern int
>-mem_cgroup_prepare_migration(struct page *page,
>-      struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask);
>+extern void
>+mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
>+                           struct mem_cgroup **memcgp);
> extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
>       struct page *oldpage, struct page *newpage, bool migration_ok);
> 
>@@ -276,11 +276,10 @@ static inline struct cgroup_subsys_state
>       return NULL;
> }
> 
>-static inline int
>+static inline void
> mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
>-      struct mem_cgroup **memcgp, gfp_t gfp_mask)
>+                           struct mem_cgroup **memcgp)
> {
>-      return 0;
> }
> 
> static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
>diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>index e8ddc00..12ee2de 100644
>--- a/mm/memcontrol.c
>+++ b/mm/memcontrol.c
>@@ -2977,7 +2977,8 @@ static void mem_cgroup_do_uncharge(struct mem_cgroup 
>*memcg,
>  * uncharge if !page_mapped(page)
>  */
> static struct mem_cgroup *
>-__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
>+__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
>+                           bool end_migration)
> {
>       struct mem_cgroup *memcg = NULL;
>       unsigned int nr_pages = 1;
>@@ -3021,7 +3022,16 @@ __mem_cgroup_uncharge_common(struct page *page, enum 
>charge_type ctype)
>               /* fallthrough */
>       case MEM_CGROUP_CHARGE_TYPE_DROP:
>               /* See mem_cgroup_prepare_migration() */
>-              if (page_mapped(page) || PageCgroupMigration(pc))
>+              if (page_mapped(page))
>+                      goto unlock_out;
>+              /*
>+               * Pages under migration may not be uncharged.  But
>+               * end_migration() /must/ be the one uncharging the
>+               * unused post-migration page and so it has to call
>+               * here with the migration bit still set.  See the
>+               * res_counter handling below.
>+               */
>+              if (!end_migration && PageCgroupMigration(pc))
>                       goto unlock_out;
>               break;
>       case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
>@@ -3055,7 +3065,12 @@ __mem_cgroup_uncharge_common(struct page *page, enum 
>charge_type ctype)
>               mem_cgroup_swap_statistics(memcg, true);
>               mem_cgroup_get(memcg);
>       }
>-      if (!mem_cgroup_is_root(memcg))
>+      /*
>+       * Migration does not charge the res_counter for the
>+       * replacement page, so leave it alone when phasing out the
>+       * page that is unused after the migration.
>+       */
>+      if (!end_migration && !mem_cgroup_is_root(memcg))
>               mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
> 
>       return memcg;
>@@ -3071,14 +3086,14 @@ void mem_cgroup_uncharge_page(struct page *page)
>       if (page_mapped(page))
>               return;
>       VM_BUG_ON(page->mapping && !PageAnon(page));
>-      __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON);
>+      __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
> }
> 
> void mem_cgroup_uncharge_cache_page(struct page *page)
> {
>       VM_BUG_ON(page_mapped(page));
>       VM_BUG_ON(page->mapping);
>-      __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
>+      __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false);
> }
> 
> /*
>@@ -3142,7 +3157,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, 
>swp_entry_t ent, bool swapout)
>       if (!swapout) /* this was a swap cache but the swap is unused ! */
>               ctype = MEM_CGROUP_CHARGE_TYPE_DROP;
> 
>-      memcg = __mem_cgroup_uncharge_common(page, ctype);
>+      memcg = __mem_cgroup_uncharge_common(page, ctype, false);
> 
>       /*
>        * record memcg information,  if swapout && memcg != NULL,
>@@ -3232,19 +3247,18 @@ static inline int 
>mem_cgroup_move_swap_account(swp_entry_t entry,
>  * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
>  * page belongs to.
>  */
>-int mem_cgroup_prepare_migration(struct page *page,
>-      struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask)
>+void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
>+                                struct mem_cgroup **memcgp)
> {
>       struct mem_cgroup *memcg = NULL;
>       struct page_cgroup *pc;
>       enum charge_type ctype;
>-      int ret = 0;
> 
>       *memcgp = NULL;
> 
>       VM_BUG_ON(PageTransHuge(page));
>       if (mem_cgroup_disabled())
>-              return 0;
>+              return;
> 
>       pc = lookup_page_cgroup(page);
>       lock_page_cgroup(pc);
>@@ -3289,24 +3303,9 @@ int mem_cgroup_prepare_migration(struct page *page,
>        * we return here.
>        */
>       if (!memcg)
>-              return 0;
>+              return;
> 
>       *memcgp = memcg;
>-      ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, memcgp, false);
>-      css_put(&memcg->css);/* drop extra refcnt */
>-      if (ret) {
>-              if (PageAnon(page)) {
>-                      lock_page_cgroup(pc);
>-                      ClearPageCgroupMigration(pc);
>-                      unlock_page_cgroup(pc);
>-                      /*
>-                       * The old page may be fully unmapped while we kept it.
>-                       */
>-                      mem_cgroup_uncharge_page(page);
>-              }
>-              /* we'll need to revisit this error code (we have -EINTR) */
>-              return -ENOMEM;
>-      }
>       /*
>        * We charge new page before it's used/mapped. So, even if unlock_page()
>        * is called before end_migration, we can catch all events on this new
>@@ -3319,8 +3318,12 @@ int mem_cgroup_prepare_migration(struct page *page,
>               ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
>       else
>               ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
>+      /*
>+       * The page is committed to the memcg, but it's not actually
>+       * charged to the res_counter since we plan on replacing the
>+       * old one and only one page is going to be left afterwards.
>+       */
>       __mem_cgroup_commit_charge(memcg, newpage, 1, ctype, false);
>-      return ret;
> }
> 
> /* remove redundant charge if migration failed*/
>@@ -3342,6 +3345,12 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
>               used = newpage;
>               unused = oldpage;
>       }
>+      anon = PageAnon(used);
>+      __mem_cgroup_uncharge_common(unused,
>+              anon ? MEM_CGROUP_CHARGE_TYPE_ANON
>+                   : MEM_CGROUP_CHARGE_TYPE_CACHE,
>+              true);
>+      css_put(&memcg->css);
>       /*
>        * We disallowed uncharge of pages under migration because mapcount
>        * of the page goes down to zero, temporarly.
>@@ -3351,10 +3360,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
>       lock_page_cgroup(pc);
>       ClearPageCgroupMigration(pc);
>       unlock_page_cgroup(pc);
>-      anon = PageAnon(used);
>-      __mem_cgroup_uncharge_common(unused,
>-              anon ? MEM_CGROUP_CHARGE_TYPE_ANON
>-                   : MEM_CGROUP_CHARGE_TYPE_CACHE);
> 
>       /*
>        * If a page is a file cache, radix-tree replacement is very atomic
>diff --git a/mm/migrate.c b/mm/migrate.c
>index 8137aea..aa06bf4 100644
>--- a/mm/migrate.c
>+++ b/mm/migrate.c
>@@ -687,7 +687,6 @@ static int __unmap_and_move(struct page *page, struct page 
>*newpage,
> {
>       int rc = -EAGAIN;
>       int remap_swapcache = 1;
>-      int charge = 0;
>       struct mem_cgroup *mem;
>       struct anon_vma *anon_vma = NULL;
> 
>@@ -729,12 +728,7 @@ static int __unmap_and_move(struct page *page, struct 
>page *newpage,
>       }
> 
>       /* charge against new page */
>-      charge = mem_cgroup_prepare_migration(page, newpage, &mem, GFP_KERNEL);
>-      if (charge == -ENOMEM) {
>-              rc = -ENOMEM;
>-              goto unlock;
>-      }
>-      BUG_ON(charge);
>+      mem_cgroup_prepare_migration(page, newpage, &mem);
> 
>       if (PageWriteback(page)) {
>               /*
>@@ -824,8 +818,7 @@ static int __unmap_and_move(struct page *page, struct page 
>*newpage,
>               put_anon_vma(anon_vma);
> 
> uncharge:
>-      if (!charge)
>-              mem_cgroup_end_migration(mem, page, newpage, rc == 0);
>+      mem_cgroup_end_migration(mem, page, newpage, rc == 0);
> unlock:
>       unlock_page(page);
> out:
>@@ -1519,10 +1512,9 @@ migrate_misplaced_page(struct page *page, struct 
>mm_struct *mm, int node)
> {
>       struct page *oldpage = page, *newpage;
>       struct address_space *mapping = page_mapping(page);
>-      struct mem_cgroup *mcg;
>+      struct mem_cgroup *memcg;
>       unsigned int gfp;
>       int rc = 0;
>-      int charge = -ENOMEM;
> 
>       VM_BUG_ON(!PageLocked(page));
>       VM_BUG_ON(page_mapcount(page));
>@@ -1556,12 +1548,7 @@ migrate_misplaced_page(struct page *page, struct 
>mm_struct *mm, int node)
>       if (!trylock_page(newpage))
>               BUG();          /* new page should be unlocked!!! */
> 
>-      // XXX hnaz, is this right?
>-      charge = mem_cgroup_prepare_migration(page, newpage, &mcg, gfp);
>-      if (charge == -ENOMEM) {
>-              rc = charge;
>-              goto out;
>-      }
>+      mem_cgroup_prepare_migration(page, newpage, &memcg);
> 
>       newpage->index = page->index;
>       newpage->mapping = page->mapping;
>@@ -1581,11 +1568,9 @@ migrate_misplaced_page(struct page *page, struct 
>mm_struct *mm, int node)
>               page = newpage;
>       }
> 
>+      mem_cgroup_end_migration(memcg, oldpage, newpage, !rc);
> out:
>-      if (!charge)
>-              mem_cgroup_end_migration(mcg, oldpage, newpage, !rc);
>-
>-       if (oldpage != page)
>+      if (oldpage != page)
>                put_page(oldpage);
> 
>       if (rc) {
>-- 
>1.7.7.6
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [patch 01/10] mm: memcg: fix compaction/migration failing due to memcg limits

Reply via email to