> When a high-order allocation fails, kswapd is kicked so that it reclaims
> at a higher order, both to avoid stalling direct reclaimers and to help
> GFP_ATOMIC allocations. Something has changed in recent kernels that affects
> the timing, and high-order GFP_ATOMIC allocations now fail more frequently,
> particularly under pressure. This patch forces kswapd to notice sooner that
> high-order allocations are occurring.
> 
> Signed-off-by: Mel Gorman <[email protected]>
> ---
>  mm/vmscan.c |    9 +++++++++
>  1 files changed, 9 insertions(+), 0 deletions(-)
> 
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 64e4388..cd68109 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -2016,6 +2016,15 @@ loop_again:
>                                       priority != DEF_PRIORITY)
>                               continue;
>  
> +                     /*
> +                      * Exit quickly to restart if it has been indicated
> +                      * that higher orders are required
> +                      */
> +                     if (pgdat->kswapd_max_order > order) {
> +                             all_zones_ok = 1;
> +                             goto out;
> +                     }
> +
>                       if (!zone_watermark_ok(zone, order,
>                                       high_wmark_pages(zone), end_zone, 0))
>                               all_zones_ok = 0;

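For reference, the quick-exit above keys off pgdat->kswapd_max_order, which
the allocator side records before waking kswapd. Roughly, the waker looks
like this (a from-memory sketch of the 2.6.3x-era wakeup_kswapd(), shown
only for context, not part of the patch):

void wakeup_kswapd(struct zone *zone, int order)
{
        pg_data_t *pgdat;

        if (!populated_zone(zone))
                return;

        pgdat = zone->zone_pgdat;
        if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
                return;
        /* remember the largest order requested while kswapd is busy */
        if (pgdat->kswapd_max_order < order)
                pgdat->kswapd_max_order = order;
        if (!waitqueue_active(&pgdat->kswapd_wait))
                return;
        wake_up_interruptible(&pgdat->kswapd_wait);
}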
This is the simplest patch and seems reasonable.
        Reviewed-by: KOSAKI Motohiro <[email protected]>


btw, balance_pgdat() now has too complex a control flow; at least Vincent
was confused by it.
So I think the kswapd_max_order handling should move into balance_pgdat()
in a later release.
The patch below addresses my proposed concept; a rough sketch of the
resulting flow comes first, then the full patch.
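
In rough outline (a condensed, non-compilable sketch of the flow the patch
below implements; the scan_control setup and the per-priority reclaim loop
are elided):

static unsigned long balance_pgdat(pg_data_t *pgdat)
{
        int order = 0;
        int new_order;
        ...
loop_again:
        /* pick up any larger order requested since the last pass */
        new_order = pgdat->kswapd_max_order;
        pgdat->kswapd_max_order = 0;
        if (order < new_order)
                order = sc.order = new_order;

        /* ... usual per-priority reclaim loop runs here ... */

        cond_resched();
        try_to_freeze();

        /* restart if someone wants a larger 'order' allocation */
        if (order < pgdat->kswapd_max_order)
                goto loop_again;

        if (!all_zones_ok)
                goto loop_again;

        return sc.nr_reclaimed;
}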



From 2c5be772f6db25a5ef82975960d0b5788736ec2b Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <[email protected]>
Date: Mon, 26 Oct 2009 23:25:29 +0900
Subject: [PATCH] kswapd_max_order handling move into balance_pgdat()

Signed-off-by: KOSAKI Motohiro <[email protected]>
---
 mm/vmscan.c |   45 +++++++++++++++++++++------------------------
 1 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 64e4388..49001d3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1915,7 +1915,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
  * interoperates with the page allocator fallback scheme to ensure that aging
  * of pages is balanced across the zones.
  */
-static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
+static unsigned long balance_pgdat(pg_data_t *pgdat)
 {
        int all_zones_ok;
        int priority;
@@ -1928,7 +1928,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
                .may_swap = 1,
                .swap_cluster_max = SWAP_CLUSTER_MAX,
                .swappiness = vm_swappiness,
-               .order = order,
+               .order = 0,
                .mem_cgroup = NULL,
                .isolate_pages = isolate_pages_global,
        };
@@ -1938,6 +1938,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
         * free_pages == high_wmark_pages(zone).
         */
        int temp_priority[MAX_NR_ZONES];
+       int order = 0;
+       int new_order;
 
 loop_again:
        total_scanned = 0;
@@ -1945,6 +1947,11 @@ loop_again:
        sc.may_writepage = !laptop_mode;
        count_vm_event(PAGEOUTRUN);
 
+       new_order = pgdat->kswapd_max_order;
+       pgdat->kswapd_max_order = 0;
+       if (order < new_order)
+               order = sc.order = new_order;
+
        for (i = 0; i < pgdat->nr_zones; i++)
                temp_priority[i] = DEF_PRIORITY;
 
@@ -2087,11 +2094,17 @@ out:
 
                zone->prev_priority = temp_priority[i];
        }
-       if (!all_zones_ok) {
-               cond_resched();
 
-               try_to_freeze();
+       cond_resched();
+       try_to_freeze();
 
+       /*
+        * restart if someone wants a larger 'order' allocation
+        */
+       if (order < pgdat->kswapd_max_order)
+               goto loop_again;
+
+       if (!all_zones_ok) {
                /*
                 * Fragmentation may mean that the system cannot be
                 * rebalanced for high-order allocations in all zones.
@@ -2130,7 +2143,6 @@ out:
  */
 static int kswapd(void *p)
 {
-       unsigned long order;
        pg_data_t *pgdat = (pg_data_t*)p;
        struct task_struct *tsk = current;
        DEFINE_WAIT(wait);
@@ -2160,32 +2172,17 @@ static int kswapd(void *p)
        tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
        set_freezable();
 
-       order = 0;
        for ( ; ; ) {
-               unsigned long new_order;
-
                prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
-               new_order = pgdat->kswapd_max_order;
-               pgdat->kswapd_max_order = 0;
-               if (order < new_order) {
-                       /*
-                        * Don't sleep if someone wants a larger 'order'
-                        * allocation
-                        */
-                       order = new_order;
-               } else {
-                       if (!freezing(current))
-                               schedule();
-
-                       order = pgdat->kswapd_max_order;
-               }
+               if (!freezing(current))
+                       schedule();
                finish_wait(&pgdat->kswapd_wait, &wait);
 
                if (!try_to_freeze()) {
                        /* We can speed up thawing tasks if we don't call
                         * balance_pgdat after returning from the refrigerator
                         */
-                       balance_pgdat(pgdat, order);
+                       balance_pgdat(pgdat);
                }
        }
        return 0;
-- 
1.6.2.5
