2.6.37-stable review patch.  If anyone has any objections, please let us know.

------------------

From: Mel Gorman <[email protected]>

commit b44129b30652c8771db2265939bb8b463724043d upstream.

reduce_pgdat_percpu_threshold() and restore_pgdat_percpu_threshold() exist
to adjust the per-cpu vmstat thresholds while kswapd is awake to avoid
errors due to counter drift.  The functions duplicate some code so this
patch replaces them with a single set_pgdat_percpu_threshold() that takes
a callback function to calculate the desired threshold as a parameter.
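For context, a minimal userspace sketch of the callback pattern this change
introduces (the struct layout, names and threshold values below are simplified
stand-ins, not the kernel's pg_data_t, struct zone or per-cpu threshold math):

/*
 * Illustrative sketch only: one setter, with the threshold policy
 * supplied as a callback, replacing a reduce/restore pair.
 */
#include <stdio.h>

struct zone {
	int percpu_drift_mark;
	int stat_threshold;		/* per-cpu in the kernel, one value here */
};

struct pgdat {
	int nr_zones;
	struct zone node_zones[2];
};

/* stand-ins for calculate_pressure_threshold()/calculate_normal_threshold() */
static int pressure_threshold(struct zone *zone) { (void)zone; return 2; }
static int normal_threshold(struct zone *zone)   { (void)zone; return 32; }

/* one helper replaces the old pair; the policy is the callback */
static void set_pgdat_percpu_threshold(struct pgdat *pgdat,
				       int (*calculate_threshold)(struct zone *))
{
	int i;

	for (i = 0; i < pgdat->nr_zones; i++) {
		struct zone *zone = &pgdat->node_zones[i];

		if (!zone->percpu_drift_mark)
			continue;
		zone->stat_threshold = calculate_threshold(zone);
	}
}

int main(void)
{
	struct pgdat node = {
		.nr_zones = 1,
		.node_zones = { { .percpu_drift_mark = 1, .stat_threshold = 0 } },
	};

	/* before kswapd sleeps: restore the normal (larger) threshold */
	set_pgdat_percpu_threshold(&node, normal_threshold);
	printf("normal threshold:   %d\n", node.node_zones[0].stat_threshold);

	/* while kswapd is awake: tighten the threshold under pressure */
	set_pgdat_percpu_threshold(&node, pressure_threshold);
	printf("pressure threshold: %d\n", node.node_zones[0].stat_threshold);
	return 0;
}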

[[email protected]: readability tweak]
[[email protected]: set_pgdat_percpu_threshold(): don't use for_each_online_cpu]
Signed-off-by: Mel Gorman <[email protected]>
Reviewed-by: Christoph Lameter <[email protected]>
Reviewed-by: KAMEZAWA Hiroyuki <[email protected]>
Signed-off-by: KOSAKI Motohiro <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>

---
 include/linux/vmstat.h |   10 ++++++----
 mm/vmscan.c            |   19 +++++++++++++++++--
 mm/vmstat.c            |   36 +++++++-----------------------------
 3 files changed, 30 insertions(+), 35 deletions(-)

--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -254,8 +254,11 @@ extern void dec_zone_state(struct zone *
 extern void __dec_zone_state(struct zone *, enum zone_stat_item);
 
 void refresh_cpu_vm_stats(int);
-void reduce_pgdat_percpu_threshold(pg_data_t *pgdat);
-void restore_pgdat_percpu_threshold(pg_data_t *pgdat);
+
+int calculate_pressure_threshold(struct zone *zone);
+int calculate_normal_threshold(struct zone *zone);
+void set_pgdat_percpu_threshold(pg_data_t *pgdat,
+                               int (*calculate_pressure)(struct zone *));
 #else /* CONFIG_SMP */
 
 /*
@@ -300,8 +303,7 @@ static inline void __dec_zone_page_state
 #define dec_zone_page_state __dec_zone_page_state
 #define mod_zone_page_state __mod_zone_page_state
 
-static inline void reduce_pgdat_percpu_threshold(pg_data_t *pgdat) { }
-static inline void restore_pgdat_percpu_threshold(pg_data_t *pgdat) { }
+#define set_pgdat_percpu_threshold(pgdat, callback) { }
 
 static inline void refresh_cpu_vm_stats(int cpu) { }
 #endif
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2448,9 +2448,24 @@ static int kswapd(void *p)
                                 */
                                if (!sleeping_prematurely(pgdat, order, remaining)) {
                                        trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
-                                       restore_pgdat_percpu_threshold(pgdat);
+
+                                       /*
+                                        * vmstat counters are not perfectly
+                                        * accurate and the estimated value
+                                        * for counters such as NR_FREE_PAGES
+                                        * can deviate from the true value by
+                                        * nr_online_cpus * threshold. To
+                                        * avoid the zone watermarks being
+                                        * breached while under pressure, we
+                                        * reduce the per-cpu vmstat threshold
+                                        * while kswapd is awake and restore
+                                        * them before going back to sleep.
+                                        */
+                                       set_pgdat_percpu_threshold(pgdat,
+                                               calculate_normal_threshold);
                                        schedule();
-                                       reduce_pgdat_percpu_threshold(pgdat);
+                                       set_pgdat_percpu_threshold(pgdat,
+                                               calculate_pressure_threshold);
                                } else {
                                        if (remaining)
                                                count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -83,7 +83,7 @@ EXPORT_SYMBOL(vm_stat);
 
 #ifdef CONFIG_SMP
 
-static int calculate_pressure_threshold(struct zone *zone)
+int calculate_pressure_threshold(struct zone *zone)
 {
        int threshold;
        int watermark_distance;
@@ -107,7 +107,7 @@ static int calculate_pressure_threshold(
        return threshold;
 }
 
-static int calculate_threshold(struct zone *zone)
+int calculate_normal_threshold(struct zone *zone)
 {
        int threshold;
        int mem;        /* memory in 128 MB units */
@@ -166,7 +166,7 @@ static void refresh_zone_stat_thresholds
        for_each_populated_zone(zone) {
                unsigned long max_drift, tolerate_drift;
 
-               threshold = calculate_threshold(zone);
+               threshold = calculate_normal_threshold(zone);
 
                for_each_online_cpu(cpu)
                        per_cpu_ptr(zone->pageset, cpu)->stat_threshold
@@ -185,46 +185,24 @@ static void refresh_zone_stat_thresholds
        }
 }
 
-void reduce_pgdat_percpu_threshold(pg_data_t *pgdat)
+void set_pgdat_percpu_threshold(pg_data_t *pgdat,
+                               int (*calculate_pressure)(struct zone *))
 {
        struct zone *zone;
        int cpu;
        int threshold;
        int i;
 
-       get_online_cpus();
        for (i = 0; i < pgdat->nr_zones; i++) {
                zone = &pgdat->node_zones[i];
                if (!zone->percpu_drift_mark)
                        continue;
 
-               threshold = calculate_pressure_threshold(zone);
-               for_each_online_cpu(cpu)
-                       per_cpu_ptr(zone->pageset, cpu)->stat_threshold
-                                                       = threshold;
-       }
-       put_online_cpus();
-}
-
-void restore_pgdat_percpu_threshold(pg_data_t *pgdat)
-{
-       struct zone *zone;
-       int cpu;
-       int threshold;
-       int i;
-
-       get_online_cpus();
-       for (i = 0; i < pgdat->nr_zones; i++) {
-               zone = &pgdat->node_zones[i];
-               if (!zone->percpu_drift_mark)
-                       continue;
-
-               threshold = calculate_threshold(zone);
-               for_each_online_cpu(cpu)
+               threshold = (*calculate_pressure)(zone);
+               for_each_possible_cpu(cpu)
                        per_cpu_ptr(zone->pageset, cpu)->stat_threshold
                                                        = threshold;
        }
-       put_online_cpus();
 }
 
 /*


_______________________________________________
stable mailing list
[email protected]
http://linux.kernel.org/mailman/listinfo/stable