Add a top tier reclaim type to mem_cgroup_soft_limit_reclaim().
When called with N_TOPTIER, soft limit reclaim targets top tier memory
and picks cgroups in order of their excess top tier usage; existing
callers pass N_MEMORY and keep the current behavior.
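
A reclaim path that only wants to relieve top tier pressure would pass
N_TOPTIER instead, e.g. (illustrative sketch, mirroring the kswapd call
site in balance_pgdat() which keeps passing N_MEMORY):

        nr_soft_scanned = 0;
        nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(pgdat, sc.order,
                                        sc.gfp_mask, &nr_soft_scanned,
                                        N_TOPTIER);
        sc.nr_reclaimed += nr_soft_reclaimed;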

Signed-off-by: Tim Chen <[email protected]>
---
 include/linux/memcontrol.h |  9 ++++---
 mm/memcontrol.c            | 48 ++++++++++++++++++++++++--------------
 mm/vmscan.c                |  4 ++--
 3 files changed, 39 insertions(+), 22 deletions(-)
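
When a cgroup is reclaimed against one tree, its soft limit excess
changes on both the N_MEMORY and the N_TOPTIER tree, so the entry on the
sibling tree is refreshed under that tree's own lock as well.  Condensed,
the per-cgroup bookkeeping after each reclaim pass looks like this
(sketch only, using the helpers introduced earlier in this series):

        spin_lock_irq(&mctz->lock);
        /* reclaim tree: re-position the cgroup by its new excess */
        __mem_cgroup_remove_exceeded(mz, mctz, type);
        excess = soft_limit_excess(mz->memcg, type);
        /* if excess == 0, the insert is a no-op */
        __mem_cgroup_insert_exceeded(mz, mctz, excess, type);
        spin_unlock_irq(&mctz->lock);

        /* sibling tree: usage changed there too, refresh its position */
        if (mctz_sibling) {
                spin_lock_irq(&mctz_sibling->lock);
                __mem_cgroup_remove_exceeded(mz, mctz_sibling, sibling_type);
                excess = soft_limit_excess(mz->memcg, sibling_type);
                __mem_cgroup_insert_exceeded(mz, mctz_sibling, excess,
                                             sibling_type);
                spin_unlock_irq(&mctz_sibling->lock);
        }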

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0ed8ddfd5436..c494c4b11ba2 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -21,6 +21,7 @@
 #include <linux/vmstat.h>
 #include <linux/writeback.h>
 #include <linux/page-flags.h>
+#include <linux/nodemask.h>
 
 struct mem_cgroup;
 struct obj_cgroup;
@@ -1003,7 +1004,8 @@ static inline void mod_memcg_lruvec_state(struct lruvec *lruvec,
 
 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
                                                gfp_t gfp_mask,
-                                               unsigned long *total_scanned);
+                                               unsigned long *total_scanned,
+                                               enum node_states type);
 
 void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
                          unsigned long count);
@@ -1421,8 +1423,9 @@ static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
 
 static inline
 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
-                                           gfp_t gfp_mask,
-                                           unsigned long *total_scanned)
+                                           gfp_mask,
+                                           unsigned long *total_scanned,
+                                           enum node_states type)
 {
        return 0;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8a7648b79635..9f75475ae833 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1875,7 +1875,8 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
                                   pg_data_t *pgdat,
                                   gfp_t gfp_mask,
-                                  unsigned long *total_scanned)
+                                  unsigned long *total_scanned,
+                                  enum node_states type)
 {
        struct mem_cgroup *victim = NULL;
        int total = 0;
@@ -1886,7 +1887,7 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
                .pgdat = pgdat,
        };
 
-       excess = soft_limit_excess(root_memcg, N_MEMORY);
+       excess = soft_limit_excess(root_memcg, type);
 
        while (1) {
                victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
@@ -1915,7 +1916,7 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
                total += mem_cgroup_shrink_node(victim, gfp_mask, false,
                                        pgdat, &nr_scanned);
                *total_scanned += nr_scanned;
-               if (!soft_limit_excess(root_memcg, N_MEMORY))
+               if (!soft_limit_excess(root_memcg, type))
                        break;
        }
        mem_cgroup_iter_break(root_memcg, victim);
@@ -3524,7 +3525,8 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
 
 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
                                            gfp_t gfp_mask,
-                                           unsigned long *total_scanned)
+                                           unsigned long *total_scanned,
+                                           enum node_states type)
 {
        unsigned long nr_reclaimed = 0;
        struct mem_cgroup_per_node *mz, *next_mz = NULL;
@@ -3534,12 +3536,24 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
        unsigned long excess;
        unsigned long nr_scanned;
        int migration_nid;
+       enum node_states sibling_type;
 
        if (order > 0)
                return 0;
 
-       mctz = soft_limit_tree_node(pgdat->node_id, N_MEMORY);
-       mctz_sibling = soft_limit_tree_node(pgdat->node_id, N_TOPTIER);
+       if (type != N_MEMORY && type != N_TOPTIER)
+               return 0;
+
+       if (type == N_TOPTIER && !node_state(pgdat->node_id, N_TOPTIER))
+               return 0;
+
+       if (type == N_TOPTIER)
+               sibling_type = N_MEMORY;
+       else
+               sibling_type = N_TOPTIER;
+
+       mctz = soft_limit_tree_node(pgdat->node_id, type);
+       mctz_sibling = soft_limit_tree_node(pgdat->node_id, sibling_type);
 
        /*
         * Do not even bother to check the largest node if the root
@@ -3558,11 +3572,11 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
        if (migration_nid != -1) {
                struct mem_cgroup_tree_per_node *mmctz;
 
-               mmctz = soft_limit_tree_node(migration_nid, N_MEMORY);
+               mmctz = soft_limit_tree_node(migration_nid, type);
                if (mmctz && !RB_EMPTY_ROOT(&mmctz->rb_root)) {
                        pgdat = NODE_DATA(migration_nid);
                        return mem_cgroup_soft_limit_reclaim(pgdat, order,
-                               gfp_mask, total_scanned);
+                               gfp_mask, total_scanned, type);
                }
        }
 
@@ -3575,17 +3589,17 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
                if (next_mz)
                        mz = next_mz;
                else
-                       mz = mem_cgroup_largest_soft_limit_node(mctz, N_MEMORY);
+                       mz = mem_cgroup_largest_soft_limit_node(mctz, type);
                if (!mz)
                        break;
 
                nr_scanned = 0;
                reclaimed = mem_cgroup_soft_reclaim(mz->memcg, pgdat,
-                                                   gfp_mask, &nr_scanned);
+                                                   gfp_mask, &nr_scanned, type);
                nr_reclaimed += reclaimed;
                *total_scanned += nr_scanned;
                spin_lock_irq(&mctz->lock);
-               __mem_cgroup_remove_exceeded(mz, mctz, N_MEMORY);
+               __mem_cgroup_remove_exceeded(mz, mctz, type);
 
                /*
                 * If we failed to reclaim anything from this memory cgroup
@@ -3594,9 +3608,9 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
                next_mz = NULL;
                if (!reclaimed)
                        next_mz =
-                          __mem_cgroup_largest_soft_limit_node(mctz, N_MEMORY);
+                          __mem_cgroup_largest_soft_limit_node(mctz, type);
 
-               excess = soft_limit_excess(mz->memcg, N_MEMORY);
+               excess = soft_limit_excess(mz->memcg, type);
                /*
                 * One school of thought says that we should not add
                 * back the node to the tree if reclaim returns 0.
@@ -3606,17 +3620,17 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
                 * term TODO.
                 */
                /* If excess == 0, no tree ops */
-               __mem_cgroup_insert_exceeded(mz, mctz, excess, N_MEMORY);
+               __mem_cgroup_insert_exceeded(mz, mctz, excess, type);
                spin_unlock_irq(&mctz->lock);
 
                /* update both affected N_MEMORY and N_TOPTIER trees */
                if (mctz_sibling) {
                        spin_lock_irq(&mctz_sibling->lock);
                        __mem_cgroup_remove_exceeded(mz, mctz_sibling,
-                                                    N_TOPTIER);
-                       excess = soft_limit_excess(mz->memcg, N_TOPTIER);
+                                                    sibling_type);
+                       excess = soft_limit_excess(mz->memcg, sibling_type);
                        __mem_cgroup_insert_exceeded(mz, mctz_sibling, excess,
-                                                    N_TOPTIER);
+                                                    sibling_type);
                        spin_unlock_irq(&mctz_sibling->lock);
                }
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3b200b7170a9..11bb0c6fa524 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3134,7 +3134,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                        nr_soft_scanned = 0;
                        nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone->zone_pgdat,
                                                sc->order, sc->gfp_mask,
-                                               &nr_soft_scanned);
+                                               &nr_soft_scanned, N_MEMORY);
                        sc->nr_reclaimed += nr_soft_reclaimed;
                        sc->nr_scanned += nr_soft_scanned;
                        /* need some check for avoid more shrink_zone() */
@@ -3849,7 +3849,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
                sc.nr_scanned = 0;
                nr_soft_scanned = 0;
                nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(pgdat, sc.order,
-                                               sc.gfp_mask, &nr_soft_scanned);
+                                               sc.gfp_mask, &nr_soft_scanned, N_MEMORY);
                sc.nr_reclaimed += nr_soft_reclaimed;
 
                /*
-- 
2.20.1
