Currently, in addition to a shrink_control struct, shrink_slab() takes
two arguments, nr_pages_scanned and lru_pages, which are used for
balancing slab reclaim versus page reclaim: roughly speaking,
shrink_slab() will try to scan the nr_pages_scanned/lru_pages fraction
of all slab objects.  However, shrink_slab() is not always called after
page cache reclaim.  For example, drop_slab() uses shrink_slab() to
drop as many slab objects as possible and therefore has to pass it the
phony values 1000/1000, which make no sense as nr_pages_scanned and
lru_pages.  Moreover, as soon as kmemcg reclaim is introduced, we will
have to make up phony values for nr_pages_scanned and lru_pages again
when doing kmem-only reclaim for a memory cgroup, which is possible if
the cgroup's kmem limit is lower than its total memory limit.

So let's rename the two arguments to the neutral `fraction' and
`denominator': shrink_slab() will try to scan roughly the
fraction/denominator part of all slab objects, and callers that have no
page reclaim context can pass a plain ratio instead of pretending to
have scanned pages.
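
For illustration, the two kinds of callers would look roughly like this
under the new naming (a sketch, not part of this patch; the
shrink_control setup and the sc->nr_scanned field follow the current
vmscan.c callers):

        struct shrink_control shrink = {
                .gfp_mask = GFP_KERNEL,
        };

        /* page reclaim path: scan slab objects in proportion to the
         * share of LRU pages just scanned */
        shrink_slab(&shrink, sc->nr_scanned, lru_pages);

        /* drop_slab(): no page reclaim context, so pass an explicit
         * 1000/1000 ratio ("scan as much as possible") instead of
         * pretending pages were scanned */
        shrink_slab(&shrink, 1000, 1000);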

Signed-off-by: Vladimir Davydov <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Rik van Riel <[email protected]>
---
 include/linux/mm.h            |    3 +--
 include/trace/events/vmscan.h |   20 ++++++++++----------
 mm/vmscan.c                   |   26 +++++++++++++-------------
 3 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1cedd00..71c7f50 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1926,8 +1926,7 @@ int drop_caches_sysctl_handler(struct ctl_table *, int,
 #endif
 
 unsigned long shrink_slab(struct shrink_control *shrink,
-                         unsigned long nr_pages_scanned,
-                         unsigned long lru_pages);
+                         unsigned long fraction, unsigned long denominator);
 
 #ifndef CONFIG_MMU
 #define randomize_va_space 0
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 132a985..6bed4ab 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -181,11 +181,11 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_re
 
 TRACE_EVENT(mm_shrink_slab_start,
        TP_PROTO(struct shrinker *shr, struct shrink_control *sc,
-               long nr_objects_to_shrink, unsigned long pgs_scanned,
-               unsigned long lru_pgs, unsigned long cache_items,
+               long nr_objects_to_shrink, unsigned long frac,
+               unsigned long denom, unsigned long cache_items,
                unsigned long long delta, unsigned long total_scan),
 
-       TP_ARGS(shr, sc, nr_objects_to_shrink, pgs_scanned, lru_pgs,
+       TP_ARGS(shr, sc, nr_objects_to_shrink, frac, denom,
                cache_items, delta, total_scan),
 
        TP_STRUCT__entry(
@@ -193,8 +193,8 @@ TRACE_EVENT(mm_shrink_slab_start,
                __field(void *, shrink)
                __field(long, nr_objects_to_shrink)
                __field(gfp_t, gfp_flags)
-               __field(unsigned long, pgs_scanned)
-               __field(unsigned long, lru_pgs)
+               __field(unsigned long, frac)
+               __field(unsigned long, denom)
                __field(unsigned long, cache_items)
                __field(unsigned long long, delta)
                __field(unsigned long, total_scan)
@@ -205,20 +205,20 @@ TRACE_EVENT(mm_shrink_slab_start,
                __entry->shrink = shr->scan_objects;
                __entry->nr_objects_to_shrink = nr_objects_to_shrink;
                __entry->gfp_flags = sc->gfp_mask;
-               __entry->pgs_scanned = pgs_scanned;
-               __entry->lru_pgs = lru_pgs;
+               __entry->frac = frac;
+               __entry->denom = denom;
                __entry->cache_items = cache_items;
                __entry->delta = delta;
                __entry->total_scan = total_scan;
        ),
 
-       TP_printk("%pF %p: objects to shrink %ld gfp_flags %s pgs_scanned %ld lru_pgs %ld cache items %ld delta %lld total_scan %ld",
+       TP_printk("%pF %p: objects to shrink %ld gfp_flags %s frac %ld denom %ld cache items %ld delta %lld total_scan %ld",
                __entry->shrink,
                __entry->shr,
                __entry->nr_objects_to_shrink,
                show_gfp_flags(__entry->gfp_flags),
-               __entry->pgs_scanned,
-               __entry->lru_pgs,
+               __entry->frac,
+               __entry->denom,
                __entry->cache_items,
                __entry->delta,
                __entry->total_scan)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index eea668d..6946997 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -219,7 +219,7 @@ EXPORT_SYMBOL(unregister_shrinker);
 
 static unsigned long
 shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
-                unsigned long nr_pages_scanned, unsigned long lru_pages)
+                unsigned long fraction, unsigned long denominator)
 {
        unsigned long freed = 0;
        unsigned long long delta;
@@ -243,9 +243,9 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
        nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
 
        total_scan = nr;
-       delta = (4 * nr_pages_scanned) / shrinker->seeks;
+       delta = (4 * fraction) / shrinker->seeks;
        delta *= max_pass;
-       do_div(delta, lru_pages + 1);
+       do_div(delta, denominator + 1);
        total_scan += delta;
        if (total_scan < 0) {
                printk(KERN_ERR
@@ -278,7 +278,7 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
                total_scan = max_pass * 2;
 
        trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
-                               nr_pages_scanned, lru_pages,
+                               fraction, denominator,
                                max_pass, delta, total_scan);
 
        while (total_scan >= batch_size) {
@@ -322,23 +322,23 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
  * If the vm encountered mapped pages on the LRU it increase the pressure on
  * slab to avoid swapping.
  *
- * We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits.
+ * We do weird things to avoid (fraction*seeks*entries) overflowing 32 bits.
  *
- * `lru_pages' represents the number of on-LRU pages in all the zones which
- * are eligible for the caller's allocation attempt.  It is used for balancing
- * slab reclaim versus page reclaim.
+ * `fraction' and `denominator' are used for balancing slab reclaim versus page
+ * reclaim. To scan slab objects proportionally to page cache, pass the number
+ * of pages scanned and the total number of on-LRU pages in all the zones which
+ * are eligible for the caller's allocation attempt, respectively.
  *
  * Returns the number of slab objects which we shrunk.
  */
 unsigned long shrink_slab(struct shrink_control *shrinkctl,
-                         unsigned long nr_pages_scanned,
-                         unsigned long lru_pages)
+                         unsigned long fraction, unsigned long denominator)
 {
        struct shrinker *shrinker;
        unsigned long freed = 0;
 
-       if (nr_pages_scanned == 0)
-               nr_pages_scanned = SWAP_CLUSTER_MAX;
+       if (fraction == 0)
+               fraction = SWAP_CLUSTER_MAX;
 
        if (!down_read_trylock(&shrinker_rwsem)) {
                /*
@@ -361,7 +361,7 @@ unsigned long shrink_slab(struct shrink_control *shrinkctl,
                                break;
 
                        freed += shrink_slab_node(shrinkctl, shrinker,
-                                nr_pages_scanned, lru_pages);
+                                                 fraction, denominator);
 
                }
        }
-- 
1.7.10.4
