Show count of oom killer invocations in /proc/vmstat and count of
processes killed in memory cgroup in knob "memory.events"
(in memory.oom_control for v1 cgroup).

Also describe difference between "oom" and "oom_kill" in memory
cgroup documentation. Currently oom in memory cgroup kills tasks
iff shortage has happened inside page fault.

These counters help in monitoring oom kills - for now
the only way is grepping for magic words in the kernel log.

Signed-off-by: Konstantin Khlebnikov <[email protected]>

---

v1: https://lkml.kernel.org/r/149520375057.74196.2843113275800730971.stgit@buzz

v2:
* count all oom kills in /proc/vmstat
* update counter for the cgroup which the task belongs to
---
 Documentation/cgroup-v2.txt   |   20 ++++++++++++++++----
 include/linux/memcontrol.h    |    5 ++++-
 include/linux/vm_event_item.h |    1 +
 mm/memcontrol.c               |    2 ++
 mm/oom_kill.c                 |    5 +++++
 mm/vmstat.c                   |    1 +
 6 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
index dc5e2dcdbef4..738b1c7023ad 100644
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -826,13 +826,25 @@ PAGE_SIZE multiple when read back.
 
                The number of times the cgroup's memory usage was
                about to go over the max boundary.  If direct reclaim
-               fails to bring it down, the OOM killer is invoked.
+               fails to bring it down, the cgroup goes to OOM state.
 
          oom
 
-               The number of times the OOM killer has been invoked in
-               the cgroup.  This may not exactly match the number of
-               processes killed but should generally be close.
+               The number of times the cgroup's memory usage has
+               reached the limit and allocation was about to fail.
+
+               Depending on context, the result could be invocation of the
+               OOM killer and retrying allocation, or failing allocation.
+
+               Failed allocation in its turn could be returned into
+               userspace as -ENOMEM or silently ignored in cases like
+               disk readahead.  For now OOM in memory cgroup kills
+               tasks iff shortage has happened inside page fault.
+
+         oom_kill
+
+               The number of processes belonging to this cgroup
+               killed by any kind of OOM killer.
 
   memory.stat
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 899949bbb2f9..42296f7001da 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -556,8 +556,11 @@ static inline void mem_cgroup_count_vm_event(struct 
mm_struct *mm,
 
        rcu_read_lock();
        memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
-       if (likely(memcg))
+       if (likely(memcg)) {
                this_cpu_inc(memcg->stat->events[idx]);
+               if (idx == OOM_KILL)
+                       cgroup_file_notify(&memcg->events_file);
+       }
        rcu_read_unlock();
 }
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index d84ae90ccd5c..1707e0a7d943 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -41,6 +41,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
                KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
                PAGEOUTRUN, PGROTATED,
                DROP_PAGECACHE, DROP_SLAB,
+               OOM_KILL,
 #ifdef CONFIG_NUMA_BALANCING
                NUMA_PTE_UPDATES,
                NUMA_HUGE_PTE_UPDATES,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 94172089f52f..7011ebf2b90e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3574,6 +3574,7 @@ static int mem_cgroup_oom_control_read(struct seq_file 
*sf, void *v)
 
        seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable);
        seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom);
+       seq_printf(sf, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
        return 0;
 }
 
@@ -5165,6 +5166,7 @@ static int memory_events_show(struct seq_file *m, void *v)
        seq_printf(m, "high %lu\n", memcg_sum_events(memcg, MEMCG_HIGH));
        seq_printf(m, "max %lu\n", memcg_sum_events(memcg, MEMCG_MAX));
        seq_printf(m, "oom %lu\n", memcg_sum_events(memcg, MEMCG_OOM));
+       seq_printf(m, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
 
        return 0;
 }
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 04c9143a8625..dd30a045ef5b 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -876,6 +876,11 @@ static void oom_kill_process(struct oom_control *oc, const 
char *message)
        /* Get a reference to safely compare mm after task_unlock(victim) */
        mm = victim->mm;
        mmgrab(mm);
+
+       /* Raise event before sending signal: reaper must see this */
+       count_vm_event(OOM_KILL);
+       mem_cgroup_count_vm_event(mm, OOM_KILL);
+
        /*
         * We should send SIGKILL before setting TIF_MEMDIE in order to prevent
         * the OOM victim from depleting the memory reserves from the user
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 76f73670200a..fe80b81a86e0 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1018,6 +1018,7 @@ const char * const vmstat_text[] = {
 
        "drop_pagecache",
        "drop_slab",
+       "oom_kill",
 
 #ifdef CONFIG_NUMA_BALANCING
        "numa_pte_updates",

Reply via email to