The high order allocation detector (HOAD) monitors allocations of order
greater than zero, and generates a uevent if a configured number of
allocations happen within a configured time interval.

In response to this uevent, userspace can enable event tracing. If the
stream of high-order allocations continues, the trace can help to
identify the code path causing them.

HOAD has a sysfs control interface, at /sys/kernel/mm/hoad/control:
- "enable ORDER COUNT MSECS"
  Sets up monitoring of allocations of order ORDER: if COUNT such
  allocations are detected within MSECS, a uevent is sent. Further
  uevents are then suspended, to avoid userspace races.
- "disable ORDER"
  Stops monitoring allocations of order ORDER.
- "resume [delay-msecs]"
  Allows sending a new uevent, either immediately or after the given
  delay.

The uevent is generated with ACTION="change", SUBSYSTEM="hoad", ORDER
set to the order of the allocation that has caused the uevent.

HOAD also provides a tracepoint named "hoad", in the kmem/ group, that
can be used for tracing. This tracepoint fires on every allocation of
order greater than or equal to the minimal order for which monitoring is
enabled.

https://jira.sw.ru/browse/PSBM-92088
Signed-off-by: Nikita Yushchenko <[email protected]>
---
 include/trace/events/kmem.h |  12 ++
 mm/page_alloc.c             | 266 ++++++++++++++++++++++++++++++++++++
 2 files changed, 278 insertions(+)

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 9cb647609df3..b425c6856bfd 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -305,6 +305,18 @@ TRACE_EVENT(mm_page_alloc_extfrag,
                __entry->alloc_migratetype == __entry->fallback_migratetype)
 );
 
+/*
+ * hoad - tracepoint of the high order allocation detector.
+ *
+ * Records a single field, the order of the page allocation, and prints
+ * it as "order=<n>".
+ */
+TRACE_EVENT(hoad,
+       TP_PROTO(int order),
+       TP_ARGS(order),
+       TP_STRUCT__entry(
+               __field(int, order)
+       ),
+       TP_fast_assign(
+               __entry->order = order;
+       ),
+       TP_printk("order=%d", __entry->order)
+);
+
 #endif /* _TRACE_KMEM_H */
 
 /* This part must be outside protection */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1ae193b26a1d..959b1bfbafef 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3533,6 +3533,270 @@ static __always_inline void warn_high_order(int order, 
gfp_t gfp_mask)
        }
 }
 
+/*
+ * Monitoring state for one allocation order. An instance is installed in
+ * hoad_table[] while monitoring of that order is enabled.
+ */
+struct hoad_order_info {
+       /* Length of the counting window, in jiffies. */
+       unsigned long interval;
+       /* Counter value at which a uevent is requested. */
+       int max_allocs;
+       /* Allocations noticed so far; zeroed by reset_counter_timer. */
+       atomic_t counter;
+       /* jiffies value recorded when counter went from 0 to 1. */
+       unsigned long since_jiffies;
+       /* Armed for since_jiffies + interval; its callback resets counter. */
+       struct timer_list reset_counter_timer;
+};
+
+/* Per-order monitoring state, indexed by order; NULL means "not monitored". */
+static struct hoad_order_info *hoad_table[MAX_ORDER];
+/* Serializes updates of hoad_table[] and hoad_trace_min_order. */
+static DEFINE_MUTEX(hoad_mutex);
+/* kobject the uevents are sent on; set up by hoad_init(). */
+static struct kobject *hoad_kobj;
+/*
+ * Order that triggered the last uevent, or 0 if a new uevent may be sent.
+ * Claimed with cmpxchg() in hoad_notice_alloc(), cleared by hoad_resume().
+ */
+static int hoad_uevent_order;
+/* Expiry time last programmed into hoad_resume_timer. */
+static unsigned long hoad_resume_jiffies;
+/*
+ * Smallest order for which the "hoad" tracepoint fires. Starts at
+ * MAX_ORDER ("no order is monitored") so that the tracepoint stays quiet
+ * before hoad_init() has run, and also if hoad_init() fails.
+ */
+static int hoad_trace_min_order = MAX_ORDER;
+
+/* Millisecond multiples used to limit and to pretty-print resume delays. */
+#define MSEC_PER_MINUTE                (60 * MSEC_PER_SEC)
+#define MSEC_PER_HOUR          (60 * MSEC_PER_MINUTE)
+/* A day is 24 hours (this used to multiply by 60). */
+#define MSEC_PER_DAY           (24 * MSEC_PER_HOUR)
+
+/*
+ * Callback of hoad_order_info::reset_counter_timer: the counting window
+ * is over, so start counting allocations from zero again.
+ */
+static void hoad_reset_counter(struct timer_list *timer)
+{
+       struct hoad_order_info *info;
+
+       info = container_of(timer, struct hoad_order_info,
+                       reset_counter_timer);
+       atomic_set(&info->counter, 0);
+}
+
+/*
+ * Work handler that notifies userspace: sends a KOBJ_CHANGE uevent on
+ * hoad_kobj with "ORDER=<hoad_uevent_order>" in its environment.
+ *
+ * Scheduled through hoad_send_uevent_work by hoad_notice_alloc().
+ */
+static void hoad_send_uevent(struct work_struct *work)
+{
+       /* Sized for "ORDER=" followed by any int value and the NUL. */
+       char order_string[sizeof("ORDER=-2147483648")];
+       char *envp[] = { order_string, NULL };
+
+       snprintf(order_string, sizeof(order_string), "ORDER=%d",
+                       hoad_uevent_order);
+       kobject_uevent_env(hoad_kobj, KOBJ_CHANGE, envp);
+}
+static DECLARE_WORK(hoad_send_uevent_work, hoad_send_uevent);
+
+/*
+ * Callback of hoad_resume_timer: forget the order of the last uevent, so
+ * that the cmpxchg() in hoad_notice_alloc() can succeed again and a new
+ * uevent can be sent. The timer argument is not used.
+ */
+static void hoad_resume(unsigned long data)
+{
+       hoad_uevent_order = 0;
+}
+static DEFINE_TIMER(hoad_resume_timer, hoad_resume, 0, 0);
+
+/*
+ * Account one page allocation attempt of the given order.
+ *
+ * Allocations carrying __GFP_NORETRY or __GFP_ORDER_NOWARN are ignored.
+ * Otherwise the "hoad" tracepoint fires if @order is at least
+ * hoad_trace_min_order, and, if @order is monitored, its counter is
+ * incremented. When the counter reaches max_allocs and no earlier uevent
+ * is still pending a "resume", the uevent work is scheduled.
+ *
+ * @order: order of the allocation
+ * @gfp:   allocation flags
+ */
+static void hoad_notice_alloc(int order, gfp_t gfp)
+{
+       struct hoad_order_info *hoi;
+       int count;
+       bool hit = false;
+
+       if (gfp & (__GFP_NORETRY | __GFP_ORDER_NOWARN))
+               return;
+
+       if (order >= hoad_trace_min_order)
+               trace_hoad(order);
+
+       /*
+        * The call site only checks order > 0, so an order that is too
+        * large can get here. Never index hoad_table[] out of bounds;
+        * orders outside [1, MAX_ORDER) cannot be monitored anyway.
+        */
+       if (order < 1 || order >= MAX_ORDER)
+               return;
+
+       rcu_read_lock();
+       hoi = rcu_dereference(hoad_table[order]);
+       if (hoi) {
+               count = atomic_inc_return(&hoi->counter);
+               if (count == 1) {
+                       /* First allocation of a window: start the clock. */
+                       hoi->since_jiffies = jiffies;
+                       mod_timer(&hoi->reset_counter_timer,
+                                       hoi->since_jiffies + hoi->interval);
+               }
+               /* Exact match, so a window triggers at most once. */
+               hit = (count == hoi->max_allocs);
+       }
+       rcu_read_unlock();
+
+       if (hit) {
+               /*
+                * Only the caller that moves hoad_uevent_order away from 0
+                * sends the uevent; it stays non-zero, suppressing further
+                * uevents, until hoad_resume() clears it.
+                */
+               if (cmpxchg(&hoad_uevent_order, 0, order) == 0)
+                       schedule_work(&hoad_send_uevent_work);
+       }
+}
+
+/*
+ * Publish @hoi (NULL to stop monitoring) as the state of @order, update
+ * hoad_trace_min_order to the lowest order that is still monitored
+ * (MAX_ORDER if none), and dispose of the state that was replaced.
+ *
+ * The caller must pass an @order that is a valid hoad_table[] index.
+ */
+static void hoad_install_order_info(int order, struct hoad_order_info *hoi)
+{
+       struct hoad_order_info *prev;
+       int min_order = 1;
+
+       mutex_lock(&hoad_mutex);
+       prev = hoad_table[order];
+       rcu_assign_pointer(hoad_table[order], hoi);
+       while (min_order < MAX_ORDER && !hoad_table[min_order])
+               min_order++;
+       hoad_trace_min_order = min_order;
+       mutex_unlock(&hoad_mutex);
+
+       if (!prev)
+               return;
+
+       /*
+        * Readers access the table under rcu_read_lock() and may re-arm
+        * the timer: wait for them first, then stop the timer, and only
+        * then free the memory the timer is embedded in.
+        */
+       synchronize_rcu();
+       del_timer_sync(&prev->reset_counter_timer);
+       kfree(prev);
+}
+
+/*
+ * Start (or reconfigure) monitoring of allocations of @order: a uevent is
+ * requested once @max_allocs allocations are counted within a window of
+ * @interval_msecs milliseconds.
+ *
+ * Returns 0 on success, -EINVAL if @order is outside [1, MAX_ORDER),
+ * @max_allocs is below 1 or the interval converts to zero jiffies, and
+ * -ENOMEM if the state cannot be allocated.
+ */
+static int hoad_enable_for_order(int order, int max_allocs,
+               unsigned int interval_msecs)
+{
+       struct hoad_order_info *info;
+       unsigned long window;
+
+       if (order < 1 || order >= MAX_ORDER || max_allocs < 1)
+               return -EINVAL;
+
+       window = msecs_to_jiffies(interval_msecs);
+       if (!window)
+               return -EINVAL;
+
+       info = kzalloc(sizeof(*info), GFP_KERNEL);
+       if (!info)
+               return -ENOMEM;
+
+       timer_setup(&info->reset_counter_timer, hoad_reset_counter, 0);
+       info->max_allocs = max_allocs;
+       info->interval = window;
+
+       hoad_install_order_info(order, info);
+       return 0;
+}
+
+/*
+ * Stop monitoring allocations of @order.
+ *
+ * Returns 0 on success (also when the order was not monitored), or
+ * -EINVAL if @order is outside [1, MAX_ORDER).
+ */
+static int hoad_disable_for_order(int order)
+{
+       if (order >= 1 && order < MAX_ORDER) {
+               hoad_install_order_info(order, NULL);
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+/*
+ * Read handler of the "control" sysfs file.
+ *
+ * Emits one line per monitored order:
+ *   order <N>: <counter>/<max_allocs> in <elapsed>/<interval> msecs
+ * followed, while hoad_uevent_order is non-zero, by an
+ * "event generation suspended" line that also reports the time left
+ * until the resume timer expires, if that is still in the future.
+ *
+ * Returns the number of bytes stored in @buf.
+ */
+static ssize_t hoad_control_show(struct kobject *kobj,
+               struct kobj_attribute *attr, char *buf)
+{
+       char *p = buf, *endp = &p[PAGE_SIZE - 1];
+       int order;
+       struct hoad_order_info *hoi;
+       int counter;
+       long d;
+       unsigned int msecs;
+
+       /*
+        * scnprintf() returns the number of characters actually stored,
+        * so 'p' can never move past 'endp'. snprintf() returns the
+        * would-be length and could make 'endp - p' negative.
+        */
+       rcu_read_lock();
+       for (order = 1; order < MAX_ORDER; order++) {
+               hoi = rcu_dereference(hoad_table[order]);
+               if (!hoi)
+                       continue;
+
+               counter = atomic_read(&hoi->counter);
+               /* since_jiffies is meaningful only while counting. */
+               msecs = counter ?
+                       jiffies_to_msecs(jiffies - hoi->since_jiffies) :
+                       0;
+               p += scnprintf(p, endp - p,
+                               "order %u: %u/%u in %u/%u msecs\n",
+                               order, counter, hoi->max_allocs,
+                               msecs, jiffies_to_msecs(hoi->interval));
+       }
+       rcu_read_unlock();
+
+       if (hoad_uevent_order) {
+               p += scnprintf(p, endp - p, "event generation suspended");
+               /* Signed difference: positive while expiry is ahead. */
+               d = (long)(hoad_resume_jiffies - jiffies);
+               if (d > 0) {
+                       p += scnprintf(p, endp - p, ", resume in ");
+                       msecs = jiffies_to_msecs(d);
+                       /* Hours and minutes round to nearest, seconds up. */
+                       if (msecs >= 2 * MSEC_PER_HOUR)
+                               p += scnprintf(p, endp - p, "%lu hours",
+                                       (msecs + (MSEC_PER_HOUR / 2)) /
+                                               MSEC_PER_HOUR);
+                       else if (msecs > 2 * MSEC_PER_MINUTE)
+                               p += scnprintf(p, endp - p, "%lu minutes",
+                                       (msecs + (MSEC_PER_MINUTE) / 2) /
+                                               MSEC_PER_MINUTE);
+                       else
+                               p += scnprintf(p, endp - p, "%lu seconds",
+                                       (msecs + MSEC_PER_SEC - 1) /
+                                               MSEC_PER_SEC);
+               }
+               p += scnprintf(p, endp - p, "\n");
+       }
+
+       return p - buf;
+}
+
+/*
+ * Program hoad_resume_timer to expire @msecs milliseconds from now and
+ * record the expiry in hoad_resume_jiffies.
+ *
+ * Returns 0, or -EINVAL if @msecs exceeds 5 * MSEC_PER_DAY.
+ */
+static int hoad_schedule_resume(unsigned int msecs)
+{
+       if (msecs > 5 * MSEC_PER_DAY)
+               return -EINVAL;
+
+       hoad_resume_jiffies = jiffies + msecs_to_jiffies(msecs);
+       mod_timer(&hoad_resume_timer, hoad_resume_jiffies);
+       return 0;
+}
+
+/*
+ * Write handler of the "control" sysfs file. Accepted commands, after
+ * surrounding whitespace is stripped:
+ *   "enable ORDER COUNT MSECS"  - see hoad_enable_for_order()
+ *   "disable ORDER"             - see hoad_disable_for_order()
+ *   "resume [MSECS]"            - re-arm the resume timer, with a delay
+ *                                 of 0 if MSECS is not given
+ * Empty input is accepted and ignored.
+ *
+ * Returns @len on success, -EINVAL for a malformed command, -ENOMEM if
+ * the input cannot be copied, or the error of the command handler.
+ */
+static ssize_t hoad_control_store(struct kobject *kobj,
+               struct kobj_attribute *attr, const char *buf, size_t len)
+{
+       char *p, *q;
+       int order, max_allocs, ret;
+       unsigned int msecs;
+       char c;
+
+       if (len == 0)
+               return 0;
+       /* strim() modifies the string, so work on a private copy. */
+       p = kstrdup(buf, GFP_KERNEL);
+       if (!p)
+               return -ENOMEM;
+       q = strim(p);
+       if (*q == '\0') {
+               ret = 0;
+               goto out;
+       }
+
+       /*
+        * The trailing %c converts only if something follows the last
+        * number, so a conversion count one higher than expected means
+        * trailing garbage and the command is not accepted.
+        *
+        * 'order' and 'max_allocs' are ints and are therefore read with
+        * %d; range checking is done by the command handlers.
+        */
+       if (sscanf(q, "enable %d %d %u%c",
+                               &order, &max_allocs, &msecs, &c) == 3)
+               ret = hoad_enable_for_order(order, max_allocs, msecs);
+       else if (sscanf(q, "disable %d%c", &order, &c) == 1)
+               ret = hoad_disable_for_order(order);
+       else if (sscanf(q, "resume %u%c", &msecs, &c) == 1)
+               ret = hoad_schedule_resume(msecs);
+       else if (!strcmp(q, "resume"))
+               ret = hoad_schedule_resume(0);
+       else
+               ret = -EINVAL;
+
+out:
+       kfree(p);
+       return ret ? ret : len;
+}
+
+/*
+ * The "control" attribute: readable and writable by the owner only,
+ * served by hoad_control_show() and hoad_control_store().
+ */
+static struct kobj_attribute hoad_control_attr = {
+       .attr.name = "control",
+       .attr.mode = S_IRUSR | S_IWUSR,
+       .show = hoad_control_show,
+       .store = hoad_control_store,
+};
+
+/*
+ * Create the "hoad" kset under mm_kobj and its "control" file.
+ *
+ * Runs once as a late initcall, hence __init. Returns 0 on success,
+ * -ENOMEM if the kset cannot be created, or the error returned by
+ * sysfs_create_file().
+ */
+static int __init hoad_init(void)
+{
+       struct kset *kset;
+       int ret;
+
+       /*
+        * Initialize the state first, so that it is valid before the
+        * control file becomes visible and also if the setup below fails.
+        */
+       hoad_trace_min_order = MAX_ORDER;
+       hoad_resume_jiffies = jiffies;
+
+       /* To be able to generate uevents, need a kobject with kset defined.
+        *
+        * To avoid extra depth inside sysfs, create a kset and use its
+        * internal kobject, by setting its 'kset' field to itself.
+        */
+       kset = kset_create_and_add("hoad", NULL, mm_kobj);
+       if (!kset)
+               return -ENOMEM;
+       hoad_kobj = &kset->kobj;
+       hoad_kobj->kset = kset;
+
+       ret = sysfs_create_file(hoad_kobj, &hoad_control_attr.attr);
+       if (ret) {
+               /* Undo the self-reference before tearing the kset down. */
+               hoad_kobj->kset = NULL;
+               hoad_kobj = NULL;
+               kset_unregister(kset);
+               return ret;
+       }
+
+       return 0;
+}
+late_initcall(hoad_init);
+
 /*
  * This is the 'heart' of the zoned buddy allocator.
  */
@@ -3557,6 +3821,8 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
                !(current->flags & PF_MEMALLOC));
 
        warn_high_order(order, gfp_mask);
+       if (order > 0)
+               hoad_notice_alloc(order, gfp_mask);
 
        if (should_fail_alloc_page(gfp_mask, order))
                return NULL;
-- 
2.20.1

_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to