cgroup foreign inode handling has quite a few heuristics and
internal states, which sometimes make it difficult to understand
what's going on.  Add tracepoints to improve visibility.

Signed-off-by: Tejun Heo <t...@kernel.org>
---
 fs/fs-writeback.c                |    5 +
 include/trace/events/writeback.h |  123 +++++++++++++++++++++++++++++++++++++++
 mm/memcontrol.c                  |    5 +
 3 files changed, 133 insertions(+)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 658dc16c9e6d..8aaa7eec7b74 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -389,6 +389,8 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
        if (unlikely(inode->i_state & I_FREEING))
                goto skip_switch;
 
+       trace_inode_switch_wbs(inode, old_wb, new_wb);
+
        /*
         * Count and transfer stats.  Note that PAGECACHE_TAG_DIRTY points
         * to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to
@@ -673,6 +675,9 @@ void wbc_detach_inode(struct writeback_control *wbc)
                if (wbc->wb_id != max_id)
                        history |= (1U << slots) - 1;
 
+               if (history)
+                       trace_inode_foreign_history(inode, wbc, history);
+
                /*
                 * Switch if the current wb isn't the consistent winner.
                 * If there are multiple closely competing dirtiers, the
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index aa7f3aeac740..3dc9fb9e7c78 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -176,6 +176,129 @@ static inline unsigned int __trace_wbc_assign_cgroup(struct writeback_control *w
 #endif /* CONFIG_CGROUP_WRITEBACK */
 #endif /* CREATE_TRACE_POINTS */
 
+#ifdef CONFIG_CGROUP_WRITEBACK
+TRACE_EVENT(inode_foreign_history,
+       /* Emitted by wbc_detach_inode() whenever the history value it computed is non-zero. */
+       TP_PROTO(struct inode *inode, struct writeback_control *wbc,
+                unsigned int history),
+
+       TP_ARGS(inode, wbc, history),
+       /* Recorded: bdi device name, inode->i_ino, the wbc's cgroup inode number, history mask. */
+       TP_STRUCT__entry(
+               __array(char,           name, 32)
+               __field(unsigned long,  ino)
+               __field(unsigned int,   cgroup_ino)
+               __field(unsigned int,   history)
+       ),
+       /* strscpy_pad(), unlike strncpy(), always NUL-terminates name and zero-fills its tail. */
+       TP_fast_assign(
+               strscpy_pad(__entry->name, dev_name(inode_to_bdi(inode)->dev), 32);
+               __entry->ino            = inode->i_ino;
+               __entry->cgroup_ino     = __trace_wbc_assign_cgroup(wbc);
+               __entry->history        = history;
+       ),
+
+       TP_printk("bdi %s: ino=%lu cgroup_ino=%u history=0x%x",
+               __entry->name,
+               __entry->ino,
+               __entry->cgroup_ino,
+               __entry->history
+       )
+);
+
+TRACE_EVENT(inode_switch_wbs,
+       /* Emitted by inode_switch_wbs_work_fn() once the I_FREEING check has passed. */
+       TP_PROTO(struct inode *inode, struct bdi_writeback *old_wb,
+                struct bdi_writeback *new_wb),
+
+       TP_ARGS(inode, old_wb, new_wb),
+       /* Recorded: old_wb's bdi device name, inode->i_ino, cgroup inode numbers of both wbs. */
+       TP_STRUCT__entry(
+               __array(char,           name, 32)
+               __field(unsigned long,  ino)
+               __field(unsigned int,   old_cgroup_ino)
+               __field(unsigned int,   new_cgroup_ino)
+       ),
+       /* strscpy_pad(), unlike strncpy(), always NUL-terminates name and zero-fills its tail. */
+       TP_fast_assign(
+               strscpy_pad(__entry->name, dev_name(old_wb->bdi->dev), 32);
+               __entry->ino            = inode->i_ino;
+               __entry->old_cgroup_ino = __trace_wb_assign_cgroup(old_wb);
+               __entry->new_cgroup_ino = __trace_wb_assign_cgroup(new_wb);
+       ),
+
+       TP_printk("bdi %s: ino=%lu old_cgroup_ino=%u new_cgroup_ino=%u",
+               __entry->name,
+               __entry->ino,
+               __entry->old_cgroup_ino,
+               __entry->new_cgroup_ino
+       )
+);
+
+TRACE_EVENT(track_foreign_dirty,
+       /* Emitted on entry to mem_cgroup_track_foreign_dirty_slowpath(), before a slot is picked. */
+       TP_PROTO(struct page *page, struct bdi_writeback *wb),
+
+       TP_ARGS(page, wb),
+       /* Recorded: wb's bdi name and id, the page's host inode number, wb's memcg css id and cgroup ino, the page's memcg cgroup ino. */
+       TP_STRUCT__entry(
+               __array(char,           name, 32)
+               __field(u64,            bdi_id)
+               __field(unsigned long,  ino)
+               __field(unsigned int,   memcg_id)
+               __field(unsigned int,   cgroup_ino)
+               __field(unsigned int,   page_cgroup_ino)
+       ),
+       /* strscpy_pad(), unlike strncpy(), always NUL-terminates name and zero-fills its tail. */
+       TP_fast_assign(
+               strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32);
+               __entry->bdi_id         = wb->bdi->id;
+               __entry->ino            = page->mapping->host->i_ino;
+               __entry->memcg_id       = wb->memcg_css->id;
+               __entry->cgroup_ino     = __trace_wb_assign_cgroup(wb);
+               __entry->page_cgroup_ino = page->mem_cgroup->css.cgroup->kn->id.ino;
+       ),
+
+       TP_printk("bdi %s[%llu]: ino=%lu memcg_id=%u cgroup_ino=%u page_cgroup_ino=%u",
+               __entry->name,
+               __entry->bdi_id,
+               __entry->ino,
+               __entry->memcg_id,
+               __entry->cgroup_ino,
+               __entry->page_cgroup_ino
+       )
+);
+
+TRACE_EVENT(flush_foreign,
+       /* Emitted by mem_cgroup_flush_foreign() right before it calls cgroup_writeback_by_id(). */
+       TP_PROTO(struct bdi_writeback *wb, unsigned int frn_bdi_id,
+                unsigned int frn_memcg_id),
+       /* NOTE(review): track_foreign_dirty records bdi_id as u64; frn_bdi_id is unsigned int here - confirm no truncation. */
+       TP_ARGS(wb, frn_bdi_id, frn_memcg_id),
+       /* Recorded: wb's bdi device name and cgroup inode number, plus the foreign bdi and memcg ids. */
+       TP_STRUCT__entry(
+               __array(char,           name, 32)
+               __field(unsigned int,   cgroup_ino)
+               __field(unsigned int,   frn_bdi_id)
+               __field(unsigned int,   frn_memcg_id)
+       ),
+       /* strscpy_pad(), unlike strncpy(), always NUL-terminates name and zero-fills its tail. */
+       TP_fast_assign(
+               strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32);
+               __entry->cgroup_ino     = __trace_wb_assign_cgroup(wb);
+               __entry->frn_bdi_id     = frn_bdi_id;
+               __entry->frn_memcg_id   = frn_memcg_id;
+       ),
+
+       TP_printk("bdi %s: cgroup_ino=%u frn_bdi_id=%u frn_memcg_id=%u",
+               __entry->name,
+               __entry->cgroup_ino,
+               __entry->frn_bdi_id,
+               __entry->frn_memcg_id
+       )
+);
+#endif
+
 DECLARE_EVENT_CLASS(writeback_write_inode_template,
 
        TP_PROTO(struct inode *inode, struct writeback_control *wbc),
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index eb626a290d93..b74c9d143d5e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4159,6 +4159,8 @@ static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css,
 
 #ifdef CONFIG_CGROUP_WRITEBACK
 
+#include <trace/events/writeback.h>
+
 static int memcg_wb_domain_init(struct mem_cgroup *memcg, gfp_t gfp)
 {
        return wb_domain_init(&memcg->cgwb_domain, gfp);
@@ -4296,6 +4298,8 @@ void mem_cgroup_track_foreign_dirty_slowpath(struct page *page,
        int oldest = -1;
        int i;
 
+       trace_track_foreign_dirty(page, wb);
+
        /*
         * Pick the slot to use.  If there is already a slot for @wb, keep
         * using it.  If not replace the oldest one which isn't being
@@ -4356,6 +4360,7 @@ void mem_cgroup_flush_foreign(struct bdi_writeback *wb)
                if (time_after64(frn->at, now - intv) &&
                    atomic_read(&frn->done.cnt) == 1) {
                        frn->at = 0;
+                       trace_flush_foreign(wb, frn->bdi_id, frn->memcg_id);
                        cgroup_writeback_by_id(frn->bdi_id, frn->memcg_id, 0,
                                               WB_REASON_FOREIGN_FLUSH,
                                               &frn->done);

Reply via email to