Before this patch, jdata writes would sometimes get into an infinite
loop because function gfs2_ail1_flush refused to exit until all of
its pages were written out, but if the page was in use and another
process sets PageChecked, it would never be written out: The
PageChecked prevented the ail1 list from being written:

gfs2_logd() calls gfs2_ail1_start() because it decides it needs to
  gfs2_ail1_start() calls gfs2_ail1_flush() (unconditionally)
    gfs2_ail1_flush() calls gfs2_ail1_start_one() for each transaction
                      queued onto sd_ail1_list, to start that transaction
      gfs2_ail1_start_one() calls generic_writepages() for this
                      particular problematic transaction.
        generic_writepages() calls write_cache_pages() passing in
                      __writepage (unconditionally)
          write_cache_pages() calls (*writepage) which maps to __writepage()
            __writepage() calls mapping->a_ops->writepage which maps
                      to gfs2_jdata_writepage
              gfs2_jdata_writepage() sees the page is PageChecked
                      (i.e. dirty is pending) so it skips
                      __gfs2_jdata_writepage and instead redirties
                      the page, returns 0.
            __writepage() returns 0 to write_cache_pages()
          write_cache_pages() returns 0 to generic_writepages()
        generic_writepages() returns 0 to gfs2_ail1_start_one()
       gfs2_ail1_start_one() returns 1 to gfs2_ail1_flush(), which
                      causes it to goto restart; (Infinite loop)

Thus, logd goes into an infinite loop, chewing up 100% of
one CPU, and all other transactions get blocked by the flush.

This patch adds a new queue to the transactions, tr_redirty_list.
This temporarily holds buffer descriptor (bd / bufdata) elements
that need to be redirtied because they were PageChecked while the
ail1 list was being flushed.

The new queue allows all eligible pages to be written properly and
gfs2_ail1_flush to requeue and redirty PageChecked pages. Thus, the
pages are redirtied and go into the next ail1 flush cycle. The current
ail1 flush completes, since those pages are temporarily moved off the
list.

Signed-off-by: Bob Peterson <[email protected]>
---
 fs/gfs2/incore.h |  1 +
 fs/gfs2/log.c    | 27 +++++++++++++++++++++++++++
 fs/gfs2/trans.c  |  1 +
 3 files changed, 29 insertions(+)

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index a5d81a261f77..eef686161953 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -514,6 +514,7 @@ struct gfs2_trans {
        unsigned int tr_first;
        struct list_head tr_ail1_list;
        struct list_head tr_ail2_list;
+       struct list_head tr_redirty_list;
 };
 
 struct gfs2_journal_extent {
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 6e2728416b6b..7d7bae63b3e1 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -119,6 +119,10 @@ __acquires(&sdp->sd_ail_lock)
                if (gl == bd->bd_gl)
                        continue;
                gl = bd->bd_gl;
+               if (PageChecked(bh->b_page)) {
+                       list_move(&bd->bd_ail_st_list, &tr->tr_redirty_list);
+                       continue;
+               }
                list_move(&bd->bd_ail_st_list, &tr->tr_ail1_list);
                mapping = bh->b_page->mapping;
                if (!mapping)
@@ -219,11 +223,21 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, 
struct gfs2_trans *tr,
  * @sdp: The superblock
  *
  * Tries to empty the ail1 lists, starting with the oldest first
+ * Then it requeues any buffers that need to be redirtied, to the ail1 list.
+ * It returns true if the ail1 list was empty BEFORE the redirtied entries
+ * were requeued.
  */
 
 static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
 {
        struct gfs2_trans *tr, *s;
+       struct gfs2_bufdata *bd;
+       struct writeback_control wbc = {
+               .sync_mode = WB_SYNC_NONE,
+               .nr_to_write = LONG_MAX,
+               .range_start = 0,
+               .range_end = LLONG_MAX,
+       };
        int oldest_tr = 1;
        int ret;
        bool withdraw = false;
@@ -237,6 +251,19 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
                        oldest_tr = 0;
        }
        ret = list_empty(&sdp->sd_ail1_list);
+       /*
+        * Now requeue and redirty any bufdata elements that were not written
+        * because they were PageChecked.
+        */
+       list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
+               while (!list_empty(&tr->tr_redirty_list)) {
+                       bd = list_first_entry(&tr->tr_redirty_list,
+                                             struct gfs2_bufdata,
+                                             bd_ail_st_list);
+                       redirty_page_for_writepage(&wbc, bd->bd_bh->b_page);
+                       list_move(&bd->bd_ail_st_list, &tr->tr_ail1_list);
+               }
+       }
        spin_unlock(&sdp->sd_ail_lock);
 
        if (withdraw)
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index a685637a5b55..0545490cb4e3 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -52,6 +52,7 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int 
blocks,
                tr->tr_reserved += gfs2_struct2blk(sdp, revokes);
        INIT_LIST_HEAD(&tr->tr_databuf);
        INIT_LIST_HEAD(&tr->tr_buf);
+       INIT_LIST_HEAD(&tr->tr_redirty_list);
 
        sb_start_intwrite(sdp->sd_vfs);
 
-- 
2.24.1

Reply via email to