Hi,

On 02/01/2019 21:12, Bob Peterson wrote:
Hi,

This patch is a working prototype to fix the hangs that result from
doing certain jdata I/O, primarily xfstests/269.

My earlier prototypes used a special "fs_specific" wbc flag that I suspect
the upstream community wouldn't like. So this version is a bit bigger and
more convoluted but accomplishes the same basic thing without the special
wbc flag.

It works by implementing a special new version of writepage that's used
for writing pages from an ail sync operation, as opposed to writes done
on behalf of an inode write operation. So far I've done more than 125
iterations of test 269 which consistently failed on the first iteration
before the patch.

Since jdata and ordered pages can both be put on the ail lists, I had
to add a special check in function start_writepage to see how to handle
the page.

It may not be perfect, but I wanted to get reactions from developers
to see if I'm off base or forgetting something.

Signed-off-by: Bob Peterson <rpete...@redhat.com>
---
  fs/gfs2/aops.c  | 28 +++++++++++++++++++++++++++-
  fs/gfs2/aops.h  |  4 ++++
  fs/gfs2/log.c   | 37 +++++++++++++++++++++++++++++++------
  fs/gfs2/log.h   |  3 ++-
  fs/gfs2/super.c |  2 +-
  5 files changed, 65 insertions(+), 9 deletions(-)

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 05dd78f4b2b3..bd6cb49f3b9a 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -192,6 +192,32 @@ static int __gfs2_jdata_writepage(struct page *page, 
struct writeback_control *w
        return gfs2_write_full_page(page, gfs2_get_block_noalloc, wbc);
  }
+/**
+ * gfs2_ail_writepage - Write complete page from the ail list
+ * @page: Page to write
+ * @wbc: The writeback control
+ *
+ * Returns: errno
+ *
+ */
+
+int gfs2_ail_writepage(struct page *page, struct writeback_control *wbc)
+{
+       struct inode *inode = page->mapping->host;
+       struct gfs2_inode *ip = GFS2_I(inode);
+       struct gfs2_sbd *sdp = GFS2_SB(inode);
+
+       if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
+               goto out;
+       if (current->journal_info == NULL)
+               return gfs2_write_full_page(page, gfs2_get_block_noalloc, wbc);
+
+       redirty_page_for_writepage(wbc, page);
+out:
+       unlock_page(page);
+       return 0;
+}
+

Since this is only called from the ail flushing code, we cannot be in a transaction and in any case we do not want to defer the writeback, so this whole thing can be replaced by a simple call to gfs2_write_full_page()


  /**
   * gfs2_jdata_writepage - Write complete page
   * @page: Page to write
@@ -201,7 +227,7 @@ static int __gfs2_jdata_writepage(struct page *page, struct 
writeback_control *w
   *
   */
-static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
+int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
  {
        struct inode *inode = page->mapping->host;
        struct gfs2_inode *ip = GFS2_I(inode);
diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h
index fa8e5d0144dd..cbaaa372edc4 100644
--- a/fs/gfs2/aops.h
+++ b/fs/gfs2/aops.h
@@ -15,5 +15,9 @@ extern int gfs2_stuffed_write_end(struct inode *inode, struct 
buffer_head *dibh,
  extern void adjust_fs_space(struct inode *inode);
  extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
                                   unsigned int from, unsigned int len);
+extern int gfs2_jdata_writepage(struct page *page,
+                               struct writeback_control *wbc);
+extern int gfs2_ail_writepage(struct page *page,
+                             struct writeback_control *wbc);
#endif /* __AOPS_DOT_H__ */
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 5bfaf381921a..1c70471fb07d 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -23,6 +23,7 @@
  #include <linux/writeback.h>
  #include <linux/list_sort.h>
+#include "aops.h"
  #include "gfs2.h"
  #include "incore.h"
  #include "bmap.h"
@@ -82,18 +83,40 @@ static void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
        brelse(bd->bd_bh);
  }
+/*
+ * Function used by generic_writepages to call the real writepage
+ * function and set the mapping flags on error
+ */
+static int start_writepage(struct page *page, struct writeback_control *wbc,
+                          void *data)
+{
+       struct address_space *mapping = page->mapping;
+       int ail_start = *(int *)data;
+       int ret;
+       int (*writepage) (struct page *page, struct writeback_control *wbc);
+
+       writepage = mapping->a_ops->writepage;
+
+       if (ail_start && writepage == gfs2_jdata_writepage)
+               writepage = gfs2_ail_writepage;
+       ret = writepage(page, wbc);
+       mapping_set_error(mapping, ret);
+       return ret;
+}
+
If you are passing this directly to write_cache_pages() when we need it, then why do we need the switch to a different writepage here? In both cases we are doing the same thing - writing back in-place data after it has been through the journal, so we should be able to use the same function.
  /**
   * gfs2_ail1_start_one - Start I/O on a part of the AIL
   * @sdp: the filesystem
   * @wbc: The writeback control structure
   * @ai: The ail structure
+ * @start: True if called from gfs2_ail1_start
   *
   */
static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
                               struct writeback_control *wbc,
-                              struct gfs2_trans *tr,
-                              bool *withdraw)
+                              struct gfs2_trans *tr, bool *withdraw,
+                              int ail_start)
  __releases(&sdp->sd_ail_lock)
  __acquires(&sdp->sd_ail_lock)
  {
@@ -128,7 +151,8 @@ __acquires(&sdp->sd_ail_lock)
                if (!mapping)
                        continue;
                spin_unlock(&sdp->sd_ail_lock);
-               generic_writepages(mapping, wbc);
+               write_cache_pages(mapping, wbc, start_writepage,
+                                 &ail_start);
                spin_lock(&sdp->sd_ail_lock);
                if (wbc->nr_to_write <= 0)
                        break;
@@ -148,7 +172,8 @@ __acquires(&sdp->sd_ail_lock)
   * writeback control structure
   */
-void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
+void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc,
+                    int start)
  {
        struct list_head *head = &sdp->sd_ail1_list;
        struct gfs2_trans *tr;
@@ -162,7 +187,7 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct 
writeback_control *wbc)
        list_for_each_entry_reverse(tr, head, tr_list) {
                if (wbc->nr_to_write <= 0)
                        break;
-               if (gfs2_ail1_start_one(sdp, wbc, tr, &withdraw))
+               if (gfs2_ail1_start_one(sdp, wbc, tr, &withdraw, start))
                        goto restart;
        }
        spin_unlock(&sdp->sd_ail_lock);
@@ -186,7 +211,7 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
                .range_end = LLONG_MAX,
        };
- return gfs2_ail1_flush(sdp, &wbc);
+       return gfs2_ail1_flush(sdp, &wbc, 1);
  }
/**
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 1bc9bd444b28..3ebad7e505e4 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -74,7 +74,8 @@ extern void gfs2_write_log_header(struct gfs2_sbd *sdp, 
struct gfs2_jdesc *jd,
  extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
                           u32 type);
  extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
-extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control 
*wbc);
+extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control 
*wbc,
+                           int start);
extern void gfs2_log_shutdown(struct gfs2_sbd *sdp);
  extern int gfs2_logd(void *data);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index d4b11c903971..31e7948c3c0c 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -762,7 +762,7 @@ static int gfs2_write_inode(struct inode *inode, struct 
writeback_control *wbc)
                               GFS2_LOG_HEAD_FLUSH_NORMAL |
                               GFS2_LFC_WRITE_INODE);
        if (bdi->wb.dirty_exceeded)
-               gfs2_ail1_flush(sdp, wbc);
+               gfs2_ail1_flush(sdp, wbc, 0);
        else
                filemap_fdatawrite(metamapping);
        if (flush_all)

Reply via email to