On 3/13/19 5:13 PM, Andreas Gruenbacher wrote:
Hi Edwin,

On Wed, 6 Mar 2019 at 12:08, Edwin Török <edvin.to...@citrix.com> wrote:
Hello,

I've been trying to debug a GFS2 deadlock that we see in our lab quite 
frequently with a 4.19 kernel. With 4.4 and older kernels we were not able to 
reproduce this.
See below for lockdep dumps and stacktraces.

thanks for the thorough bug report.  Does the below fix work for you?

Hi Andreas,

I've tested the patch and it doesn't fix the issue. As far as I can see, current->backing_dev_info is not used by any of the code called from balance_dirty_pages_ratelimited() so I don't see how it could work.

I found a way of consistently reproducing the issue almost immediately (tested with the latest master commit):

# cat a.py
import os

fd = os.open("f", os.O_CREAT|os.O_TRUNC|os.O_WRONLY)

for i in range(1000):
    os.mkdir("xxx" + str(i), 0777)

buf = 'x' * 4096

while True:
    count = os.write(fd, buf)
    if count <= 0:
        break

# cat b.py
import os
while True:
  os.mkdir("x", 0777)
  os.rmdir("x")

# echo 8192 > /proc/sys/vm/dirty_bytes
# cd /gfs2mnt
# (mkdir tmp1; cd tmp1; python2 ~/a.py) &
# (mkdir tmp2; cd tmp2; python2 ~/a.py) &
# (mkdir tmp3; cd tmp3; python2 ~/b.py) &

This should deadlock almost immediately. One of the processes will be waiting in balance_dirty_pages() and holding sd_log_flush_lock and several others will be waiting for sd_log_flush_lock.

I came up with the following patch which seems to resolve the issue by failing to write the inode if it can't take the lock, but it seems like a dirty workaround rather than a proper fix:

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index ee20ea42e7b5..0659560bb8c6 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -766,24 +766,29 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
 }

 /**
- * gfs2_log_flush - flush incore transaction(s)
+ * __gfs2_log_flush - flush incore transaction(s)
  * @sdp: the filesystem
  * @gl: The glock structure to flush.  If NULL, flush the whole incore log
  * @flags: The log header flags: GFS2_LOG_HEAD_FLUSH_* and debug flags
+ * @wait: Wait for the sd_log_flush_lock
  *
  */

-void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
+int __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags,
+                     bool wait)
 {
        struct gfs2_trans *tr;
        enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);

-       down_write(&sdp->sd_log_flush_lock);
+       if (wait)
+               down_write(&sdp->sd_log_flush_lock);
+       else if (!down_write_trylock(&sdp->sd_log_flush_lock))
+               return -EAGAIN;

        /* Log might have been flushed while we waited for the flush lock */
        if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags)) {
                up_write(&sdp->sd_log_flush_lock);
-               return;
+               return 0;
        }
        trace_gfs2_log_flush(sdp, 1, flags);

@@ -857,6 +862,12 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
        up_write(&sdp->sd_log_flush_lock);

        kfree(tr);
+       return 0;
+}
+
+void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
+{
+       __gfs2_log_flush(sdp, gl, flags, true);
 }

 /**
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 20241436126d..2404167004bb 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -72,6 +72,8 @@ extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
 extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
                                  u64 seq, u32 tail, u32 flags, int op_flags);
+extern int __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
+                           u32 type, bool wait);
 extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
                           u32 type);
extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index a971862b186e..369502a16ad6 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -756,10 +756,14 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
        int ret = 0;
        bool flush_all = (wbc->sync_mode == WB_SYNC_ALL || gfs2_is_jdata(ip));

-       if (flush_all)
-               gfs2_log_flush(GFS2_SB(inode), ip->i_gl,
+       if (flush_all) {
+               ret = __gfs2_log_flush(GFS2_SB(inode), ip->i_gl,
                               GFS2_LOG_HEAD_FLUSH_NORMAL |
-                              GFS2_LFC_WRITE_INODE);
+                              GFS2_LFC_WRITE_INODE,
+                              wbc->sync_mode == WB_SYNC_ALL);
+               if (ret)
+                       return ret;
+       }
        if (bdi->wb.dirty_exceeded)
                gfs2_ail1_flush(sdp, wbc);
        else
---

Regards,
--
Ross Lagerwall

Reply via email to