Gitweb:     
http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=585e6d8856526a846b90b485abf37ec40e5da1cf
Commit:     585e6d8856526a846b90b485abf37ec40e5da1cf
Parent:     dac61f521b1e4d2c6c48023f2f2743c6096b48ca
Author:     David Chinner <[EMAIL PROTECTED]>
AuthorDate: Sat Feb 10 18:32:29 2007 +1100
Committer:  Tim Shimmin <[EMAIL PROTECTED]>
CommitDate: Sat Feb 10 18:32:29 2007 +1100

    [XFS] Fix a synchronous buftarg flush deadlock when freezing.
    
    At the last stage of a freeze, we flush the buftarg synchronously over and
    over again until it succeeds twice without skipping any buffers.
    
    The delwri list flush skips pinned buffers, but tries to flush all others.
    It removes the buffers from the delwri list, then tries to lock them one
    at a time as it traverses the list to issue the I/O. It holds them locked
    until we issue all of the I/O and then unlocks them once we've waited for
    it to complete.
    
    The problem is that during a freeze, the filesystem may still be doing
    stuff - like flushing delalloc data buffers - in the background and hence
    we can be trying to lock buffers that were on the delwri list at the same
    time. Hence we can get ABBA deadlocks between threads doing allocation and
    the buftarg flush (freeze) thread.
    
    Fix it by skipping locked (and pinned) buffers as we traverse the delwri
    buffer list.
    
    SGI-PV: 957195
    SGI-Modid: xfs-linux-melb:xfs-kern:27535a
    
    Signed-off-by: David Chinner <[EMAIL PROTECTED]>
    Signed-off-by: Tim Shimmin <[EMAIL PROTECTED]>
---
 fs/xfs/linux-2.6/xfs_buf.c |  117 ++++++++++++++++++++++---------------------
 1 files changed, 60 insertions(+), 57 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 4fb01ff..946b00b 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1679,21 +1679,59 @@ xfsbufd_wakeup(
        return 0;
 }
 
+/*
+ * Move as many buffers as specified to the supplied list
+ * indicating if we skipped any buffers to prevent deadlocks.
+ */
+STATIC int
+xfs_buf_delwri_split(
+       xfs_buftarg_t   *target,
+       struct list_head *list,
+       unsigned long   age,
+       int             flags)
+{
+       xfs_buf_t       *bp, *n;
+       struct list_head *dwq = &target->bt_delwrite_queue;
+       spinlock_t      *dwlk = &target->bt_delwrite_lock;
+       int             skipped = 0;
+
+       INIT_LIST_HEAD(list);
+       spin_lock(dwlk);
+       list_for_each_entry_safe(bp, n, dwq, b_list) {
+               XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp));
+               ASSERT(bp->b_flags & XBF_DELWRI);
+
+               if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
+                       if (!(flags & XBT_FORCE_FLUSH) &&
+                           time_before(jiffies, bp->b_queuetime + age)) {
+                               xfs_buf_unlock(bp);
+                               break;
+                       }
+
+                       bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|
+                                        _XBF_RUN_QUEUES);
+                       bp->b_flags |= XBF_WRITE;
+                       list_move_tail(&bp->b_list, list);
+               } else
+                       skipped++;
+       }
+       spin_unlock(dwlk);
+
+       return skipped;
+
+}
+
 STATIC int
 xfsbufd(
-       void                    *data)
+       void            *data)
 {
-       struct list_head        tmp;
-       unsigned long           age;
-       xfs_buftarg_t           *target = (xfs_buftarg_t *)data;
-       xfs_buf_t               *bp, *n;
-       struct list_head        *dwq = &target->bt_delwrite_queue;
-       spinlock_t              *dwlk = &target->bt_delwrite_lock;
-       int                     count;
+       struct list_head tmp;
+       xfs_buftarg_t   *target = (xfs_buftarg_t *)data;
+       int             count;
+       xfs_buf_t       *bp;
 
        current->flags |= PF_MEMALLOC;
 
-       INIT_LIST_HEAD(&tmp);
        do {
                if (unlikely(freezing(current))) {
                        set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
@@ -1705,37 +1743,19 @@ xfsbufd(
                schedule_timeout_interruptible(
                        xfs_buf_timer_centisecs * msecs_to_jiffies(10));
 
-               count = 0;
-               age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
-               spin_lock(dwlk);
-               list_for_each_entry_safe(bp, n, dwq, b_list) {
-                       XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp));
-                       ASSERT(bp->b_flags & XBF_DELWRI);
-
-                       if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
-                               if (!test_bit(XBT_FORCE_FLUSH,
-                                               &target->bt_flags) &&
-                                   time_before(jiffies,
-                                               bp->b_queuetime + age)) {
-                                       xfs_buf_unlock(bp);
-                                       break;
-                               }
-
-                               bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|
-                                                _XBF_RUN_QUEUES);
-                               bp->b_flags |= XBF_WRITE;
-                               list_move_tail(&bp->b_list, &tmp);
-                               count++;
-                       }
-               }
-               spin_unlock(dwlk);
+               xfs_buf_delwri_split(target, &tmp,
+                               xfs_buf_age_centisecs * msecs_to_jiffies(10),
+                               test_bit(XBT_FORCE_FLUSH, &target->bt_flags)
+                                               ? XBT_FORCE_FLUSH : 0);
 
+               count = 0;
                while (!list_empty(&tmp)) {
                        bp = list_entry(tmp.next, xfs_buf_t, b_list);
                        ASSERT(target == bp->b_target);
 
                        list_del_init(&bp->b_list);
                        xfs_buf_iostrategy(bp);
+                       count++;
                }
 
                if (as_list_len > 0)
@@ -1756,40 +1776,23 @@ xfsbufd(
  */
 int
 xfs_flush_buftarg(
-       xfs_buftarg_t           *target,
-       int                     wait)
+       xfs_buftarg_t   *target,
+       int             wait)
 {
-       struct list_head        tmp;
-       xfs_buf_t               *bp, *n;
-       int                     pincount = 0;
-       struct list_head        *dwq = &target->bt_delwrite_queue;
-       spinlock_t              *dwlk = &target->bt_delwrite_lock;
+       struct list_head tmp;
+       xfs_buf_t       *bp, *n;
+       int             pincount = 0;
 
        xfs_buf_runall_queues(xfsdatad_workqueue);
        xfs_buf_runall_queues(xfslogd_workqueue);
 
-       INIT_LIST_HEAD(&tmp);
-       spin_lock(dwlk);
-       list_for_each_entry_safe(bp, n, dwq, b_list) {
-               ASSERT(bp->b_target == target);
-               ASSERT(bp->b_flags & (XBF_DELWRI | _XBF_DELWRI_Q));
-               XB_TRACE(bp, "walkq2", (long)xfs_buf_ispin(bp));
-               if (xfs_buf_ispin(bp)) {
-                       pincount++;
-                       continue;
-               }
-
-               list_move_tail(&bp->b_list, &tmp);
-       }
-       spin_unlock(dwlk);
+       pincount = xfs_buf_delwri_split(target, &tmp, 0, XBT_FORCE_FLUSH);
 
        /*
         * Dropped the delayed write list lock, now walk the temporary list
         */
        list_for_each_entry_safe(bp, n, &tmp, b_list) {
-               xfs_buf_lock(bp);
-               bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|_XBF_RUN_QUEUES);
-               bp->b_flags |= XBF_WRITE;
+               ASSERT(target == bp->b_target);
                if (wait)
                        bp->b_flags &= ~XBF_ASYNC;
                else
-
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to