Gitweb:     http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=0e0f4fc22ece8e593167eccbb1a4154565c11faa
Commit:     0e0f4fc22ece8e593167eccbb1a4154565c11faa
Parent:     670e4def6ef5f44315d62748134e535b479c784f
Author:     Ken Chen <[EMAIL PROTECTED]>
AuthorDate: Tue Oct 16 23:30:38 2007 -0700
Committer:  Linus Torvalds <[EMAIL PROTECTED]>
CommitDate: Wed Oct 17 08:43:02 2007 -0700

    writeback: fix periodic superblock dirty inode flushing
    
    The current -mm tree has a bucketful of bug fixes in the periodic
    writeback path.  However, we still hit a glitch where dirty pages on a
    given inode aren't completely flushed to the disk, and the system will
    accumulate a large amount of dirty pages beyond what
    dirty_expire_interval is designed for.
    
    The problem is that __sync_single_inode() will move an inode to the
    sb->s_dirty list even when there are more pending dirty pages on that
    inode.  If there is another inode with a small number of dirty pages, we
    hit a case where the loop iteration in wb_kupdate() terminates
    prematurely because wbc.nr_to_write > 0.  This leaves behind the inode
    that has a large amount of dirty pages, and it has to wait for another
    dirty_writeback_interval before we flush it again.  We effectively only
    write out MAX_WRITEBACK_PAGES every dirty_writeback_interval.  If the
    rate of dirtying is sufficiently high, the system will start to
    accumulate a large number of dirty pages.
    
    So fix it by having another sb->s_more_io list on which to park the inode
    while we iterate through sb->s_io, and to allow each dirty inode which
    resides on that sb to have an equal chance of flushing some amount of
    dirty pages.
    
    Signed-off-by: Ken Chen <[EMAIL PROTECTED]>
    Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
    Signed-off-by: Linus Torvalds <[EMAIL PROTECTED]>
---
 fs/fs-writeback.c  |   36 ++++++++++++++----------------------
 fs/super.c         |    1 +
 include/linux/fs.h |    1 +
 3 files changed, 16 insertions(+), 22 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 39fadfa..c9d105f 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -165,25 +165,11 @@ static void redirty_tail(struct inode *inode)
 }
 
 /*
- * Redirty an inode, but mark it as the very next-to-be-written inode on its
- * superblock's dirty-inode list.
- * We need to preserve s_dirty's reverse-time-orderedness, so we cheat by
- * setting this inode's dirtied_when to the same value as that of the inode
- * which is presently head-of-list, if present head-of-list is newer than this
- * inode. (head-of-list is the least-recently-dirtied inode: the oldest one).
+ * requeue inode for re-scanning after sb->s_io list is exhausted.
  */
-static void redirty_head(struct inode *inode)
+static void requeue_io(struct inode *inode)
 {
-       struct super_block *sb = inode->i_sb;
-
-       if (!list_empty(&sb->s_dirty)) {
-               struct inode *head_inode;
-
-               head_inode = list_entry(sb->s_dirty.prev, struct inode, i_list);
-               if (time_after(inode->dirtied_when, head_inode->dirtied_when))
-                       inode->dirtied_when = head_inode->dirtied_when;
-       }
-       list_move_tail(&inode->i_list, &sb->s_dirty);
+       list_move(&inode->i_list, &inode->i_sb->s_more_io);
 }
 
 /*
@@ -255,7 +241,7 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
                                 * uncongested.
                                 */
                                inode->i_state |= I_DIRTY_PAGES;
-                               redirty_head(inode);
+                               requeue_io(inode);
                        } else {
                                /*
                                 * Otherwise fully redirty the inode so that
@@ -315,7 +301,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                 * on s_io.  We'll have another go at writing back this inode
                 * when the s_dirty iodes get moved back onto s_io.
                 */
-               redirty_head(inode);
+               requeue_io(inode);
 
                /*
                 * Even if we don't actually write the inode itself here,
@@ -410,14 +396,14 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
                        wbc->encountered_congestion = 1;
                        if (!sb_is_blkdev_sb(sb))
                                break;          /* Skip a congested fs */
-                       redirty_head(inode);
+                       requeue_io(inode);
                        continue;               /* Skip a congested blockdev */
                }
 
                if (wbc->bdi && bdi != wbc->bdi) {
                        if (!sb_is_blkdev_sb(sb))
                                break;          /* fs has the wrong queue */
-                       redirty_head(inode);
+                       requeue_io(inode);
                        continue;               /* blockdev has wrong queue */
                }
 
@@ -427,8 +413,10 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
 
                /* Was this inode dirtied too recently? */
                if (wbc->older_than_this && time_after(inode->dirtied_when,
-                                               *wbc->older_than_this))
+                                               *wbc->older_than_this)) {
+                       list_splice_init(&sb->s_io, sb->s_dirty.prev);
                        break;
+               }
 
                /* Is another pdflush already flushing this queue? */
                if (current_is_pdflush() && !writeback_acquire(bdi))
@@ -458,6 +446,10 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
                if (wbc->nr_to_write <= 0)
                        break;
        }
+
+       if (list_empty(&sb->s_io))
+               list_splice_init(&sb->s_more_io, &sb->s_io);
+
        return;         /* Leave any unwritten inodes on s_io */
 }
 
diff --git a/fs/super.c b/fs/super.c
index fc8ebed..1bfcca2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -67,6 +67,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
                }
                INIT_LIST_HEAD(&s->s_dirty);
                INIT_LIST_HEAD(&s->s_io);
+               INIT_LIST_HEAD(&s->s_more_io);
                INIT_LIST_HEAD(&s->s_files);
                INIT_LIST_HEAD(&s->s_instances);
                INIT_HLIST_HEAD(&s->s_anon);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 30aca33..0b38a89 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1002,6 +1002,7 @@ struct super_block {
        struct list_head        s_inodes;       /* all inodes */
        struct list_head        s_dirty;        /* dirty inodes */
        struct list_head        s_io;           /* parked for writeback */
+       struct list_head        s_more_io;      /* parked for more writeback */
        struct hlist_head       s_anon;         /* anonymous dentries for (nfs) exporting */
        struct list_head        s_files;
 
-
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to