ChangeSet 1.1555, 2005/02/26 06:59:14-03:00, [EMAIL PROTECTED]

        [PATCH] BUG on error handlings in Ext3 under I/O
        
                I found bugs on error handlings in the functions arround the 
file system,
        which cause inadequate completions of synchronous write I/O operations
        when disk I/O failures occur.
        
                I carried out following experiment:
        
        1.  Mount a ext3 file system on a SCSI disk with ordered mode.
        2.  Open a file on the file system with O_SYNC|O_RDWR|O_TRUNC|O_CREAT 
flag.
        3.  Write 512 bytes data to the file by calling write() every 5 
seconds, and
             examine return values from the syscall.
             from write().
        4.  Disconnect the SCSI cable,  and examine messages from the kernel.
        
                After the SCSI cable is disconnected, write() must fail.  But 
write()
        succeeded for a while even though messages of the kernel notified SCSI 
I/O error.
        
                By applying following modifications, the above problem was 
solved.
        In fact I have send patch for both 2.4 and 2.6 to Andrew, but only 2.6 
was
        accepted.
        So I send you my patch for the 2.4 kernel.
        
        Please consider applying this patch to the 2.4 mainline kernel.
        
        
        Signed-off-by: Hisashi Hifumi  <[EMAIL PROTECTED]>



 fs/ext3/fsync.c      |    2 +-
 fs/ext3/super.c      |    5 +++--
 fs/jbd/commit.c      |   18 +++++++++++++++++-
 fs/jbd/journal.c     |   12 ++++++++++--
 fs/jbd/transaction.c |    4 ++--
 include/linux/jbd.h  |    2 +-
 mm/filemap.c         |    7 ++++++-
 7 files changed, 40 insertions(+), 10 deletions(-)


diff -Nru a/fs/ext3/fsync.c b/fs/ext3/fsync.c
--- a/fs/ext3/fsync.c   2005-02-26 10:03:46 -08:00
+++ b/fs/ext3/fsync.c   2005-02-26 10:03:46 -08:00
@@ -69,7 +69,7 @@
        if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
                ret |= fsync_inode_data_buffers(inode);
 
-       ext3_force_commit(inode->i_sb);
+       ret |= ext3_force_commit(inode->i_sb);
 
        return ret;
 }
diff -Nru a/fs/ext3/super.c b/fs/ext3/super.c
--- a/fs/ext3/super.c   2005-02-26 10:03:46 -08:00
+++ b/fs/ext3/super.c   2005-02-26 10:03:46 -08:00
@@ -1608,12 +1608,13 @@
 
 static int ext3_sync_fs(struct super_block *sb)
 {
+       int err;
        tid_t target;
        
        sb->s_dirt = 0;
        target = log_start_commit(EXT3_SB(sb)->s_journal, NULL);
-       log_wait_commit(EXT3_SB(sb)->s_journal, target);
-       return 0;
+       err = log_wait_commit(EXT3_SB(sb)->s_journal, target);
+       return err;
 }
 
 /*
diff -Nru a/fs/jbd/commit.c b/fs/jbd/commit.c
--- a/fs/jbd/commit.c   2005-02-26 10:03:46 -08:00
+++ b/fs/jbd/commit.c   2005-02-26 10:03:46 -08:00
@@ -92,7 +92,7 @@
        struct buffer_head *wbuf[64];
        int bufs;
        int flags;
-       int err;
+       int err = 0;
        unsigned long blocknr;
        char *tagp = NULL;
        journal_header_t *header;
@@ -299,6 +299,8 @@
                        spin_unlock(&journal_datalist_lock);
                        unlock_journal(journal);
                        wait_on_buffer(bh);
+                       if (unlikely(!buffer_uptodate(bh)))
+                               err = -EIO;
                        /* the journal_head may have been removed now */
                        lock_journal(journal);
                        goto write_out_data;
@@ -326,6 +328,8 @@
                        spin_unlock(&journal_datalist_lock);
                        unlock_journal(journal);
                        wait_on_buffer(bh);
+                       if (unlikely(!buffer_uptodate(bh)))
+                               err = -EIO;
                        lock_journal(journal);
                        spin_lock(&journal_datalist_lock);
                        continue;       /* List may have changed */
@@ -351,6 +355,9 @@
        }
        spin_unlock(&journal_datalist_lock);
 
+       if (err)
+               __journal_abort_hard(journal);
+
        /*
         * If we found any dirty or locked buffers, then we should have
         * looped back up to the write_out_data label.  If there weren't
@@ -541,6 +548,8 @@
                if (buffer_locked(bh)) {
                        unlock_journal(journal);
                        wait_on_buffer(bh);
+                       if (unlikely(!buffer_uptodate(bh)))
+                               err = -EIO;
                        lock_journal(journal);
                        goto wait_for_iobuf;
                }
@@ -602,6 +611,8 @@
                if (buffer_locked(bh)) {
                        unlock_journal(journal);
                        wait_on_buffer(bh);
+                       if (unlikely(!buffer_uptodate(bh)))
+                               err = -EIO;
                        lock_journal(journal);
                        goto wait_for_ctlbuf;
                }
@@ -650,6 +661,8 @@
                bh->b_end_io = journal_end_buffer_io_sync;
                submit_bh(WRITE, bh);
                wait_on_buffer(bh);
+               if (unlikely(!buffer_uptodate(bh)))
+                       err = -EIO;
                put_bh(bh);             /* One for getblk() */
                journal_unlock_journal_head(descriptor);
        }
@@ -661,6 +674,9 @@
 
 skip_commit: /* The journal should be unlocked by now. */
 
+       if (err)
+               __journal_abort_hard(journal);
+       
        /* Call any callbacks that had been registered for handles in this
         * transaction.  It is up to the callback to free any allocated
         * memory.
diff -Nru a/fs/jbd/journal.c b/fs/jbd/journal.c
--- a/fs/jbd/journal.c  2005-02-26 10:03:46 -08:00
+++ b/fs/jbd/journal.c  2005-02-26 10:03:46 -08:00
@@ -582,8 +582,10 @@
  * Wait for a specified commit to complete.
  * The caller may not hold the journal lock.
  */
-void log_wait_commit (journal_t *journal, tid_t tid)
+int log_wait_commit (journal_t *journal, tid_t tid)
 {
+       int err = 0;
+
        lock_kernel();
 #ifdef CONFIG_JBD_DEBUG
        lock_journal(journal);
@@ -600,6 +602,12 @@
                sleep_on(&journal->j_wait_done_commit);
        }
        unlock_kernel();
+
+       if (unlikely(is_journal_aborted(journal))) {
+               printk(KERN_EMERG "journal commit I/O error\n");
+               err = -EIO;
+       }
+       return err;
 }
 
 /*
@@ -1326,7 +1334,7 @@
 
        /* Wait for the log commit to complete... */
        if (transaction)
-               log_wait_commit(journal, transaction->t_tid);
+               err = log_wait_commit(journal, transaction->t_tid);
 
        /* ...and flush everything in the log out to disk. */
        lock_journal(journal);
diff -Nru a/fs/jbd/transaction.c b/fs/jbd/transaction.c
--- a/fs/jbd/transaction.c      2005-02-26 10:03:46 -08:00
+++ b/fs/jbd/transaction.c      2005-02-26 10:03:46 -08:00
@@ -1484,7 +1484,7 @@
                 * to wait for the commit to complete.  
                 */
                if (handle->h_sync && !(current->flags & PF_MEMALLOC))
-                       log_wait_commit(journal, tid);
+                       err = log_wait_commit(journal, tid);
        }
        kfree(handle);
        return err;
@@ -1509,7 +1509,7 @@
                goto out;
        }
        handle->h_sync = 1;
-       journal_stop(handle);
+       ret = journal_stop(handle);
 out:
        unlock_kernel();
        return ret;
diff -Nru a/include/linux/jbd.h b/include/linux/jbd.h
--- a/include/linux/jbd.h       2005-02-26 10:03:46 -08:00
+++ b/include/linux/jbd.h       2005-02-26 10:03:46 -08:00
@@ -848,7 +848,7 @@
 
 extern int     log_space_left (journal_t *); /* Called with journal locked */
 extern tid_t   log_start_commit (journal_t *, transaction_t *);
-extern void    log_wait_commit (journal_t *, tid_t);
+extern int     log_wait_commit (journal_t *, tid_t);
 extern int     log_do_checkpoint (journal_t *, int);
 
 extern void    log_wait_for_space(journal_t *, int nblocks);
diff -Nru a/mm/filemap.c b/mm/filemap.c
--- a/mm/filemap.c      2005-02-26 10:03:46 -08:00
+++ b/mm/filemap.c      2005-02-26 10:03:46 -08:00
@@ -3261,7 +3261,12 @@
                        status = generic_osync_inode(inode, 
OSYNC_METADATA|OSYNC_DATA);
        }
        
-       err = written ? written : status;
+       /*
+        * generic_osync_inode always returns 0 or negative value.
+        * So 'status < written' is always true, and written should
+        * be returned if status >= 0.
+        */
+       err = (status < 0) ? status : written;
 out:
 
        return err;
-
To unsubscribe from this list: send the line "unsubscribe bk-commits-24" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to