Gitweb:     
http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=859d718279b6e1d6bc27a701db47c1be720b5907
Commit:     859d718279b6e1d6bc27a701db47c1be720b5907
Parent:     ba532a980b7dcccf5eebd2cd409a9cb37faa2bb4
Author:     Vlad Apostolov <[EMAIL PROTECTED]>
AuthorDate: Thu Oct 11 17:44:18 2007 +1000
Committer:  Tim Shimmin <[EMAIL PROTECTED]>
CommitDate: Tue Oct 16 12:21:15 2007 +1000

    [XFS] get_bulkall() could return incorrect inode state
    
    In the following scenario xfs_bulkstat() returns incorrect stale inode
    state:
    
    1. File_A is created and its inode synced to disk.
    2. File_A is unlinked and doesn't exist anymore.
    3. Filesystem sync is invoked.
    4. File_B is created. File_B happens to reclaim File_A's inode.
    5. xfs_bulkstat() is called and detects File_B but reports the
       incorrect File_A inode state.
    
    The explanation for the incorrect inode state is that inodes are not
    immediately synced on file create for performance reasons. This leaves the
    on-disk inode buffer uninitialized (or with old state from a previous
    generation inode) and this is what xfs_bulkstat() would report.
    
    The patch marks the on-disk inode buffer "dirty" on unlink. When the inode
    is reclaimed (by a new file create), xfs_bulkstat() would filter this
    inode by the "dirty" mark. Once the inode is flushed to disk, the on-disk
    buffer "dirty" mark is automatically removed and a following
    xfs_bulkstat() would return the correct inode state.
    
    Marking the on-disk inode buffer "dirty" on unlink is achieved by setting
    the on-disk di_nlink field to 0. Note that the in-core di_nlink has
    already been set to 0 and a corresponding transaction logged by
    xfs_droplink(). This is an exception to the rule that any on-disk inode
    buffer change has to be followed by a disk write (inode flush).
    Synchronizing the in-core to on-disk di_nlink values in advance (before
    the actual inode flush to disk) should be fine in this case because the
    inode is already unlinked and it would never change its di_nlink again for
    this inode generation.
    
    SGI-PV: 970842
    SGI-Modid: xfs-linux-melb:xfs-kern:29757a
    
    Signed-off-by: Vlad Apostolov <[EMAIL PROTECTED]>
    Signed-off-by: Alex Elder <[EMAIL PROTECTED]>
    Signed-off-by: David Chinner <[EMAIL PROTECTED]>
    Signed-off-by: Christoph Hellwig <[EMAIL PROTECTED]>
    Signed-off-by: Mark Goodwin <[EMAIL PROTECTED]>
    Signed-off-by: Tim Shimmin <[EMAIL PROTECTED]>
---
 fs/xfs/xfs_inode.c  |   26 ++++++++++++++++++++------
 fs/xfs/xfs_itable.c |   10 +++++++++-
 2 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3d8ba8f..abf509a 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1931,9 +1931,9 @@ xfs_iunlink(
         */
        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, agdaddr,
                                   XFS_FSS_TO_BB(mp, 1), 0, &agibp);
-       if (error) {
+       if (error)
                return error;
-       }
+
        /*
         * Validate the magic number of the agi block.
         */
@@ -1957,6 +1957,24 @@ xfs_iunlink(
        ASSERT(agi->agi_unlinked[bucket_index]);
        ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);
 
+       error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
+       if (error)
+               return error;
+
+       /*
+        * Clear the on-disk di_nlink. This is to prevent xfs_bulkstat
+        * from picking up this inode when it is reclaimed (its incore state
+        * initialized but not flushed to disk yet). The in-core di_nlink is
+        * already cleared in xfs_droplink() and a corresponding transaction
+        * logged. The hack here just synchronizes the in-core to on-disk
+        * di_nlink value in advance before the actual inode sync to disk.
+        * This is OK because the inode is already unlinked and would never
+        * change its di_nlink again for this inode generation.
+        * This is a temporary hack that would require a proper fix
+        * in the future.
+        */
+       dip->di_core.di_nlink = 0;
+
        if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) {
                /*
                 * There is already another inode in the bucket we need
@@ -1964,10 +1982,6 @@ xfs_iunlink(
                 * Here we put the head pointer into our next pointer,
                 * and then we fall through to point the head at us.
                 */
-               error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
-               if (error) {
-                       return error;
-               }
                ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO);
                /* both on-disk, don't endian flip twice */
                dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index efeeafe..1edd9af 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -290,8 +290,16 @@ xfs_bulkstat_use_dinode(
                return 1;
        dip = (xfs_dinode_t *)
                        xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog);
+       /*
+        * Check the buffer containing the on-disk inode for di_nlink == 0.
+        * This is to prevent xfs_bulkstat from picking up just reclaimed
+        * inodes that have their in-core state initialized but not flushed
+        * to disk yet. This is a temporary hack that would require a proper
+        * fix in the future.
+        */
        if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC ||
-           !XFS_DINODE_GOOD_VERSION(dip->di_core.di_version))
+           !XFS_DINODE_GOOD_VERSION(dip->di_core.di_version) ||
+           !dip->di_core.di_nlink)
                return 0;
        if (flags & BULKSTAT_FG_QUICK) {
                *dipp = dip;
-
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to